Skip to content
Snippets Groups Projects
Unverified Commit ba2e979d authored by Marcin Wątroba's avatar Marcin Wątroba Committed by GitHub
Browse files

Merge pull request #13 from CLARIN-PL/change_data_model

Change data model
parents fd925859 f77d8a13
Branches
No related tags found
1 merge request!13Change data model
Showing
with 729 additions and 166 deletions
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# IDE settings
.vscode/
# macOS
.DS_Store
debug_run/
publish_to_theliver.sh
.idea
experiment_data
cache_asr_scripts
docker/embedding_models/
docker/wav2vec2for_ctc_models/
.dvc
*.bin
*.gz
/config.local
/tmp
/cache
[core]
remote = clarin-biz-asr-benchmarks
autostage = true
hardlink_lock = true
['remote "clarin-biz-asr-benchmarks"']
url = s3://projects/clarin-biz-asr-benchmark/dvc
endpointurl = https://s3.clarin-pl.eu
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"facet": {
"field": "rev",
"type": "nominal"
},
"spec": {
"transform": [
{
"aggregate": [
{
"op": "count",
"as": "xy_count"
}
],
"groupby": [
"<DVC_METRIC_Y>",
"<DVC_METRIC_X>"
]
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_Y>"
],
"key": "<DVC_METRIC_X>",
"value": 0
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_X>"
],
"key": "<DVC_METRIC_Y>",
"value": 0
},
{
"joinaggregate": [
{
"op": "max",
"field": "xy_count",
"as": "max_count"
}
],
"groupby": []
},
{
"calculate": "datum.xy_count / datum.max_count",
"as": "percent_of_max"
}
],
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_Y_LABEL>"
}
},
"layer": [
{
"mark": "rect",
"width": 300,
"height": 300,
"encoding": {
"color": {
"field": "xy_count",
"type": "quantitative",
"title": "",
"scale": {
"domainMin": 0,
"nice": true
}
}
}
},
{
"mark": "text",
"encoding": {
"text": {
"field": "xy_count",
"type": "quantitative"
},
"color": {
"condition": {
"test": "datum.percent_of_max > 0.5",
"value": "white"
},
"value": "black"
}
}
}
]
}
}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"facet": {
"field": "rev",
"type": "nominal"
},
"spec": {
"transform": [
{
"aggregate": [
{
"op": "count",
"as": "xy_count"
}
],
"groupby": [
"<DVC_METRIC_Y>",
"<DVC_METRIC_X>"
]
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_Y>"
],
"key": "<DVC_METRIC_X>",
"value": 0
},
{
"impute": "xy_count",
"groupby": [
"rev",
"<DVC_METRIC_X>"
],
"key": "<DVC_METRIC_Y>",
"value": 0
},
{
"joinaggregate": [
{
"op": "sum",
"field": "xy_count",
"as": "sum_y"
}
],
"groupby": [
"<DVC_METRIC_Y>"
]
},
{
"calculate": "datum.xy_count / datum.sum_y",
"as": "percent_of_y"
}
],
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "nominal",
"sort": "ascending",
"title": "<DVC_METRIC_Y_LABEL>"
}
},
"layer": [
{
"mark": "rect",
"width": 300,
"height": 300,
"encoding": {
"color": {
"field": "percent_of_y",
"type": "quantitative",
"title": "",
"scale": {
"domain": [
0,
1
]
}
}
}
},
{
"mark": "text",
"encoding": {
"text": {
"field": "percent_of_y",
"type": "quantitative",
"format": ".2f"
},
"color": {
"condition": {
"test": "datum.percent_of_y > 0.5",
"value": "white"
},
"value": "black"
}
}
}
]
}
}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"width": 300,
"height": 300,
"layer": [
{
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
},
"layer": [
{
"mark": "line"
},
{
"selection": {
"label": {
"type": "single",
"nearest": true,
"on": "mouseover",
"encodings": [
"x"
],
"empty": "none",
"clear": "mouseout"
}
},
"mark": "point",
"encoding": {
"opacity": {
"condition": {
"selection": "label",
"value": 1
},
"value": 0
}
}
}
]
},
{
"transform": [
{
"filter": {
"selection": "label"
}
}
],
"layer": [
{
"mark": {
"type": "rule",
"color": "gray"
},
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative"
}
}
},
{
"encoding": {
"text": {
"type": "quantitative",
"field": "<DVC_METRIC_Y>"
},
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative"
}
},
"layer": [
{
"mark": {
"type": "text",
"align": "left",
"dx": 5,
"dy": -5
},
"encoding": {
"color": {
"type": "nominal",
"field": "rev"
}
}
}
]
}
]
}
]
}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"width": 300,
"height": 300,
"layer": [
{
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
},
"layer": [
{
"mark": "point"
},
{
"selection": {
"label": {
"type": "single",
"nearest": true,
"on": "mouseover",
"encodings": [
"x"
],
"empty": "none",
"clear": "mouseout"
}
},
"mark": "point",
"encoding": {
"opacity": {
"condition": {
"selection": "label",
"value": 1
},
"value": 0
}
}
}
]
},
{
"transform": [
{
"filter": {
"selection": "label"
}
}
],
"layer": [
{
"encoding": {
"text": {
"type": "quantitative",
"field": "<DVC_METRIC_Y>"
},
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative"
}
},
"layer": [
{
"mark": {
"type": "text",
"align": "left",
"dx": 5,
"dy": -5
},
"encoding": {
"color": {
"type": "nominal",
"field": "rev"
}
}
}
]
}
]
}
]
}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"width": 300,
"height": 300,
"mark": {
"type": "line"
},
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
}
}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": "<DVC_METRIC_DATA>"
},
"title": "<DVC_METRIC_TITLE>",
"mark": {
"type": "line"
},
"encoding": {
"x": {
"field": "<DVC_METRIC_X>",
"type": "quantitative",
"title": "<DVC_METRIC_X_LABEL>"
},
"y": {
"field": "<DVC_METRIC_Y>",
"type": "quantitative",
"title": "<DVC_METRIC_Y_LABEL>",
"scale": {
"zero": false
}
},
"color": {
"field": "rev",
"type": "nominal"
}
},
"transform": [
{
"loess": "<DVC_METRIC_Y>",
"on": "<DVC_METRIC_X>",
"groupby": [
"rev"
],
"bandwidth": 0.3
}
]
}
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
name: CI
on: pull_request
jobs:
tests:
strategy:
fail-fast: false
matrix:
python-version: [ 3.9.6 ]
poetry-version: [ 1.1.5 ]
# os: [ ubuntu-20.04, macos-latest, windows-latest ]
os: [ ubuntu-20.04 ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
uses: abatilo/actions-poetry@v2.0.0
with:
poetry-version: ${{ matrix.poetry-version }}
- name: Install deps
run: poetry install -vv
- name: Run tests
run: poetry run poe test
lint:
strategy:
fail-fast: false
matrix:
python-version: [ 3.9.6 ]
poetry-version: [ 1.1.5 ]
os: [ ubuntu-20.04 ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
uses: abatilo/actions-poetry@v2.0.0
with:
poetry-version: ${{ matrix.poetry-version }}
- name: Install deps
run: poetry install -vv
- name: Check
run: poetry run poe check
publish:
needs:
- tests
- lint
environment: Test deployment
strategy:
fail-fast: false
matrix:
python-version: [ 3.9.6 ]
poetry-version: [ 1.1.5 ]
os: [ ubuntu-20.04 ]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install poetry
uses: abatilo/actions-poetry@v2.0.0
with:
poetry-version: ${{ matrix.poetry-version }}
- name: Install deps
run: poetry install -vv
- name: Build and publish
run: |
poetry version "$(poetry version --short)-alpha.${GITHUB_RUN_NUMBER}"
poetry build
poetry config repositories.clarinpypi https://pypi.clarin-pl.eu/
poetry publish -r clarinpypi --username ${{ secrets.PYPI_USER }} --password ${{ secrets.PYPI_PASS }}
#name: CI
#on: pull_request
#
#jobs:
# tests:
# strategy:
# fail-fast: false
# matrix:
# python-version: [ 3.9.6 ]
# poetry-version: [ 1.1.5 ]
# # os: [ ubuntu-20.04, macos-latest, windows-latest ]
# os: [ ubuntu-20.04 ]
# runs-on: ${{ matrix.os }}
# steps:
# - uses: actions/checkout@v2
# - uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# - name: Install poetry
# uses: abatilo/actions-poetry@v2.0.0
# with:
# poetry-version: ${{ matrix.poetry-version }}
# - name: Install deps
# run: poetry install -vv
# - name: Run tests
# run: poetry run poe test
# lint:
# strategy:
# fail-fast: false
# matrix:
# python-version: [ 3.9.6 ]
# poetry-version: [ 1.1.5 ]
# os: [ ubuntu-20.04 ]
# runs-on: ${{ matrix.os }}
# steps:
# - uses: actions/checkout@v2
# - uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# - name: Install poetry
# uses: abatilo/actions-poetry@v2.0.0
# with:
# poetry-version: ${{ matrix.poetry-version }}
# - name: Install deps
# run: poetry install -vv
# - name: Check
# run: poetry run poe check
# publish:
# needs:
# - tests
# - lint
# environment: Test deployment
# strategy:
# fail-fast: false
# matrix:
# python-version: [ 3.9.6 ]
# poetry-version: [ 1.1.5 ]
# os: [ ubuntu-20.04 ]
# runs-on: ${{ matrix.os }}
# steps:
# - uses: actions/checkout@v2
# with:
# fetch-depth: 0
# - uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# - name: Install poetry
# uses: abatilo/actions-poetry@v2.0.0
# with:
# poetry-version: ${{ matrix.poetry-version }}
# - name: Install deps
# run: poetry install -vv
# - name: Build and publish
# run: |
# poetry version "$(poetry version --short)-alpha.${GITHUB_RUN_NUMBER}"
# poetry build
# poetry config repositories.clarinpypi https://pypi.clarin-pl.eu/
# poetry publish -r clarinpypi --username ${{ secrets.PYPI_USER }} --password ${{ secrets.PYPI_PASS }}
# Byte-compiled / optimized / DLL files
__pycache__/
**/__pycache__/
*.py[cod]
*$py.class
......@@ -109,3 +110,8 @@ ENV/
debug_run/
publish_to_theliver.sh
.idea
docker/embedding_models/
docker/wav2vec2for_ctc_models/
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="6">
<item index="0" class="java.lang.String" itemvalue="tensorflow-gpu" />
<item index="1" class="java.lang.String" itemvalue="tensorflow" />
<item index="2" class="java.lang.String" itemvalue="mysql-connector-python" />
<item index="3" class="java.lang.String" itemvalue="dateutil" />
<item index="4" class="java.lang.String" itemvalue="pytest" />
<item index="5" class="java.lang.String" itemvalue="celery" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E402" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredIdentifiers">
<list>
<option value="dict.__getitem__" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (asr-benchmarks)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/asr-benchmarks.iml" filepath="$PROJECT_DIR$/.idea/asr-benchmarks.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="MypyConfigService">
<option name="customMypyPath" value="$USER_HOME$/miniconda3/envs/asr-benchmarks/bin/mypy" />
</component>
</project>
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment