diff --git a/combo/utils/download.py b/combo/utils/download.py index 172927067a70089f1e1b4d6816bdae880bd62dbe..03f310d625cd6411f269a5d5f02091d114a14f15 100644 --- a/combo/utils/download.py +++ b/combo/utils/download.py @@ -15,7 +15,7 @@ DATA_TO_PATH = { "ud25" : "ud_25", "ud27" : "ud_27", "ud29" : "ud_29"} -_URL = "http://s3.clarin-pl.eu/models/combo/{data}/{model}.tar.gz" +_URL = "http://s3.clarin-pl.eu/dspace/combo/{data}/{model}.tar.gz" _HOME_DIR = os.getenv("HOME", os.curdir) _CACHE_DIR = os.getenv("COMBO_DIR", os.path.join(_HOME_DIR, ".combo")) diff --git a/docs/performance.md b/docs/performance.md index eb76ed9d68d950a007d3fb57c41e514229aa6bc5..ea1b51c76b82101f6126cde0fa2a4b7da7f95fae 100644 --- a/docs/performance.md +++ b/docs/performance.md @@ -5,105 +5,110 @@ We list here only models trained on the Universal Dependencies version 2.9 datas for more. -|Treebank | Model name |UPOS |XPOS |UFeats|AllTags|Lemmas|UAS |LAS |CLAS |MLAS |BLEX | Language model |LICENSE | -|-------------------------|--------------------------------------------------------------------------------------------------------------|-----|------|------|-------|------|-----|-----|-----|-----|-----|--------------------|---------------------------------------------------------------------------------------| -|UD_English-EWT | [english-bert-base-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/english-bert-base-ud29.tar.gz) |96.58|96.24|97.17|94.65|97.18|91.7|89.4|87.01|82.03|83.84| bert-base-cased |https://github.com/UniversalDependencies/UD_English-EWT/blob/r2.9/LICENSE.txt| -|UD_Polish_PDB| [polish-herbert-base-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/polish-herbert-base-ud29.tar.gz) |98.96|96.4|96.67|95.64|98|95.75|94.05|92.55|87.65|90.01| herbert-base |https://github.com/UniversalDependencies/UD_Polish_PDB/blob/r2.9/LICENSE.txt| -|UD_Polish_PDB| [polish-herbert-large-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/polish-herbert-large-ud29.tar.gz) 
|99.01|96.53|96.92|95.92|98.1|95.62|93.93|92.39|87.77|90.01| herbert-large |https://github.com/UniversalDependencies/UD_Polish_PDB/blob/r2.9/LICENSE.txt| -|UD_Turkish-Kenet| [turkish-kenet-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-kenet-ud29.tar.gz) |92.75|100|88.65|87.5|92.88|81.82|66.94|65.89|55.35|60.72| fastText |https://github.com/UniversalDependencies/UD_Turkish-Kenet/blob/r2.9/LICENSE.txt| -|UD_Icelandic-IcePaHC| [icelandic-icepahc-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/icelandic-icepahc-ud29.tar.gz) |96.32|92.1|90.56|85.29|95.56|86.98|82.83|77.58|64.84|73.64| fastText |https://github.com/UniversalDependencies/UD_Icelandic-IcePaHC/blob/r2.9/LICENSE.txt| -|UD_Dutch-LassySmall| [dutch-lassysmall-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/dutch-lassysmall-ud29.tar.gz) |95.66|93.78|95.48|92.64|89.75|89.37|85.25|80.6|74.4|67.45| fastText |https://github.com/UniversalDependencies/UD_Dutch-LassySmall/blob/r2.9/LICENSE.txt| -|UD_Bulgarian-BTB| [bulgarian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/bulgarian-ud29.tar.gz) |98.93|95.61|97.53|95.06|97.51|93.2|89.93|86.58|82.97|83.44| fastText |https://github.com/UniversalDependencies/UD_Bulgarian-BTB/blob/r2.9/LICENSE.txt| -|UD_Czech-CLTT| [czech-cltt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/czech-cltt-ud29.tar.gz) |98.74|89.51|89.34|88.01|95.11|86.33|83.38|80.06|69.3|75.57| fastText |https://github.com/UniversalDependencies/UD_Czech-CLTT/blob/r2.9/LICENSE.txt| -|UD_Serbian-SET| [serbian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/serbian-ud29.tar.gz) |97.86|93.14|93.31|92.49|96.64|89.9|86.66|84.03|75.91|80.65| fastText |https://github.com/UniversalDependencies/UD_Serbian-SET/blob/r2.9/LICENSE.txt| -|UD_Russian-Taiga| [russian-taiga-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/russian-taiga-ud29.tar.gz) |96.2|100|93.22|91.76|94.73|81.83|77.55|74.74|67.06|69.91| fastText |https://github.com/UniversalDependencies/UD_Russian-Taiga/blob/r2.9/LICENSE.txt| -|UD_Belarusian-HSE| 
[belarusian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/belarusian-ud29.tar.gz) |98.17|96.11|93.71|91.37|95.78|87.24|84.58|81.47|74.22|77| fastText |https://github.com/UniversalDependencies/UD_Belarusian-HSE/blob/r2.9/LICENSE.txt| -|UD_Indonesian-GSD| [indonesian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/indonesian-ud29.tar.gz) |94.45|91.18|95.87|86.15|97.65|86.93|80.38|76.36|70.21|74.31| fastText |https://github.com/UniversalDependencies/UD_Indonesian-GSD/blob/r2.9/LICENSE.txt| -|UD_Norwegian-NynorskLIA| [norwegian-nynorsklia-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/norwegian-nynorsklia-ud29.tar.gz) |93.53|100|92.03|89.47|96.68|76.72|70.75|65.36|57.09|62.72| fastText |https://github.com/UniversalDependencies/UD_Norwegian-NynorskLIA/blob/r2.9/LICENSE.txt| -|UD_Romanian-SiMoNERo| [romanian-simonero-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/romanian-simonero-ud29.tar.gz) |97.91|96.85|96.27|95.89|98.92|93.35|91.09|87.88|82.48|86.75| fastText |https://github.com/UniversalDependencies/UD_Romanian-SiMoNERo/blob/r2.9/LICENSE.txt| -|UD_Afrikaans-AfriBooms| [afrikaans-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/afrikaans-ud29.tar.gz) |96.7|90.57|96.37|90.02|97|87|83.33|76.83|71.14|73.02| fastText |https://github.com/UniversalDependencies/UD_Afrikaans-AfriBooms/blob/r2.9/LICENSE.txt| -|UD_Armenian-ArmTDP| [armenian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/armenian-ud29.tar.gz) |94.08|100|87.92|85.75|93.83|83.41|77.14|71.17|58.72|66.62| fastText |https://github.com/UniversalDependencies/UD_Armenian-ArmTDP/blob/r2.9/LICENSE.txt| -|UD_Catalan-AnCora| [catalan-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/catalan-ud29.tar.gz) |98.83|96.18|98.47|95.82|99.3|93.53|91.29|86.69|84.06|86.1| fastText |https://github.com/UniversalDependencies/UD_Catalan-AnCora/blob/r2.9/LICENSE.txt| -|UD_Czech-PDT| [czech-pdt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/czech-pdt-ud29.tar.gz) |98.98|96.44|96.63|95.61|98.59|93.44|91.54|90.04|85.97|88.56| fastText 
|https://github.com/UniversalDependencies/UD_Czech-PDT/blob/r2.9/LICENSE.txt| -|UD_Swedish-LinES| [swedish-lines-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/swedish-lines-ud29.tar.gz) |96.26|93.08|89.19|85.54|96.93|87.58|83.26|79.8|65.9|76.74| fastText |https://github.com/UniversalDependencies/UD_Swedish-LinES/blob/r2.9/LICENSE.txt| -|UD_French-Sequoia| [french-sequoia-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/french-sequoia-ud29.tar.gz) |97.75|100|100|97.75|98.36|90.84|88.56|84.39|82.27|82.6| fastText |https://github.com/UniversalDependencies/UD_French-Sequoia/blob/r2.9/LICENSE.txt| -|UD_Turkish-Tourism| [turkish-tourism-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-tourism-ud29.tar.gz) |98.39|100|94.47|94.11|98.9|95.61|89.46|86.63|79.02|85.64| fastText |https://github.com/UniversalDependencies/UD_Turkish-Tourism/blob/r2.9/LICENSE.txt| -|UD_Latin-UDante| [latin-udante-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/latin-udante-ud29.tar.gz) |86.59|65.23|71.34|61.08|83.82|65.28|54.53|44.94|27.36|37.31| fastText |https://github.com/UniversalDependencies/UD_Latin-UDante/blob/r2.9/LICENSE.txt| -|UD_Chinese-GSD| [chinese-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/chinese-gsd-ud29.tar.gz) |94.73|94.56|99.1|93.58|98.87|83.2|79.25|77.76|72.24|76.5| fastText |https://github.com/UniversalDependencies/UD_Chinese-GSD/blob/r2.9/LICENSE.txt| -|UD_Polish-LFG| [polish-lfg-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/polish-lfg-ud29.tar.gz) |98.01|93.84|94.93|92.66|97.32|95.9|93.85|92.02|85.49|89.18| fastText |https://github.com/UniversalDependencies/UD_Polish-LFG/blob/r2.9/LICENSE.txt| -|UD_Turkish-IMST| [turkish-imst-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-imst-ud29.tar.gz) |92.49|91.85|88.47|84.27|96.41|73.34|65.76|61.04|50.37|59.29| fastText |https://github.com/UniversalDependencies/UD_Turkish-IMST/blob/r2.9/LICENSE.txt| -|UD_Latin-LLCT| [latin-llct-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/latin-llct-ud29.tar.gz) 
|99.46|96.9|96.84|96.39|97.22|95.55|94.55|93.67|89.55|90.31| fastText |https://github.com/UniversalDependencies/UD_Latin-LLCT/blob/r2.9/LICENSE.txt| -|UD_Norwegian-Bokmaal| [norwegian-bokmaal-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/norwegian-bokmaal-ud29.tar.gz) |97.38|100|96.25|95.43|98.02|92.08|89.99|87.4|82.1|84.96| fastText |https://github.com/UniversalDependencies/UD_Norwegian-Bokmaal/blob/r2.9/LICENSE.txt| -|UD_Italian-ISDT| [italian-isdt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/italian-isdt-ud29.tar.gz) |98.09|97.95|100|97.84|98.2|92.91|90.9|86.61|84.5|84.42| fastText |https://github.com/UniversalDependencies/UD_Italian-ISDT/blob/r2.9/LICENSE.txt| -|UD_Danish-DDT| [danish-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/danish-ud29.tar.gz) |96.8|100|95.99|94.89|97.14|85.61|82.84|79.63|73.1|76.78| fastText |https://github.com/UniversalDependencies/UD_Danish-DDT/blob/r2.9/LICENSE.txt| -|UD_Spanish-GSD| [spanish-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/spanish-gsd-ud29.tar.gz) |96.17|100|96.77|94.22|98.61|90.27|87.22|81.89|74.02|80.23| fastText |https://github.com/UniversalDependencies/UD_Spanish-GSD/blob/r2.9/LICENSE.txt| -|UD_Persian-Seraji| [persian-seraji-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/persian-seraji-ud29.tar.gz) |97.62|97.5|97.45|96.95|95.64|89.78|86.12|82.91|80.57|78.52| fastText |https://github.com/UniversalDependencies/UD_Persian-Seraji/blob/r2.9/LICENSE.txt| -|UD_Turkish-Atis| [turkish-atis-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-atis-ud29.tar.gz) |98.09|100|97.59|97.2|98.63|89.94|87.51|87.02|84.88|86.18| fastText |https://github.com/UniversalDependencies/UD_Turkish-Atis/blob/r2.9/LICENSE.txt| -|UD_Italian-PoSTWITA| [italian-postwita-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/italian-postwita-ud29.tar.gz) |95.39|95.06|95.71|93.03|96.49|84.9|79.6|74.38|68.51|71.57| fastText |https://github.com/UniversalDependencies/UD_Italian-PoSTWITA/blob/r2.9/LICENSE.txt| -|UD_Icelandic-Modern| 
[icelandic-modern-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/icelandic-modern-ud29.tar.gz) |98.98|97.43|98.02|97.19|98.73|94.31|92.75|90.97|88.74|89.83| fastText |https://github.com/UniversalDependencies/UD_Icelandic-Modern/blob/r2.9/LICENSE.txt| -|UD_Basque-BDT| [basque-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/basque-ud29.tar.gz) |94.58|100|91.42|88.58|95.6|84.64|80.61|79|68.55|75.24| fastText |https://github.com/UniversalDependencies/UD_Basque-BDT/blob/r2.9/LICENSE.txt| -|UD_Latvian-LVTB| [latvian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/latvian-ud29.tar.gz) |96.32|86.82|93.33|86.03|95.45|87.88|84.39|81.92|73.51|77.76| fastText |https://github.com/UniversalDependencies/UD_Latvian-LVTB/blob/r2.9/LICENSE.txt| -|UD_English-GUM| [english-gum-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/english-gum-ud29.tar.gz) |96.31|96.02|97.04|94.74|97.69|89|86.15|81.61|76.59|78.64| fastText |https://github.com/UniversalDependencies/UD_English-GUM/blob/r2.9/LICENSE.txt| -|UD_Welsh-CCG| [welsh-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/welsh-ud29.tar.gz) |92.58|90.77|86.14|82.55|89.72|83.37|75.6|68.95|53.54|58.95| fastText |https://github.com/UniversalDependencies/UD_Welsh-CCG/blob/r2.9/LICENSE.txt| -|UD_Russian-GSD| [russian-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/russian-gsd-ud29.tar.gz) |97.02|96.48|91.72|90.27|95.7|87.47|83.43|81.47|72.15|77.4| fastText |https://github.com/UniversalDependencies/UD_Russian-GSD/blob/r2.9/LICENSE.txt| -|UD_Finnish-TDT| [finnish-tdt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/finnish-tdt-ud29.tar.gz) |96.41|97.97|94.01|92.55|88.29|89.27|86.46|84.61|77.32|71.77| fastText |https://github.com/UniversalDependencies/UD_Finnish-TDT/blob/r2.9/LICENSE.txt| -|UD_Norwegian-Nynorsk| [norwegian-nynorsk-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/norwegian-nynorsk-ud29.tar.gz) |97.22|100|95.29|94.31|97.38|91.76|89.51|87.13|80.26|83.93| fastText 
|https://github.com/UniversalDependencies/UD_Norwegian-Nynorsk/blob/r2.9/LICENSE.txt| -|UD_Irish-IDT| [irish-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/irish-ud29.tar.gz) |94.01|93.71|88.33|84.39|93.53|85.57|79.24|73.83|59.54|67.72| fastText |https://github.com/UniversalDependencies/UD_Irish-IDT/blob/r2.9/LICENSE.txt| -|UD_Urdu-UDTB| [urdu-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/urdu-ud29.tar.gz) |93.6|91.66|82.84|78.25|96.1|86.58|80.76|74.62|54.91|70.95| fastText |https://github.com/UniversalDependencies/UD_Urdu-UDTB/blob/r2.9/LICENSE.txt| -|UD_Portuguese-Bosque| [portuguese-bosque-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/portuguese-bosque-ud29.tar.gz) |97.4|100|96.3|94.95|98.21|91.06|87.92|82.65|76.02|80.44| fastText |https://github.com/UniversalDependencies/UD_Portuguese-Bosque/blob/r2.9/LICENSE.txt| -|UD_Spanish-AnCora| [spanish-ancora-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/spanish-ancora-ud29.tar.gz) |98.83|95.97|98.57|95.4|99.34|92.38|90.09|85.84|83.37|85.23| fastText |https://github.com/UniversalDependencies/UD_Spanish-AnCora/blob/r2.9/LICENSE.txt| -|UD_Czech-FicTree| [czech-fictree-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/czech-fictree-ud29.tar.gz) |98.03|93.98|95.37|93.01|98.26|92.5|90.06|87.28|80.76|85.2| fastText |https://github.com/UniversalDependencies/UD_Czech-FicTree/blob/r2.9/LICENSE.txt| -|UD_Italian-VIT| [italian-vit-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/italian-vit-ud29.tar.gz) |97.64|96.91|97.34|95.64|98.41|89.73|86.15|80.57|76.01|78.86| fastText |https://github.com/UniversalDependencies/UD_Italian-VIT/blob/r2.9/LICENSE.txt| -|UD_Dutch-Alpino| [dutch-alpino-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/dutch-alpino-ud29.tar.gz) |96.37|94.28|96.48|93.5|89.09|91.45|88|82.53|76.73|67.01| fastText |https://github.com/UniversalDependencies/UD_Dutch-Alpino/blob/r2.9/LICENSE.txt| -|UD_Hindi-HDTB| [hindi-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/hindi-ud29.tar.gz) 
|97.05|96.59|93.92|91.5|98.83|94.58|91.24|87.49|77.22|86.19| fastText |https://github.com/UniversalDependencies/UD_Hindi-HDTB/blob/r2.9/LICENSE.txt| -|UD_Persian-PerDT| [persian-perdt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/persian-perdt-ud29.tar.gz) |97.31|97.22|97.78|95.23|98.89|93.3|90.9|89|85|87.8| fastText |https://github.com/UniversalDependencies/UD_Persian-PerDT/blob/r2.9/LICENSE.txt| -|UD_Japanese-GSD| [japanese-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/japanese-gsd-ud29.tar.gz) |98.29|96.88|100|96.51|98.9|94.05|92.73|88.76|86.52|87.53| fastText |https://github.com/UniversalDependencies/UD_Japanese-GSD/blob/r2.9/LICENSE.txt| -|UD_German-GSD| [german-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/german-gsd-ud29.tar.gz) |94.15|96.5|90|84.5|96.38|86.53|81.63|77.2|59.57|72.5| fastText |https://github.com/UniversalDependencies/UD_German-GSD/blob/r2.9/LICENSE.txt| -|UD_Slovak-SNK| [slovak-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/slovak-ud29.tar.gz) |95.24|86.04|89.75|84.73|94.86|90.08|87.15|85.16|72.59|79.93| fastText |https://github.com/UniversalDependencies/UD_Slovak-SNK/blob/r2.9/LICENSE.txt| -|UD_Uyghur-UDT| [uyghur-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/uyghur-ud29.tar.gz) |86.11|88.5|84.47|72.88|94.33|74.94|61.66|54.07|38.58|50.33| fastText |https://github.com/UniversalDependencies/UD_Uyghur-UDT/blob/r2.9/LICENSE.txt| -|UD_Slovenian-SSJ| [slovenian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/slovenian-ud29.tar.gz) |98.12|94.74|95.11|93.95|97.96|92.28|90.44|87.81|82|85.84| fastText |https://github.com/UniversalDependencies/UD_Slovenian-SSJ/blob/r2.9/LICENSE.txt| -|UD_Turkish-Penn| [turkish-penn-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-penn-ud29.tar.gz) |95.64|100|93.02|91.92|93.82|83.96|69.96|66.62|58.07|61.81| fastText |https://github.com/UniversalDependencies/UD_Turkish-Penn/blob/r2.9/LICENSE.txt| -|UD_Galician-CTG| [galician-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/galician-ud29.tar.gz) 
|97.27|95.83|100|95.53|98.3|85.82|82.78|77.39|70.62|75.79| fastText |https://github.com/UniversalDependencies/UD_Galician-CTG/blob/r2.9/LICENSE.txt| -|UD_Czech-CAC| [czech-cac-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/czech-cac-ud29.tar.gz) |99.04|95.49|95.47|94.47|97.74|92.39|90.25|88.36|83.05|85.91| fastText |https://github.com/UniversalDependencies/UD_Czech-CAC/blob/r2.9/LICENSE.txt| -|UD_Finnish-FTB| [finnish-ftb-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/finnish-ftb-ud29.tar.gz) |94.55|92.55|94.39|91.16|95.16|89.65|86.33|83.38|77.1|80.03| fastText |https://github.com/UniversalDependencies/UD_Finnish-FTB/blob/r2.9/LICENSE.txt| -|UD_Latin-ITTB| [latin-ittb-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/latin-ittb-ud29.tar.gz) |98.77|94.21|96.51|92.8|99.18|91.26|89.14|86.78|81.86|86.31| fastText |https://github.com/UniversalDependencies/UD_Latin-ITTB/blob/r2.9/LICENSE.txt| -|UD_Russian-SynTagRus| [russian-syntagrus-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/russian-syntagrus-ud29.tar.gz) |98.39|100|93.24|92.83|97.93|93.55|90.94|89.5|80.49|87.16| fastText |https://github.com/UniversalDependencies/UD_Russian-SynTagRus/blob/r2.9/LICENSE.txt| -|UD_Greek-GDT| [greek-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/greek-ud29.tar.gz) |97.31|97.34|93.68|92.82|95.51|91.01|88.55|83.71|75.08|77.83| fastText |https://github.com/UniversalDependencies/UD_Greek-GDT/blob/r2.9/LICENSE.txt| -|UD_Turkish-BOUN| [turkish-boun-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-boun-ud29.tar.gz) |90.29|90.77|91.48|84.19|95.27|77.93|70.78|68.21|55.56|64.27| fastText |https://github.com/UniversalDependencies/UD_Turkish-BOUN/blob/r2.9/LICENSE.txt| -|UD_Vietnamese-VTB| [vietnamese-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/vietnamese-ud29.tar.gz) |86.75|81.94|99.41|81.52|99.87|66.32|56.8|54.07|48.5|53.99| fastText |https://github.com/UniversalDependencies/UD_Vietnamese-VTB/blob/r2.9/LICENSE.txt| -|UD_Romanian-RRT| 
[romanian-rrt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/romanian-rrt-ud29.tar.gz) |97.77|96.73|96.96|96.34|97.95|90.61|86.52|82.75|78.43|80.99| fastText |https://github.com/UniversalDependencies/UD_Romanian-RRT/blob/r2.9/LICENSE.txt| -|UD_Japanese-GSDLUW| [japanese-gsdluw-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/japanese-gsdluw-ud29.tar.gz) |97.86|96.58|100|96.43|95.72|94.29|93.01|86.51|83.15|79.3| fastText |https://github.com/UniversalDependencies/UD_Japanese-GSDLUW/blob/r2.9/LICENSE.txt| -|UD_English-LinES| [english-lines-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/english-lines-ud29.tar.gz) |96.27|95.47|95.67|92.19|97.91|86.69|82.68|78.29|71.58|76.1| fastText |https://github.com/UniversalDependencies/UD_English-LinES/blob/r2.9/LICENSE.txt| -|UD_Polish-PDB| [polish-pdb-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/polish-pdb-ud29.tar.gz) |98.33|94.37|94.62|93.34|97.55|92.97|90.55|88.24|81.56|85.71| fastText |https://github.com/UniversalDependencies/UD_Polish-PDB/blob/r2.9/LICENSE.txt| -|UD_Telugu-MTG| [telugu-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/telugu-ud29.tar.gz) |91.96|92.37|100|91.82|100|91.54|82.11|77.78|73.18|77.78| fastText |https://github.com/UniversalDependencies/UD_Telugu-MTG/blob/r2.9/LICENSE.txt| -|UD_English-Atis| [english-atis-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/english-atis-ud29.tar.gz) |98.51|100|97.83|97.2|99.91|94.59|92.33|90.05|85.7|89.98| fastText |https://github.com/UniversalDependencies/UD_English-Atis/blob/r2.9/LICENSE.txt| -|UD_Hungarian-Szeged| [hungarian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/hungarian-ud29.tar.gz) |92.42|100|88.06|85.29|92.42|80.81|75.26|73.18|57.65|66| fastText |https://github.com/UniversalDependencies/UD_Hungarian-Szeged/blob/r2.9/LICENSE.txt| -|UD_French-Rhapsodie| [french-rhapsodie-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/french-rhapsodie-ud29.tar.gz) |94.61|97.79|92.26|88.2|96.44|81.24|76.01|69.06|59.42|65.88| fastText 
|https://github.com/UniversalDependencies/UD_French-Rhapsodie/blob/r2.9/LICENSE.txt| -|UD_Swedish-Talbanken| [swedish-talbanken-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/swedish-talbanken-ud29.tar.gz) |97.38|95.79|95.83|94.39|97.55|88.29|84.88|82.34|76.32|79.63| fastText |https://github.com/UniversalDependencies/UD_Swedish-Talbanken/blob/r2.9/LICENSE.txt| -|UD_Chinese-GSDSimp| [chinese-gsdsimp-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/chinese-gsdsimp-ud29.tar.gz) |94.54|94.39|99.13|93.29|98.98|82.77|78.57|77.07|71.55|75.89| fastText |https://github.com/UniversalDependencies/UD_Chinese-GSDSimp/blob/r2.9/LICENSE.txt| -|UD_Estonian-EDT| [estonian-edt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/estonian-edt-ud29.tar.gz) |97.06|97.93|95.38|93.91|85.22|87.75|84.68|82.93|77.25|65.94| fastText |https://github.com/UniversalDependencies/UD_Estonian-EDT/blob/r2.9/LICENSE.txt| -|UD_Italian-TWITTIRO| [italian-twittiro-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/italian-twittiro-ud29.tar.gz) |92.24|91.16|90.49|86.16|92.37|79.77|73.39|65.18|54.05|57.76| fastText |https://github.com/UniversalDependencies/UD_Italian-TWITTIRO/blob/r2.9/LICENSE.txt| -|UD_Portuguese-GSD| [portuguese-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/portuguese-gsd-ud29.tar.gz) |98.01|98|99.88|97.91|99.14|92.84|91.3|87.07|84.91|86.15| fastText |https://github.com/UniversalDependencies/UD_Portuguese-GSD/blob/r2.9/LICENSE.txt| -|UD_Romanian-Nonstandard| [romanian-nonstandard-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/romanian-nonstandard-ud29.tar.gz) |97.1|92.51|90.87|89.45|95.59|90.53|86.44|82.19|69.15|77.48| fastText |https://github.com/UniversalDependencies/UD_Romanian-Nonstandard/blob/r2.9/LICENSE.txt| -|UD_Italian-ParTUT| [italian-partut-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/italian-partut-ud29.tar.gz) |97.77|97.64|97.2|96.29|97.69|91.24|88.93|82.65|78.75|80.09| fastText |https://github.com/UniversalDependencies/UD_Italian-ParTUT/blob/r2.9/LICENSE.txt| 
-|UD_English-ParTUT| [english-partut-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/english-partut-ud29.tar.gz) |96.01|95.72|95.33|93.66|97.51|88.41|85.56|80.54|73.66|78.23| fastText |https://github.com/UniversalDependencies/UD_English-ParTUT/blob/r2.9/LICENSE.txt| -|UD_Maltese-MUDT| [maltese-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/maltese-ud29.tar.gz) |93.02|91.68|100|91.02|100|81.13|74.82|65.6|59.4|65.6| fastText |https://github.com/UniversalDependencies/UD_Maltese-MUDT/blob/r2.9/LICENSE.txt| -|UD_English-EWT| [english-ewt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/english-ewt-ud29.tar.gz) |95.76|95.37|96.48|93.43|96.96|89.25|86.43|83.3|77.53|80.17| fastText |https://github.com/UniversalDependencies/UD_English-EWT/blob/r2.9/LICENSE.txt| -|UD_Estonian-EWT| [estonian-ewt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/estonian-ewt-ud29.tar.gz) |91.16|94.05|88.09|84.48|82.93|76.86|70.4|66.36|55.58|52.79| fastText |https://github.com/UniversalDependencies/UD_Estonian-EWT/blob/r2.9/LICENSE.txt| -|UD_Lithuanian-ALKSNIS| [lithuanian-alksnis-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/lithuanian-alksnis-ud29.tar.gz) |93.16|86.49|88.35|84.77|91.74|78.34|73.17|70.62|60.75|64.71| fastText |https://github.com/UniversalDependencies/UD_Lithuanian-ALKSNIS/blob/r2.9/LICENSE.txt| -|UD_Hebrew-HTB| [hebrew-htb-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/hebrew-htb-ud29.tar.gz) |96.84|96.83|95.42|94.45|96.15|89.91|86.93|80.92|74.15|75.98| fastText |https://github.com/UniversalDependencies/UD_Hebrew-HTB/blob/r2.9/LICENSE.txt| -|UD_Latin-PROIEL| [latin-proiel-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/latin-proiel-ud29.tar.gz) |95.98|96.13|89.22|88|95.95|81.95|76.74|73.69|63.49|71.56| fastText |https://github.com/UniversalDependencies/UD_Latin-PROIEL/blob/r2.9/LICENSE.txt| -|UD_French-GSD| [french-gsd-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/french-gsd-ud29.tar.gz) |97.86|100|97.98|97.03|98.41|92.6|90.36|86.08|82.51|84.19| fastText 
|https://github.com/UniversalDependencies/UD_French-GSD/blob/r2.9/LICENSE.txt| -|UD_Ukrainian-IU| [ukrainian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/ukrainian-ud29.tar.gz) |96.09|91.07|91.2|89.47|96.54|85.8|82.35|78.71|69.97|75.88| fastText |https://github.com/UniversalDependencies/UD_Ukrainian-IU/blob/r2.9/LICENSE.txt| -|UD_Croatian-SET| [croatian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/croatian-ud29.tar.gz) |97.83|94|94.57|93.35|96.79|89.39|85.51|82.79|75.46|79.2| fastText |https://github.com/UniversalDependencies/UD_Croatian-SET/blob/r2.9/LICENSE.txt| -|UD_Arabic-PADT| [arabic-padt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/arabic-padt-ud29.tar.gz) |96.82|93.89|93.93|93.3|93.68|87.42|82.53|79.19|73.1|73.42| fastText |https://github.com/UniversalDependencies/UD_Arabic-PADT/blob/r2.9/LICENSE.txt| -|UD_Turkish-FrameNet| [turkish-framenet-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/turkish-framenet-ud29.tar.gz) |94|100|90.12|88.82|93.8|91|80.37|77.04|66.76|71.35| fastText |https://github.com/UniversalDependencies/UD_Turkish-FrameNet/blob/r2.9/LICENSE.txt| -|UD_Tamil-TTB| [tamil-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/tamil-ud29.tar.gz) |82.76|77.33|82.45|71.69|92.31|71.9|60.58|56.31|44.83|51.86| fastText |https://github.com/UniversalDependencies/UD_Tamil-TTB/blob/r2.9/LICENSE.txt| -|UD_French-ParTUT| [french-partut-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/french-partut-ud29.tar.gz) |96.5|96|93.66|92.66|96.58|91.39|88.71|84.02|73.11|79.17| fastText |https://github.com/UniversalDependencies/UD_French-ParTUT/blob/r2.9/LICENSE.txt| -|UD_Scottish_Gaelic-ARCOSG| [scottish-gaelic-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/scottish-gaelic-ud29.tar.gz) |95.27|89.46|91.8|87.94|95.23|85.92|81.11|75.86|66.46|71.11| fastText |https://github.com/UniversalDependencies/UD_Scottish_Gaelic-ARCOSG/blob/r2.9/LICENSE.txt| -|UD_Old_French-SRCMF| [old-french-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/old-french-ud29.tar.gz) 
|95.53|95.33|97.12|94.2|100|89.76|85.37|82.38|77.57|82.38| fastText - French |https://github.com/UniversalDependencies/UD_Old_French-SRCMF/blob/r2.9/LICENSE.txt| -|UD_Ancient_Greek-Perseus| [ancient-greek-perseus-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/ancient-greek-perseus-ud29.tar.gz) |90.17|80.42|87.09|78.93|83.8|76.96|70.29|63.75|46.19|49.43| fastText - Greek |https://github.com/UniversalDependencies/UD_Ancient_Greek-Perseus/blob/r2.9/LICENSE.txt| -|UD_Ancient_Greek-PROIEL| [ancient-greek-proiel-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/ancient-greek-proiel-ud29.tar.gz) |97.42|97.56|90.99|89.79|95.6|85.56|81.06|76.31|65.19|72.3| fastText - Greek |https://github.com/UniversalDependencies/UD_Ancient_Greek-PROIEL/blob/r2.9/LICENSE.txt| -|UD_Western_Armenian-ArmTDP| [western-armenian-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/western-armenian-ud29.tar.gz) |95.73|100|90.19|88.93|95.71|85.83|80.17|72.81|61.2|69.05| fastText - Armenian |https://github.com/UniversalDependencies/UD_Western_Armenian-ArmTDP/blob/r2.9/LICENSE.txt| -|UD_Classical_Chinese-Kyoto| [classical-chiense-kyoto-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/classical-chiense-kyoto-ud29.tar.gz) |91.72|85.04|100|83.3|97.53|82.73|76.73|76|72.81|74.27| fastText - Chinese |https://github.com/UniversalDependencies/UD_Classical_Chinese-Kyoto/blob/r2.9/LICENSE.txt| -|UD_German-HDT| [german-hdt-ud29](http://s3.clarin-pl.eu/models/combo/ud_29/german-hdt-ud29.tar.gz) |98.45|98.40|93.57|93.14|93.43|97.00|95.96|93.65|83.41|83.73| fastText |https://github.com/UniversalDependencies/UD_German-HDT/blob/r2.9/LICENSE.txt| \ No newline at end of file +|Treebank | Model name |UPOS |XPOS |UFeats|AllTags|Lemmas|UAS |LAS |CLAS |MLAS |BLEX | Language model |LICENSE | 
+|-------------------------|--------------------------------------------------------------------------------------------------------------|-----|------|------|-------|------|-----|-----|-----|-----|-----|---------------------|--------------------------------------------------------------------------------------| +|UD_English-EWT |[english-bert-base-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-bert-base-ud29.tar.gz)|96.58|96.24|97.17|94.65|97.18|91.7|89.4|87.01|82.03|83.84| bert-base-cased |https://github.com/UniversalDependencies/UD_English-EWT/blob/r2.9/LICENSE.txt| +|UD_Polish-PDB|[polish-herbert-base-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/polish-herbert-base-ud29.tar.gz)|98.96|96.4|96.67|95.64|98|95.75|94.05|92.55|87.65|90.01| herbert-base |https://github.com/UniversalDependencies/UD_Polish-PDB/blob/r2.9/LICENSE.txt| +|UD_Polish-PDB|[polish-herbert-large-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/polish-herbert-large-ud29.tar.gz)|99.01|96.53|96.92|95.92|98.1|95.62|93.93|92.39|87.77|90.01| herbert-large |https://github.com/UniversalDependencies/UD_Polish-PDB/blob/r2.9/LICENSE.txt| +|UD_Turkish-Kenet|[turkish-kenet-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-kenet-ud29.tar.gz)|92.75|100|88.65|87.5|92.88|81.82|66.94|65.89|55.35|60.72| fastText |https://github.com/UniversalDependencies/UD_Turkish-Kenet/blob/r2.9/LICENSE.txt| +|UD_Icelandic-IcePaHC|[icelandic-icepahc-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/icelandic-icepahc-ud29.tar.gz)|96.32|92.1|90.56|85.29|95.56|86.98|82.83|77.58|64.84|73.64| fastText |https://github.com/UniversalDependencies/UD_Icelandic-IcePaHC/blob/r2.9/LICENSE.txt| +|UD_Dutch-LassySmall|[dutch-lassysmall-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/dutch-lassysmall-ud29.tar.gz)|95.66|93.78|95.48|92.64|89.75|89.37|85.25|80.6|74.4|67.45| fastText |https://github.com/UniversalDependencies/UD_Dutch-LassySmall/blob/r2.9/LICENSE.txt| 
+|UD_Bulgarian-BTB|[bulgarian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/bulgarian-ud29.tar.gz)|98.93|95.61|97.53|95.06|97.51|93.2|89.93|86.58|82.97|83.44| fastText |https://github.com/UniversalDependencies/UD_Bulgarian-BTB/blob/r2.9/LICENSE.txt| +|UD_Czech-CLTT|[czech-cltt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/czech-cltt-ud29.tar.gz)|98.74|89.51|89.34|88.01|95.11|86.33|83.38|80.06|69.3|75.57| fastText |https://github.com/UniversalDependencies/UD_Czech-CLTT/blob/r2.9/LICENSE.txt| +|UD_Serbian-SET|[serbian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/serbian-ud29.tar.gz)|97.86|93.14|93.31|92.49|96.64|89.9|86.66|84.03|75.91|80.65| fastText |https://github.com/UniversalDependencies/UD_Serbian-SET/blob/r2.9/LICENSE.txt| +|UD_Russian-Taiga|[russian-taiga-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/russian-taiga-ud29.tar.gz)|96.2|100|93.22|91.76|94.73|81.83|77.55|74.74|67.06|69.91| fastText |https://github.com/UniversalDependencies/UD_Russian-Taiga/blob/r2.9/LICENSE.txt| +|UD_Belarusian-HSE|[belarusian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/belarusian-ud29.tar.gz)|98.17|96.11|93.71|91.37|95.78|87.24|84.58|81.47|74.22|77| fastText |https://github.com/UniversalDependencies/UD_Belarusian-HSE/blob/r2.9/LICENSE.txt| +|UD_Indonesian-GSD|[indonesian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/indonesian-ud29.tar.gz)|94.45|91.18|95.87|86.15|97.65|86.93|80.38|76.36|70.21|74.31| fastText |https://github.com/UniversalDependencies/UD_Indonesian-GSD/blob/r2.9/LICENSE.txt| +|UD_Norwegian-NynorskLIA|[norwegian-nynorsklia-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/norwegian-nynorsklia-ud29.tar.gz)|93.53|100|92.03|89.47|96.68|76.72|70.75|65.36|57.09|62.72| fastText |https://github.com/UniversalDependencies/UD_Norwegian-NynorskLIA/blob/r2.9/LICENSE.txt| +|UD_Romanian-SiMoNERo|[romanian-simonero-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/romanian-simonero-ud29.tar.gz)|97.91|96.85|96.27|95.89|98.92|93.35|91.09|87.88|82.48|86.75| fastText 
|https://github.com/UniversalDependencies/UD_Romanian-SiMoNERo/blob/r2.9/LICENSE.txt| +|UD_Afrikaans-AfriBooms|[afrikaans-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/afrikaans-ud29.tar.gz)|96.7|90.57|96.37|90.02|97|87|83.33|76.83|71.14|73.02| fastText |https://github.com/UniversalDependencies/UD_Afrikaans-AfriBooms/blob/r2.9/LICENSE.txt| +|UD_Armenian-ArmTDP|[armenian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/armenian-ud29.tar.gz)|94.08|100|87.92|85.75|93.83|83.41|77.14|71.17|58.72|66.62| fastText |https://github.com/UniversalDependencies/UD_Armenian-ArmTDP/blob/r2.9/LICENSE.txt| +|UD_Catalan-AnCora|[catalan-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/catalan-ud29.tar.gz)|98.83|96.18|98.47|95.82|99.3|93.53|91.29|86.69|84.06|86.1| fastText |https://github.com/UniversalDependencies/UD_Catalan-AnCora/blob/r2.9/LICENSE.txt| +|UD_Czech-PDT|[czech-pdt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/czech-pdt-ud29.tar.gz)|98.98|96.44|96.63|95.61|98.59|93.44|91.54|90.04|85.97|88.56| fastText |https://github.com/UniversalDependencies/UD_Czech-PDT/blob/r2.9/LICENSE.txt| +|UD_Swedish-LinES|[swedish-lines-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/swedish-lines-ud29.tar.gz)|96.26|93.08|89.19|85.54|96.93|87.58|83.26|79.8|65.9|76.74| fastText |https://github.com/UniversalDependencies/UD_Swedish-LinES/blob/r2.9/LICENSE.txt| +|UD_French-Sequoia|[french-sequoia-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/french-sequoia-ud29.tar.gz)|97.75|100|100|97.75|98.36|90.84|88.56|84.39|82.27|82.6| fastText |https://github.com/UniversalDependencies/UD_French-Sequoia/blob/r2.9/LICENSE.txt| +|UD_Arabic-NYUAD|[arabic-nyuad-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/arabic-nyuad-ud29.tar.gz)|33.94|17.4|5.17|3.98|84.49|33.48|6.18|4.42|0.05|3.68| fastText |https://github.com/UniversalDependencies/UD_Arabic-NYUAD/blob/r2.9/LICENSE.txt| 
+|UD_French-FTB|[french-ftb-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/french-ftb-ud29.tar.gz)|28.36|100|23.29|9.83|99.8|27.38|12.53|9.89|4.41|9.48| fastText |https://github.com/UniversalDependencies/UD_French-FTB/blob/r2.9/LICENSE.txt| +|UD_Turkish-Tourism|[turkish-tourism-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-tourism-ud29.tar.gz)|98.39|100|94.47|94.11|98.9|95.61|89.46|86.63|79.02|85.64| fastText |https://github.com/UniversalDependencies/UD_Turkish-Tourism/blob/r2.9/LICENSE.txt| +|UD_Latin-UDante|[latin-udante-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/latin-udante-ud29.tar.gz)|86.59|65.23|71.34|61.08|83.82|65.28|54.53|44.94|27.36|37.31| fastText |https://github.com/UniversalDependencies/UD_Latin-UDante/blob/r2.9/LICENSE.txt| +|UD_Chinese-GSD|[chinese-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/chinese-gsd-ud29.tar.gz)|94.73|94.56|99.1|93.58|98.87|83.2|79.25|77.76|72.24|76.5| fastText |https://github.com/UniversalDependencies/UD_Chinese-GSD/blob/r2.9/LICENSE.txt| +|UD_Polish-LFG|[polish-lfg-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/polish-lfg-ud29.tar.gz)|98.01|93.84|94.93|92.66|97.32|95.9|93.85|92.02|85.49|89.18| fastText |https://github.com/UniversalDependencies/UD_Polish-LFG/blob/r2.9/LICENSE.txt| +|UD_Turkish-IMST|[turkish-imst-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-imst-ud29.tar.gz)|92.49|91.85|88.47|84.27|96.41|73.34|65.76|61.04|50.37|59.29| fastText |https://github.com/UniversalDependencies/UD_Turkish-IMST/blob/r2.9/LICENSE.txt| +|UD_Latin-LLCT|[latin-llct-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/latin-llct-ud29.tar.gz)|99.46|96.9|96.84|96.39|97.22|95.55|94.55|93.67|89.55|90.31| fastText |https://github.com/UniversalDependencies/UD_Latin-LLCT/blob/r2.9/LICENSE.txt| +|UD_Norwegian-Bokmaal|[norwegian-bokmaal-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/norwegian-bokmaal-ud29.tar.gz)|97.38|100|96.25|95.43|98.02|92.08|89.99|87.4|82.1|84.96| fastText 
|https://github.com/UniversalDependencies/UD_Norwegian-Bokmaal/blob/r2.9/LICENSE.txt| +|UD_Italian-ISDT|[italian-isdt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/italian-isdt-ud29.tar.gz)|98.09|97.95|100|97.84|98.2|92.91|90.9|86.61|84.5|84.42| fastText |https://github.com/UniversalDependencies/UD_Italian-ISDT/blob/r2.9/LICENSE.txt| +|UD_Danish-DDT|[danish-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/danish-ud29.tar.gz)|96.8|100|95.99|94.89|97.14|85.61|82.84|79.63|73.1|76.78| fastText |https://github.com/UniversalDependencies/UD_Danish-DDT/blob/r2.9/LICENSE.txt| +|UD_Spanish-GSD|[spanish-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/spanish-gsd-ud29.tar.gz)|96.17|100|96.77|94.22|98.61|90.27|87.22|81.89|74.02|80.23| fastText |https://github.com/UniversalDependencies/UD_Spanish-GSD/blob/r2.9/LICENSE.txt| +|UD_Persian-Seraji|[persian-seraji-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/persian-seraji-ud29.tar.gz)|97.62|97.5|97.45|96.95|95.64|89.78|86.12|82.91|80.57|78.52| fastText |https://github.com/UniversalDependencies/UD_Persian-Seraji/blob/r2.9/LICENSE.txt| +|UD_Turkish-Atis|[turkish-atis-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-atis-ud29.tar.gz)|98.09|100|97.59|97.2|98.63|89.94|87.51|87.02|84.88|86.18| fastText |https://github.com/UniversalDependencies/UD_Turkish-Atis/blob/r2.9/LICENSE.txt| +|UD_Italian-PoSTWITA|[italian-postwita-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/italian-postwita-ud29.tar.gz)|95.39|95.06|95.71|93.03|96.49|84.9|79.6|74.38|68.51|71.57| fastText |https://github.com/UniversalDependencies/UD_Italian-PoSTWITA/blob/r2.9/LICENSE.txt| +|UD_Icelandic-Modern|[icelandic-modern-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/icelandic-modern-ud29.tar.gz)|98.98|97.43|98.02|97.19|98.73|94.31|92.75|90.97|88.74|89.83| fastText |https://github.com/UniversalDependencies/UD_Icelandic-Modern/blob/r2.9/LICENSE.txt| 
+|UD_Basque-BDT|[basque-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/basque-ud29.tar.gz)|94.58|100|91.42|88.58|95.6|84.64|80.61|79|68.55|75.24| fastText |https://github.com/UniversalDependencies/UD_Basque-BDT/blob/r2.9/LICENSE.txt| +|UD_Latvian-LVTB|[latvian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/latvian-ud29.tar.gz)|96.32|86.82|93.33|86.03|95.45|87.88|84.39|81.92|73.51|77.76| fastText |https://github.com/UniversalDependencies/UD_Latvian-LVTB/blob/r2.9/LICENSE.txt| +|UD_English-GUM|[english-gum-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-gum-ud29.tar.gz)|96.31|96.02|97.04|94.74|97.69|89|86.15|81.61|76.59|78.64| fastText |https://github.com/UniversalDependencies/UD_English-GUM/blob/r2.9/LICENSE.txt| +|UD_Welsh-CCG|[welsh-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/welsh-ud29.tar.gz)|92.58|90.77|86.14|82.55|89.72|83.37|75.6|68.95|53.54|58.95| fastText |https://github.com/UniversalDependencies/UD_Welsh-CCG/blob/r2.9/LICENSE.txt| +|UD_Russian-GSD|[russian-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/russian-gsd-ud29.tar.gz)|97.02|96.48|91.72|90.27|95.7|87.47|83.43|81.47|72.15|77.4| fastText |https://github.com/UniversalDependencies/UD_Russian-GSD/blob/r2.9/LICENSE.txt| +|UD_Finnish-TDT|[finnish-tdt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/finnish-tdt-ud29.tar.gz)|96.41|97.97|94.01|92.55|88.29|89.27|86.46|84.61|77.32|71.77| fastText |https://github.com/UniversalDependencies/UD_Finnish-TDT/blob/r2.9/LICENSE.txt| +|UD_Japanese-BCCWJLUW|[japanese-bccwjluw-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/japanese-bccwjluw-ud29.tar.gz)|32.92|100|100|32.92|100|36.29|22.31|11.28|5.91|11.28| fastText |https://github.com/UniversalDependencies/UD_Japanese-BCCWJLUW/blob/r2.9/LICENSE.txt| +|UD_Norwegian-Nynorsk|[norwegian-nynorsk-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/norwegian-nynorsk-ud29.tar.gz)|97.22|100|95.29|94.31|97.38|91.76|89.51|87.13|80.26|83.93| fastText 
|https://github.com/UniversalDependencies/UD_Norwegian-Nynorsk/blob/r2.9/LICENSE.txt| +|UD_Irish-IDT|[irish-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/irish-ud29.tar.gz)|94.01|93.71|88.33|84.39|93.53|85.57|79.24|73.83|59.54|67.72| fastText |https://github.com/UniversalDependencies/UD_Irish-IDT/blob/r2.9/LICENSE.txt| +|UD_Urdu-UDTB|[urdu-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/urdu-ud29.tar.gz)|93.6|91.66|82.84|78.25|96.1|86.58|80.76|74.62|54.91|70.95| fastText |https://github.com/UniversalDependencies/UD_Urdu-UDTB/blob/r2.9/LICENSE.txt| +|UD_Portuguese-Bosque|[portuguese-bosque-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/portuguese-bosque-ud29.tar.gz)|97.4|100|96.3|94.95|98.21|91.06|87.92|82.65|76.02|80.44| fastText |https://github.com/UniversalDependencies/UD_Portuguese-Bosque/blob/r2.9/LICENSE.txt| +|UD_Spanish-AnCora|[spanish-ancora-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/spanish-ancora-ud29.tar.gz)|98.83|95.97|98.57|95.4|99.34|92.38|90.09|85.84|83.37|85.23| fastText |https://github.com/UniversalDependencies/UD_Spanish-AnCora/blob/r2.9/LICENSE.txt| +|UD_Czech-FicTree|[czech-fictree-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/czech-fictree-ud29.tar.gz)|98.03|93.98|95.37|93.01|98.26|92.5|90.06|87.28|80.76|85.2| fastText |https://github.com/UniversalDependencies/UD_Czech-FicTree/blob/r2.9/LICENSE.txt| +|UD_Italian-VIT|[italian-vit-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/italian-vit-ud29.tar.gz)|97.64|96.91|97.34|95.64|98.41|89.73|86.15|80.57|76.01|78.86| fastText |https://github.com/UniversalDependencies/UD_Italian-VIT/blob/r2.9/LICENSE.txt| +|UD_Dutch-Alpino|[dutch-alpino-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/dutch-alpino-ud29.tar.gz)|96.37|94.28|96.48|93.5|89.09|91.45|88|82.53|76.73|67.01| fastText |https://github.com/UniversalDependencies/UD_Dutch-Alpino/blob/r2.9/LICENSE.txt| 
+|UD_Hindi-HDTB|[hindi-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/hindi-ud29.tar.gz)|97.05|96.59|93.92|91.5|98.83|94.58|91.24|87.49|77.22|86.19| fastText |https://github.com/UniversalDependencies/UD_Hindi-HDTB/blob/r2.9/LICENSE.txt| +|UD_Persian-PerDT|[persian-perdt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/persian-perdt-ud29.tar.gz)|97.31|97.22|97.78|95.23|98.89|93.3|90.9|89|85|87.8| fastText |https://github.com/UniversalDependencies/UD_Persian-PerDT/blob/r2.9/LICENSE.txt| +|UD_Japanese-GSD|[japanese-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/japanese-gsd-ud29.tar.gz)|98.29|96.88|100|96.51|98.9|94.05|92.73|88.76|86.52|87.53| fastText |https://github.com/UniversalDependencies/UD_Japanese-GSD/blob/r2.9/LICENSE.txt| +|UD_German-GSD|[german-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/german-gsd-ud29.tar.gz)|94.15|96.5|90|84.5|96.38|86.53|81.63|77.2|59.57|72.5| fastText |https://github.com/UniversalDependencies/UD_German-GSD/blob/r2.9/LICENSE.txt| +|UD_Slovak-SNK|[slovak-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/slovak-ud29.tar.gz)|95.24|86.04|89.75|84.73|94.86|90.08|87.15|85.16|72.59|79.93| fastText |https://github.com/UniversalDependencies/UD_Slovak-SNK/blob/r2.9/LICENSE.txt| +|UD_Uyghur-UDT|[uyghur-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/uyghur-ud29.tar.gz)|86.11|88.5|84.47|72.88|94.33|74.94|61.66|54.07|38.58|50.33| fastText |https://github.com/UniversalDependencies/UD_Uyghur-UDT/blob/r2.9/LICENSE.txt| +|UD_Slovenian-SSJ|[slovenian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/slovenian-ud29.tar.gz)|98.12|94.74|95.11|93.95|97.96|92.28|90.44|87.81|82|85.84| fastText |https://github.com/UniversalDependencies/UD_Slovenian-SSJ/blob/r2.9/LICENSE.txt| +|UD_Turkish-Penn|[turkish-penn-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-penn-ud29.tar.gz)|95.64|100|93.02|91.92|93.82|83.96|69.96|66.62|58.07|61.81| fastText |https://github.com/UniversalDependencies/UD_Turkish-Penn/blob/r2.9/LICENSE.txt| 
+|UD_English-ESL|[english-esl-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-esl-ud29.tar.gz)|24.26|19.46|100|14.31|100|31.25|6.75|5.52|1.45|5.52| fastText |https://github.com/UniversalDependencies/UD_English-ESL/blob/r2.9/LICENSE.txt| +|UD_Galician-CTG|[galician-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/galician-ud29.tar.gz)|97.27|95.83|100|95.53|98.3|85.82|82.78|77.39|70.62|75.79| fastText |https://github.com/UniversalDependencies/UD_Galician-CTG/blob/r2.9/LICENSE.txt| +|UD_Czech-CAC|[czech-cac-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/czech-cac-ud29.tar.gz)|99.04|95.49|95.47|94.47|97.74|92.39|90.25|88.36|83.05|85.91| fastText |https://github.com/UniversalDependencies/UD_Czech-CAC/blob/r2.9/LICENSE.txt| +|UD_Finnish-FTB|[finnish-ftb-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/finnish-ftb-ud29.tar.gz)|94.55|92.55|94.39|91.16|95.16|89.65|86.33|83.38|77.1|80.03| fastText |https://github.com/UniversalDependencies/UD_Finnish-FTB/blob/r2.9/LICENSE.txt| +|UD_Latin-ITTB|[latin-ittb-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/latin-ittb-ud29.tar.gz)|98.77|94.21|96.51|92.8|99.18|91.26|89.14|86.78|81.86|86.31| fastText |https://github.com/UniversalDependencies/UD_Latin-ITTB/blob/r2.9/LICENSE.txt| +|UD_Russian-SynTagRus|[russian-syntagrus-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/russian-syntagrus-ud29.tar.gz)|98.39|100|93.24|92.83|97.93|93.55|90.94|89.5|80.49|87.16| fastText |https://github.com/UniversalDependencies/UD_Russian-SynTagRus/blob/r2.9/LICENSE.txt| +|UD_Greek-GDT|[greek-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/greek-ud29.tar.gz)|97.31|97.34|93.68|92.82|95.51|91.01|88.55|83.71|75.08|77.83| fastText |https://github.com/UniversalDependencies/UD_Greek-GDT/blob/r2.9/LICENSE.txt| +|UD_Turkish-BOUN|[turkish-boun-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-boun-ud29.tar.gz)|90.29|90.77|91.48|84.19|95.27|77.93|70.78|68.21|55.56|64.27| fastText 
|https://github.com/UniversalDependencies/UD_Turkish-BOUN/blob/r2.9/LICENSE.txt| +|UD_Vietnamese-VTB|[vietnamese-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/vietnamese-ud29.tar.gz)|86.75|81.94|99.41|81.52|99.87|66.32|56.8|54.07|48.5|53.99| fastText |https://github.com/UniversalDependencies/UD_Vietnamese-VTB/blob/r2.9/LICENSE.txt| +|UD_Romanian-RRT|[romanian-rrt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/romanian-rrt-ud29.tar.gz)|97.77|96.73|96.96|96.34|97.95|90.61|86.52|82.75|78.43|80.99| fastText |https://github.com/UniversalDependencies/UD_Romanian-RRT/blob/r2.9/LICENSE.txt| +|UD_Japanese-GSDLUW|[japanese-gsdluw-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/japanese-gsdluw-ud29.tar.gz)|97.86|96.58|100|96.43|95.72|94.29|93.01|86.51|83.15|79.3| fastText |https://github.com/UniversalDependencies/UD_Japanese-GSDLUW/blob/r2.9/LICENSE.txt| +|UD_English-LinES|[english-lines-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-lines-ud29.tar.gz)|96.27|95.47|95.67|92.19|97.91|86.69|82.68|78.29|71.58|76.1| fastText |https://github.com/UniversalDependencies/UD_English-LinES/blob/r2.9/LICENSE.txt| +|UD_Polish-PDB|[polish-pdb-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/polish-pdb-ud29.tar.gz)|98.33|94.37|94.62|93.34|97.55|92.97|90.55|88.24|81.56|85.71| fastText |https://github.com/UniversalDependencies/UD_Polish-PDB/blob/r2.9/LICENSE.txt| +|UD_Telugu-MTG|[telugu-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/telugu-ud29.tar.gz)|91.96|92.37|100|91.82|100|91.54|82.11|77.78|73.18|77.78| fastText |https://github.com/UniversalDependencies/UD_Telugu-MTG/blob/r2.9/LICENSE.txt| +|UD_English-Atis|[english-atis-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-atis-ud29.tar.gz)|98.51|100|97.83|97.2|99.91|94.59|92.33|90.05|85.7|89.98| fastText |https://github.com/UniversalDependencies/UD_English-Atis/blob/r2.9/LICENSE.txt| 
+|UD_Hungarian-Szeged|[hungarian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/hungarian-ud29.tar.gz)|92.42|100|88.06|85.29|92.42|80.81|75.26|73.18|57.65|66| fastText |https://github.com/UniversalDependencies/UD_Hungarian-Szeged/blob/r2.9/LICENSE.txt| +|UD_French-Rhapsodie|[french-rhapsodie-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/french-rhapsodie-ud29.tar.gz)|94.61|97.79|92.26|88.2|96.44|81.24|76.01|69.06|59.42|65.88| fastText |https://github.com/UniversalDependencies/UD_French-Rhapsodie/blob/r2.9/LICENSE.txt| +|UD_Swedish-Talbanken|[swedish-talbanken-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/swedish-talbanken-ud29.tar.gz)|97.38|95.79|95.83|94.39|97.55|88.29|84.88|82.34|76.32|79.63| fastText |https://github.com/UniversalDependencies/UD_Swedish-Talbanken/blob/r2.9/LICENSE.txt| +|UD_Chinese-GSDSimp|[chinese-gsdsimp-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/chinese-gsdsimp-ud29.tar.gz)|94.54|94.39|99.13|93.29|98.98|82.77|78.57|77.07|71.55|75.89| fastText |https://github.com/UniversalDependencies/UD_Chinese-GSDSimp/blob/r2.9/LICENSE.txt| +|UD_Estonian-EDT|[estonian-edt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/estonian-edt-ud29.tar.gz)|97.06|97.93|95.38|93.91|85.22|87.75|84.68|82.93|77.25|65.94| fastText |https://github.com/UniversalDependencies/UD_Estonian-EDT/blob/r2.9/LICENSE.txt| +|UD_Japanese-BCCWJ|[japanese-bccwj-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/japanese-bccwj-ud29.tar.gz)|32.86|100|100|32.86|100|32.3|15.45|2.2|0.56|2.2| fastText |https://github.com/UniversalDependencies/UD_Japanese-BCCWJ/blob/r2.9/LICENSE.txt| +|UD_Italian-TWITTIRO|[italian-twittiro-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/italian-twittiro-ud29.tar.gz)|92.24|91.16|90.49|86.16|92.37|79.77|73.39|65.18|54.05|57.76| fastText |https://github.com/UniversalDependencies/UD_Italian-TWITTIRO/blob/r2.9/LICENSE.txt| 
+|UD_Portuguese-GSD|[portuguese-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/portuguese-gsd-ud29.tar.gz)|98.01|98|99.88|97.91|99.14|92.84|91.3|87.07|84.91|86.15| fastText |https://github.com/UniversalDependencies/UD_Portuguese-GSD/blob/r2.9/LICENSE.txt| +|UD_Romanian-Nonstandard|[romanian-nonstandard-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/romanian-nonstandard-ud29.tar.gz)|97.1|92.51|90.87|89.45|95.59|90.53|86.44|82.19|69.15|77.48| fastText |https://github.com/UniversalDependencies/UD_Romanian-Nonstandard/blob/r2.9/LICENSE.txt| +|UD_Italian-ParTUT|[italian-partut-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/italian-partut-ud29.tar.gz)|97.77|97.64|97.2|96.29|97.69|91.24|88.93|82.65|78.75|80.09| fastText |https://github.com/UniversalDependencies/UD_Italian-ParTUT/blob/r2.9/LICENSE.txt| +|UD_English-ParTUT|[english-partut-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-partut-ud29.tar.gz)|96.01|95.72|95.33|93.66|97.51|88.41|85.56|80.54|73.66|78.23| fastText |https://github.com/UniversalDependencies/UD_English-ParTUT/blob/r2.9/LICENSE.txt| +|UD_Maltese-MUDT|[maltese-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/maltese-ud29.tar.gz)|93.02|91.68|100|91.02|100|81.13|74.82|65.6|59.4|65.6| fastText |https://github.com/UniversalDependencies/UD_Maltese-MUDT/blob/r2.9/LICENSE.txt| +|UD_English-EWT|[english-ewt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-ewt-ud29.tar.gz)|95.76|95.37|96.48|93.43|96.96|89.25|86.43|83.3|77.53|80.17| fastText |https://github.com/UniversalDependencies/UD_English-EWT/blob/r2.9/LICENSE.txt| +|UD_Estonian-EWT|[estonian-ewt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/estonian-ewt-ud29.tar.gz)|91.16|94.05|88.09|84.48|82.93|76.86|70.4|66.36|55.58|52.79| fastText |https://github.com/UniversalDependencies/UD_Estonian-EWT/blob/r2.9/LICENSE.txt| 
+|UD_Lithuanian-ALKSNIS|[lithuanian-alksnis-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/lithuanian-alksnis-ud29.tar.gz)|93.16|86.49|88.35|84.77|91.74|78.34|73.17|70.62|60.75|64.71| fastText |https://github.com/UniversalDependencies/UD_Lithuanian-ALKSNIS/blob/r2.9/LICENSE.txt| +|UD_Hebrew-HTB|[hebrew-htb-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/hebrew-htb-ud29.tar.gz)|96.84|96.83|95.42|94.45|96.15|89.91|86.93|80.92|74.15|75.98| fastText |https://github.com/UniversalDependencies/UD_Hebrew-HTB/blob/r2.9/LICENSE.txt| +|UD_Latin-PROIEL|[latin-proiel-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/latin-proiel-ud29.tar.gz)|95.98|96.13|89.22|88|95.95|81.95|76.74|73.69|63.49|71.56| fastText |https://github.com/UniversalDependencies/UD_Latin-PROIEL/blob/r2.9/LICENSE.txt| +|UD_French-GSD|[french-gsd-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/french-gsd-ud29.tar.gz)|97.86|100|97.98|97.03|98.41|92.6|90.36|86.08|82.51|84.19| fastText |https://github.com/UniversalDependencies/UD_French-GSD/blob/r2.9/LICENSE.txt| +|UD_Ukrainian-IU|[ukrainian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/ukrainian-ud29.tar.gz)|96.09|91.07|91.2|89.47|96.54|85.8|82.35|78.71|69.97|75.88| fastText |https://github.com/UniversalDependencies/UD_Ukrainian-IU/blob/r2.9/LICENSE.txt| +|UD_Croatian-SET|[croatian-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/croatian-ud29.tar.gz)|97.83|94|94.57|93.35|96.79|89.39|85.51|82.79|75.46|79.2| fastText |https://github.com/UniversalDependencies/UD_Croatian-SET/blob/r2.9/LICENSE.txt| +|UD_Arabic-PADT|[arabic-padt-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/arabic-padt-ud29.tar.gz)|96.82|93.89|93.93|93.3|93.68|87.42|82.53|79.19|73.1|73.42| fastText |https://github.com/UniversalDependencies/UD_Arabic-PADT/blob/r2.9/LICENSE.txt| +|UD_Turkish-FrameNet|[turkish-framenet-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/turkish-framenet-ud29.tar.gz)|94|100|90.12|88.82|93.8|91|80.37|77.04|66.76|71.35| fastText 
|https://github.com/UniversalDependencies/UD_Turkish-FrameNet/blob/r2.9/LICENSE.txt| +|UD_English-GUMReddit|[english-gumreddit-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/english-gumreddit-ud29.tar.gz)|21.53|15.39|30.46|5.71|100|26.05|4.63|5.04|0|5.04| fastText |https://github.com/UniversalDependencies/UD_English-GUMReddit/blob/r2.9/LICENSE.txt| +|UD_Tamil-TTB|[tamil-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/tamil-ud29.tar.gz)|82.76|77.33|82.45|71.69|92.31|71.9|60.58|56.31|44.83|51.86| fastText |https://github.com/UniversalDependencies/UD_Tamil-TTB/blob/r2.9/LICENSE.txt| +|UD_French-ParTUT|[french-partut-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/french-partut-ud29.tar.gz)|96.5|96|93.66|92.66|96.58|91.39|88.71|84.02|73.11|79.17| fastText |https://github.com/UniversalDependencies/UD_French-ParTUT/blob/r2.9/LICENSE.txt| +|UD_Scottish_Gaelic-ARCOSG|[scottish-gaelic-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/scottish-gaelic-ud29.tar.gz)|95.27|89.46|91.8|87.94|95.23|85.92|81.11|75.86|66.46|71.11| fastText |https://github.com/UniversalDependencies/UD_Scottish_Gaelic-ARCOSG/blob/r2.9/LICENSE.txt| +|UD_Old_French-SRCMF|[old-french-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/old-french-ud29.tar.gz)|95.53|95.33|97.12|94.2|100|89.76|85.37|82.38|77.57|82.38| fastText - French |https://github.com/UniversalDependencies/UD_Old_French-SRCMF/blob/r2.9/LICENSE.txt| +|UD_Ancient_Greek-Perseus|[ancient-greek-perseus-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/ancient-greek-perseus-ud29.tar.gz)|90.17|80.42|87.09|78.93|83.8|76.96|70.29|63.75|46.19|49.43| fastText - Greek |https://github.com/UniversalDependencies/UD_Ancient_Greek-Perseus/blob/r2.9/LICENSE.txt| +|UD_Ancient_Greek-PROIEL|[ancient-greek-proiel-ud29](http://s3.clarin-pl.eu/dspace/combo/ud_29/ancient-greek-proiel-ud29.tar.gz)|97.42|97.56|90.99|89.79|95.6|85.56|81.06|76.31|65.19|72.3| fastText - Greek |https://github.com/UniversalDependencies/UD_Ancient_Greek-PROIEL/blob/r2.9/LICENSE.txt|
+|UD_Western_Armenian-ArmTDP|[western-armenian-ud-29](http://s3.clarin-pl.eu/dspace/combo/ud_29/western-armenian-ud-29.tar.gz)|95.73|100|90.19|88.93|95.71|85.83|80.17|72.81|61.2|69.05| fastText - Armenian |https://github.com/UniversalDependencies/UD_Western_Armenian-ArmTDP/blob/r2.9/LICENSE.txt| +|UD_Classical_Chinese-Kyoto|[classical-chiense-kyoto](http://s3.clarin-pl.eu/dspace/combo/ud_29/classical-chiense-kyoto.tar.gz)|91.72|85.04|100|83.3|97.53|82.73|76.73|76|72.81|74.27| fastText |https://github.com/UniversalDependencies/UD_Classical_Chinese-Kyoto/blob/r2.9/LICENSE.txt| \ No newline at end of file diff --git a/scripts/UD division notebook.ipynb b/scripts/UD division notebook.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0765658c10763c48de5c3e30a4754e4e20e6bfb1 --- /dev/null +++ b/scripts/UD division notebook.ipynb @@ -0,0 +1,293 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8a556c39", + "metadata": {}, + "source": [ + "# Notebook used to divide set of UDs into 3 approximately same training files " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e2a3aa1d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from os import listdir\n", + "from os.path import isfile, join\n", + "from tqdm import tqdm\n", + "import pandas as pd\n", + "\n", + "def get_dir_size(path='.'):\n", + " total = 0\n", + " with os.scandir(path) as it:\n", + " for entry in it:\n", + " if entry.is_file():\n", + " total += entry.stat().st_size\n", + " elif entry.is_dir():\n", + " total += get_dir_size(entry.path)\n", + " return total" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c0e73444", + "metadata": {}, + "outputs": [], + "source": [ + "UD_dir = \"/home/pszenny/Downloads/ud2.9\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "28bab566", + "metadata": {}, + "outputs": [], + "source": [ + "list1 = [] # stores names of UD datasets to include in 1st training file\n", + 
"list2 = [] # stores names of UD datasets to include in 2nd training file\n", + "list3 = [] # stores names of UD datasets to include in 3rd training file\n", + "list1s = 0\n", + "list2s = 0\n", + "list3s = 0\n", + "has_no_lemma = []\n", + "has_no_upos = []\n", + "has_no_xpos = []\n", + "has_no_feats = []\n", + "has_no_head = []\n", + "has_no_deprel = []" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "41a56660", + "metadata": {}, + "outputs": [], + "source": [ + "for file in os.listdir(UD_dir):\n", + " d = os.path.join(UD_dir, file)\n", + " if os.path.isdir(d):\n", + " onlyfiles = [f for f in listdir(d) if isfile(join(d, f))]\n", + " has_dev = any([\"dev\" in f and \".conllu\" in f for f in onlyfiles])\n", + " has_test = any([\"test\" in f and \".conllu\" in f for f in onlyfiles])\n", + " has_train = any([\"train\" in f and \".conllu\" in f for f in onlyfiles])\n", + " if not (has_train and has_dev and has_test):\n", + " continue\n", + " for f in listdir(d):\n", + " tmp_path = \"\"\n", + " if \".conllu\" in f and (\"dev\" in f or \"test\" in f or \"train\" in f):\n", + " tmp_path = os.path.join(d, f)\n", + " if \".conllu\" in f and \"train\" in f:\n", + " train_file = os.path.join(d, f)\n", + " if tmp_path and os.path.getsize(tmp_path) < 1000:\n", + " continue\n", + " size = get_dir_size(d)\n", + " lemmas = []\n", + " upos = []\n", + " xpos = []\n", + " feats = []\n", + " head = []\n", + " deprel = []\n", + " # checking what data is inside training file\n", + " with open(train_file,'r') as rf:\n", + " for line in rf:\n", + " words = line.split(\"\\t\")\n", + " if len(words) == 10:\n", + " lemmas.append(words[2])\n", + " upos.append(words[3])\n", + " xpos.append(words[4])\n", + " feats.append(words[5])\n", + " head.append(words[6])\n", + " deprel.append(words[7])\n", + " if line == \"\\n\":\n", + " break\n", + " if set(lemmas)=={'_'}:\n", + " has_no_lemma.append(file)\n", + " if set(upos)=={'_'}:\n", + " has_no_upos.append(file)\n", + " if 
set(xpos)=={'_'}:\n", + " has_no_xpos.append(file)\n", + " if set(feats)=={'_'}:\n", + " has_no_feats.append(file)\n", + " if set(head)=={'_'}:\n", + " has_no_head.append(file)\n", + " if set(deprel)=={'_'}:\n", + " has_no_deprel.append(file)\n", + " # file division\n", + " if list1s == min(list1s,list2s,list3s):\n", + " list1.append(file)\n", + " list1s += size\n", + " continue\n", + " if list2s == min(list1s,list2s,list3s):\n", + " list2.append(file)\n", + " list2s += size\n", + " continue\n", + " if list3s == min(list1s,list2s,list3s):\n", + " list3.append(file)\n", + " list3s += size\n", + " continue" + ] + }, + { + "cell_type": "markdown", + "id": "73b7aed2", + "metadata": {}, + "source": [ + "Adjusting train files to lack of data in files" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "32fa08d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Only lack of XPOS: \n", + "\n", + "{'UD_Turkish-Penn', 'UD_Armenian-ArmTDP', 'UD_Russian-Taiga', 'UD_Portuguese-Bosque', 'UD_Danish-DDT', 'UD_Norwegian-NynorskLIA', 'UD_Hungarian-Szeged', 'UD_French-GSD', 'UD_Basque-BDT', 'UD_Western_Armenian-ArmTDP'}\n", + "\n", + "Only lack of FEATS: \n", + "\n", + "{'UD_Galician-CTG', 'UD_Korean-GSD', 'UD_Korean-Kaist', 'UD_Italian-ISDT'}\n", + "\n", + "Only lack of LEMMA: \n", + "\n", + "{'UD_Old_French-SRCMF'}\n", + "\n", + "Only lack of XPOS LEMMA: \n", + "\n", + "set()\n", + "\n", + "Only lack of FEATS LEMMA: \n", + "\n", + "{'UD_Swedish_Sign_Language-SSLC', 'UD_Maltese-MUDT', 'UD_English-ESL'}\n", + "\n", + "Only lack of XPOS FEATS: \n", + "\n", + "set()\n", + "\n", + "Only lack of XPOS FEATS LEMMA: \n", + "\n", + "set()\n" + ] + } + ], + "source": [ + "#ADJUST TRAIN FILES\n", + "train_file = set(list1) #change list1 to list2 or list3 to adjust all train files. 
\n", + "\n", + "no_lemma_feats_xpos = set(has_no_lemma).intersection(set(has_no_feats),set(has_no_xpos))\n", + "no_xpos_lemma = set(has_no_xpos).intersection(set(has_no_lemma)) - no_lemma_feats_xpos\n", + "no_xpos_feats = set(has_no_xpos).intersection(set(has_no_feats)) - no_lemma_feats_xpos\n", + "no_lemma_feats = set(has_no_lemma).intersection(set(has_no_feats)) - no_lemma_feats_xpos\n", + "no_xpos = set(has_no_xpos)-no_xpos_lemma-no_xpos_feats\n", + "no_lemma = set(has_no_lemma)-no_xpos_lemma-no_lemma_feats\n", + "no_feats = set(has_no_feats)-no_xpos_feats-no_lemma_feats\n", + "\n", + "print(\"\\nOnly lacks of XPOS: \\n\")\n", + "print(train_file.intersection(no_xpos))\n", + "print(\"\\nOnly lacks of FEATS: \\n\")\n", + "print(train_file.intersection(no_feats))\n", + "print(\"\\nOnly lacks of LEMMA: \\n\")\n", + "print(train_file.intersection(no_lemma))\n", + "\n", + "print(\"\\nOnly lacks of XPOS LEMMA: \\n\")\n", + "print(train_file.intersection(no_xpos_lemma))\n", + "print(\"\\nOnly lacks of FEATS LEMMA: \\n\")\n", + "print(train_file.intersection(no_lemma_feats))\n", + "print(\"\\nOnly lacks of XPOS FEATS: \\n\")\n", + "print(train_file.intersection(no_xpos_feats))\n", + "\n", + "print(\"\\nOnly lacks of XPOS FEATS LEMMA: \\n\")\n", + "print(train_file.intersection(no_lemma_feats_xpos))\n", + "\n", + "# output of this cell should be used to adjust train.py " + ] + }, + { + "cell_type": "markdown", + "id": "2d11c2b2", + "metadata": {}, + "source": [ + "Additional info about training files. Cells creating dataset with number of sentences per UD language." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aacbff01", + "metadata": {}, + "outputs": [], + "source": [ + "datasets = []\n", + "sent_count = []\n", + "\n", + "for file in tqdm(os.listdir(UD_dir)):\n", + " d = os.path.join(UD_dir, file)\n", + " if os.path.isdir(d):\n", + " onlyfiles = [f for f in listdir(d) if isfile(join(d, f))]\n", + " has_dev = any([\"dev\" in f and \".conllu\" in f for f in onlyfiles])\n", + " has_test = any([\"test\" in f and \".conllu\" in f for f in onlyfiles])\n", + " has_train = any([\"train\" in f and \".conllu\" in f for f in onlyfiles])\n", + " if not (has_train and has_dev and has_test):\n", + " continue\n", + " for f in listdir(d):\n", + " tmp_path = \"\"\n", + " if \".conllu\" in f and (\"dev\" in f or \"test\" in f or \"train\" in f):\n", + " tmp_path = os.path.join(d, f)\n", + " if \".conllu\" in f and \"train\" in f:\n", + " train_file = os.path.join(d, f)\n", + " if tmp_path and os.path.getsize(tmp_path) < 1000:\n", + " continue\n", + " datasets.append(file)\n", + " sent_count.append(count_sentences(train_file))\n", + "data_sentences = pd.DataFrame(\n", + " {'set': datasets,\n", + " 'sent_count': sent_count})\n", + "data_sentences.sort_values(by=['sent_count'], inplace=True) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "724e8e37", + "metadata": {}, + "outputs": [], + "source": [ + "data_sentences.head(10)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scripts/create_performance_table.py b/scripts/create_performance_table.py new file mode 100644 index 
0000000000000000000000000000000000000000..eb19faccdf647cfbf098b1f5ca97a63a096535d4
--- /dev/null
+++ b/scripts/create_performance_table.py
@@ -0,0 +1,120 @@
+from conll18_ud_eval import *
+from absl import app
+from absl import flags
+import pathlib
+import csv
+
+# this script requires having conll18_ud_eval.py file in the same directory. It is available here
+# https://universaldependencies.org/conll18/
+
+FLAGS = flags.FLAGS
+flags.DEFINE_string(name="pred_dir", default=r"/home/pszenny/Desktop/IPI_PAN/evaluate_UD/predictions_UD_29/pred",
+                    help="Path to directory with predictions on test sets.")
+flags.DEFINE_string(name="ud_dir", default=r"/home/pszenny/Desktop/IPI_PAN/evaluate_UD/predictions_UD_29/ud_files",
+                    help="Path to directory with UD datasets up to UD_treebank/files .")
+flags.DEFINE_string(name="models_dir", default=r"/tmp/lustre_shared/lukasz/models_UD_2.9",
+                    help="Path to directory with trained models treebank/allennlp_folder/files.")
+flags.DEFINE_string(name="UD_version", default="29",
+                    help="UD version number.")
+flags.DEFINE_string(name="URL_download", default="http://s3.clarin-pl.eu/dspace/combo/ud_29/{model}.tar.gz",
+                    help="template URL to download model with {model} where model name should be placed.")
+flags.DEFINE_string(name="URL_licence",
+                    default="https://github.com/UniversalDependencies/{treebank}/blob/r2.9/LICENSE.txt",
+                    help="template URL to license.txt with {treebank} where treebank name should be placed.")
+
+
+def evaluate_wrapper(gold_file, system_file):
+    """Score a system CoNLL-U file against a gold CoNLL-U file.
+
+    Both arguments are file paths. The files are loaded with ``load_conllu_file`` and compared with
+    ``evaluate`` (both come from conll18_ud_eval.py); the result of ``evaluate`` is returned as is.
+    """
+    gold_ud = load_conllu_file(gold_file)
+    system_ud = load_conllu_file(system_file)
+    return evaluate(gold_ud, system_ud)
+
+
+def run(_):
+    """Rename the trained model archives, evaluate the predictions and write the performance tables.
+
+    Reads its configuration from the absl flags defined above and writes ``google_sheet.csv`` and
+    ``performance_git.txt`` into the current working directory. The single argument (supplied by
+    ``app.run``) is ignored.
+    """
+    path_to_folder_with_predictions = pathlib.Path(FLAGS.pred_dir)
+    path_to_folder_with_ud = pathlib.Path(FLAGS.ud_dir)
+    path_to_folder_with_models = pathlib.Path(FLAGS.models_dir)
+    URL_download = FLAGS.URL_download
+    URL_licence = FLAGS.URL_licence
+
+    # changing model name and creating dictionary with key: treebank value: model name
+    directory = list(path_to_folder_with_models.iterdir())
+    treebank_model_name = {}
+    for filename in directory:
+        allen_folders = list(filename.iterdir())
+        assert len(allen_folders) == 1, "Multiple allen nlp serialization folders."
+        allen_folder = allen_folders[0]
+        treebank = filename.name
+        language = treebank.split("_")[1].split("-")[0]
+        model_path = allen_folder / "model.tar.gz"
+        if not model_path.exists():
+            continue
+        # The bare language name is used only when it matches a single entry of the models directory;
+        # otherwise the treebank suffix is kept to tell the models apart.
+        if sum(language in str(s) for s in directory) != 1:
+            model_name = treebank.split("_")[1].lower() + f"-ud{FLAGS.UD_version}"
+        else:
+            model_name = language.lower() + f"-ud{FLAGS.UD_version}"
+        model_path.rename(allen_folder / f"{model_name}.tar.gz")
+        # Keyed by the directory name as a str because the evaluation loop below looks models up by str;
+        # stored without ".tar.gz" because the URL_download template already ends with that extension.
+        treebank_model_name[treebank] = model_name
+
+    # evaluating models
+    all_result = [["Treebank", "Model name", "Model link", "UPOS", "XPOS", "UFeats", "AllTags", "Lemmas", "UAS", "LAS",
+                   "CLAS", "MLAS", "BLEX", "LICENSE"]]
+
+    # iterdir() already yields paths that include the predictions directory, so they are used directly.
+    for path_to_predictions in path_to_folder_with_predictions.iterdir():
+        folder_with_data = path_to_predictions.name.replace("predictions_test.conllu", "")
+        ud_folder = path_to_folder_with_ud / folder_with_data
+
+        ud_files = list(ud_folder.iterdir())
+        test_file = [f for f in ud_files if "test" in f.name and ".conllu" in f.name]
+        assert len(test_file) == 1, f"Couldn't find a single test file in {ud_folder}."
+        test_file_path = test_file[0]
+
+        evaluation = evaluate_wrapper(str(test_file_path), str(path_to_predictions))
+        model_name = treebank_model_name[folder_with_data]
+        metrics_evaluation = [folder_with_data, model_name, URL_download.format(model=model_name)]
+        for metric in ["UPOS", "XPOS", "UFeats", "AllTags", "Lemmas", "UAS", "LAS", "CLAS",
+                       "MLAS", "BLEX"]:
+            # NOTE(review): the table is filled with precision; confirm whether the F1 score was intended.
+            metrics_evaluation.append(round(100 * evaluation[metric].precision, 2))
+        metrics_evaluation.append(URL_licence.format(treebank=folder_with_data))
+        all_result.append(metrics_evaluation)
+
+    # saving google sheet performance table
+    with open("google_sheet.csv", "w", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerows(all_result)
+
+    # creating gitlab performance table
+    performance_table_gitlab = []
+    for row in all_result:
+        # Metric cells are floats, so every cell is converted to str before joining.
+        metric_cells = "|".join(str(cell) for cell in row[3:])
+        new_row = "|" + row[0] + "|[" + row[1] + "](" + row[2] + ")|" + metric_cells + "|"
+        performance_table_gitlab.append(new_row)
+
+    with open('performance_git.txt', 'w') as fo:
+        fo.write('\n'.join(str(i) for i in performance_table_gitlab))
+
+
+def main():
+    """Entry point: hand ``run`` to ``app.run``."""
+    app.run(run)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/predict_UD.py b/scripts/predict_UD.py new file mode 100644 index 0000000000000000000000000000000000000000..da1aea86e01e0b3d49fcb73b668998e94c9302fe --- /dev/null +++ b/scripts/predict_UD.py @@ -0,0 +1,98 @@ +"""Script to train Dependency Parsing models based on UD 2.x data.""" +import pathlib + +from absl import app +from absl import flags + +from scripts import utils + +# # ls -1 | xargs -i echo "\"{}\"," +# UD 2.7 +TREEBANKS = [ + "UD_Norwegian-Bokmaal", + "UD_Norwegian-Nynorsk", + "UD_Norwegian-NynorskLIA", + "UD_Persian-PerDT", + "UD_Persian-Seraji", + "UD_Polish-LFG", + "UD_Polish-PDB", + "UD_Portuguese-Bosque", + "UD_Portuguese-GSD", + "UD_Romanian-Nonstandard", + "UD_Romanian-RRT", + "UD_Romanian-SiMoNERo", + "UD_Russian-GSD", + "UD_Russian-SynTagRus", + "UD_Russian-Taiga", + "UD_Serbian-SET", + "UD_Slovak-SNK", +
"UD_Slovenian-SSJ", + "UD_Spanish-AnCora", + "UD_Spanish-GSD", + "UD_Swedish-LinES", + "UD_Swedish-Talbanken", + # "UD_Tamil-TTB", + "UD_Telugu-MTG", + "UD_Turkish-Atis", + "UD_Turkish-BOUN", + # "UD_Turkish-FrameNet", + "UD_Turkish-IMST", + "UD_Turkish-Kenet", + "UD_Turkish-Penn", + "UD_Turkish-Tourism", + "UD_Ukrainian-IU", + "UD_Urdu-UDTB", + "UD_Uyghur-UDT", + "UD_Vietnamese-VTB", + "UD_Welsh-CCG", +] + +FLAGS = flags.FLAGS +flags.DEFINE_list(name="treebanks", default=TREEBANKS, + help=f"Treebanks to predict") +flags.DEFINE_string(name="data_dir", default="/tmp/lustre_shared/lukasz/UD_2.9/", + help="Path to UD data directory.") +flags.DEFINE_string(name="output_dir", default="/tmp/lustre_shared/lukasz/predictions_UD_29/", + help="Output directory for predictions") +flags.DEFINE_string(name="models_dir", default="/tmp/lustre_shared/lukasz/models_UD_2.9/", + help="Directory where models are stored in subfolders with the name of treebank") + + +def run(_): + treebanks_dir = pathlib.Path(FLAGS.data_dir) + for treebank in FLAGS.treebanks: + assert treebank in TREEBANKS, f"Unknown treebank {treebank}." + treebank_dir = treebanks_dir / treebank + + files = list(treebank_dir.iterdir()) + + test_file = [f for f in files if "test" in f.name and ".conllu" in f.name] + assert len(test_file) == 1, f"Couldn't find training file." + test_file_path = test_file[0] + + output_path = pathlib.Path(FLAGS.output_dir) / (treebank + "predictions_test.conllu") + + model_directory = pathlib.Path(FLAGS.models_dir) / treebank + files = list(model_directory.iterdir()) + assert len(files) == 1, f"Couldn't find model directory file." 
+ model_directory = model_directory / files[0] + + command = f""" + time combo --mode predict --model_path {model_directory} + --input_file {test_file_path} + --output_file {output_path} + --cuda_device 0 + --batch_size 32 + --silent + """ + + utils.execute_command(command) + + +def main(): + app.run(run) + + +if __name__ == "__main__": + main() + diff --git a/scripts/scripts_usage.md b/scripts/scripts_usage.md new file mode 100644 index 0000000000000000000000000000000000000000..e263e5e0213696ec261d2c519e20893c303c69df --- /dev/null +++ b/scripts/scripts_usage.md @@ -0,0 +1,12 @@ +# Training models on Universal dependency datasets +1. Download fasttext embeddings using: + - download_fasttext.py +2. Divide UD dataset into train batches, adjust train scripts, using: + - UD division notebook.ipynb +3. Train models by running adjusted train.py scripts +4. Predict on test sets using: + - predict_UD.py +5. Compute metrics for google sheet and performance table in gitlab, first one will be saved in google_sheet.csv, +the latter one in performance_git.txt. 
Script requires to download official ud evaluation script from +https://universaldependencies.org/conll18/ and place it the same directory as create_performance_table.py script + - create_performance_table.py \ No newline at end of file diff --git a/scripts/train.py b/scripts/train.py index b75bbedb258928c42dae898dce883bece4de286a..16b2a3705c91cc68603e07564cd16889984bdb8f 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -223,22 +223,33 @@ def run(_): """ # Datasets without XPOS - if treebank in {"UD_Armenian-ArmTDP", "UD_Basque-BDT", "UD_Danish-DDT", "UD_Hungarian-Szeged", "UD_French-GSD", - "UD_Marathi-UFAL", "UD_Norwegian-Bokmaal"}: + if treebank in {'UD_Danish-DDT', 'UD_Western_Armenian-ArmTDP', 'UD_Basque-BDT', 'UD_Hungarian-Szeged', 'UD_Russian-Taiga', 'UD_Portuguese-Bosque', 'UD_Norwegian-NynorskLIA', 'UD_Turkish-Penn', 'UD_French-GSD', 'UD_Armenian-ArmTDP'}: command = command + " --targets deprel,head,upostag,lemma,feats" # Datasets without FEATS - if treebank in {"UD_Japanese-GSD", "UD_Korean-Kaist"}: + if treebank in {'UD_Galician-CTG', 'UD_Italian-ISDT', 'UD_Korean-Kaist', 'UD_Korean-GSD'}: command = command + " --targets deprel,head,upostag,xpostag,lemma" + # Datasets without LEMMA + if treebank in {'UD_Old_French-SRCMF'}: + command = command + " --targets deprel,head,upostag,xpostag,feats" + + # Datasets without XPOS and LEMMA + if treebank in {}: + command = command + " --targets deprel,head,upostag,feats" + # Datasets without LEMMA and FEATS - if treebank in {"UD_Maltese-MUDT"}: + if treebank in {'UD_English-ESL', 'UD_Maltese-MUDT', 'UD_Swedish_Sign_Language-SSLC'}: command = command + " --targets deprel,head,upostag,xpostag" # Datasets without XPOS and FEATS - if treebank in {"UD_Telugu-MTG"}: + if treebank in {}: command = command + " --targets deprel,head,upostag,lemma" + # Datasets without XPOS, FEATS and LEMMA + if treebank in {}: + command = command + " --targets deprel,head,upostag" + # Reduce word_batch_size word_batch_size = 2500 if 
treebank in {"UD_German-HDT", "UD_Marathi-UFAL"}: diff --git a/setup.py b/setup.py index f4a82e59b63cbed2c1de8bebefe3e99d51db9f55..8790d0a018b8160ad804a39efcd86d8b9bb43f5b 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ REQUIREMENTS = [ 'jsonnet==0.15.0', 'filelock==3.0;python_version>="3.9"', 'numpy==1.19.4;python_version<"3.9"', - 'numpy==1.22.0;python_version>="3.9"', + 'numpy==1.22.0', 'overrides==3.1.0', 'requests==2.23.0', 'sentencepiece==0.1.83;python_version<"3.8"', @@ -18,8 +18,7 @@ REQUIREMENTS = [ 'scipy<1.6.0;python_version<"3.7"', # SciPy 1.6.0 works for 3.7+ 'scipy==1.6.0;python_version>="3.7"', 'spacy==2.3.2', - 'scikit-learn<=0.23.2;python_version<"3.9"', - 'scikit-learn==0.23.2;python_version>="3.9"', + 'scikit-learn==0.23.2', 'torch==1.7.1', 'tqdm==4.43.0', 'transformers==4.0.1', @@ -38,7 +37,8 @@ setup( keywords="nlp natural-language-processing dependency-parsing", setup_requires=['pytest-runner', 'pytest-pylint', - 'numpy==1.22.0;python_version>="3.9"', + 'scikit-learn==0.23.2', + 'numpy==1.22.0', 'scipy==1.6.0;python_version>="3.7"'], tests_require=['pytest', 'pylint'], python_requires='>=3.6',