Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
combo
Manage
Activity
Members
Labels
Plan
Issues
20
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
2
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
combo
Merge requests
!36
Release 1.0.4
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Release 1.0.4
candidate_release_1.0.4
into
develop
Overview
0
Commits
28
Pipelines
1
Changes
21
Merged
Mateusz Klimaszewski
requested to merge
candidate_release_1.0.4
into
develop
3 years ago
Overview
0
Commits
28
Pipelines
1
Changes
2
Expand
0
0
Merge request reports
Viewing commit
d12e5ec7
Prev
Next
Show latest version
2 files
+
26
−
9
Expand all files
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
2
Search (e.g. *.vue) (Ctrl+P)
d12e5ec7
Add lv local model. Merge raw txt files.
· d12e5ec7
Mateusz Klimaszewski
authored
4 years ago
scripts/train_iwpt21.py
+
25
−
9
Options
@@ -36,7 +36,7 @@ FLAGS = flags.FLAGS
flags
.
DEFINE_list
(
name
=
"
lang
"
,
default
=
list
(
LANG2TREEBANK
.
keys
()),
help
=
f
"
Language of models to train. Possible values:
{
LANG2TREEBANK
.
keys
()
}
.
"
)
flags
.
DEFINE_string
(
name
=
"
data_dir
"
,
default
=
""
,
help
=
"
Path to
'
iwpt2020st
data
'
directory.
"
)
help
=
"
Path to
IWPT
'
21
data directory.
"
)
flags
.
DEFINE_string
(
name
=
"
serialization_dir
"
,
default
=
"
/tmp/
"
,
help
=
"
Model serialization dir.
"
)
flags
.
DEFINE_integer
(
name
=
"
cuda_device
"
,
default
=-
1
,
@@ -68,9 +68,11 @@ def run(_):
assert
data_dir
.
is_dir
(),
f
"'
{
data_dir
}
'
is not a directory!
"
treebanks
=
LANG2TREEBANK
[
lang
]
full_language
=
treebanks
[
0
].
split
(
"
-
"
)[
0
]
train_paths
=
[]
dev_paths
=
[]
train_raw_paths
=
[]
dev_raw_paths
=
[]
# TODO Uncomment when IWPT'21 Shared Task ends.
# During shared task duration test data is not available.
test_paths
=
[]
@@ -90,19 +92,33 @@ def run(_):
# elif "test" in name:
# collapse_nodes(data_dir, treebank_file, output)
# test_paths.append(output)
if
"
.txt
"
in
name
:
if
"
train
"
in
name
:
train_raw_paths
.
append
(
path_to_str
(
treebank_file
))
elif
"
dev
"
in
name
:
dev_raw_paths
.
append
(
path_to_str
(
treebank_file
))
lang_data_dir
=
pathlib
.
Path
(
data_dir
/
lang
)
merged_dataset_name
=
"
IWPT
"
lang_data_dir
=
pathlib
.
Path
(
data_dir
/
f
"
UD_
{
full_language
}
-
{
merged_dataset_name
}
"
)
lang_data_dir
.
mkdir
(
exist_ok
=
True
)
train_path
=
lang_data_dir
/
"
train.conllu
"
dev_path
=
lang_data_dir
/
"
dev.conllu
"
# TODO Uncomment
# test_path = lang_data_dir / "test.conllu"
suffix
=
f
"
{
lang
}
_
{
merged_dataset_name
}
-ud
"
.
lower
()
train_path
=
lang_data_dir
/
f
"
{
suffix
}
-train.conllu
"
dev_path
=
lang_data_dir
/
f
"
{
suffix
}
-dev.conllu
"
test_path
=
lang_data_dir
/
f
"
{
suffix
}
-test.conllu
"
train_raw_path
=
lang_data_dir
/
f
"
{
suffix
}
-train.txt
"
dev_raw_path
=
lang_data_dir
/
f
"
{
suffix
}
-dev.txt
"
test_raw_path
=
lang_data_dir
/
f
"
{
suffix
}
-test.txt
"
merge_files
(
train_paths
,
output
=
train_path
)
merge_files
(
dev_paths
,
output
=
dev_path
)
# TODO Uncomment
# merge_files(test_paths, output=test_path)
# TODO Change to test_paths instead of dev_paths after IWPT'21
merge_files
(
dev_paths
,
output
=
test_path
)
merge_files
(
train_raw_paths
,
output
=
train_raw_path
)
merge_files
(
dev_raw_paths
,
output
=
dev_raw_path
)
# TODO Change to test_raw_paths instead of dev_paths after IWPT'21
merge_files
(
dev_raw_paths
,
output
=
test_raw_path
)
serialization_dir
=
pathlib
.
Path
(
FLAGS
.
serialization_dir
)
/
lang
serialization_dir
.
mkdir
(
exist_ok
=
True
,
parents
=
True
)