Skip to content
Snippets Groups Projects
Commit 3e11893a authored by Mateusz Klimaszewski's avatar Mateusz Klimaszewski
Browse files

Make targets and features flags as list.

parent 90c6fbee
No related merge requests found
...@@ -24,7 +24,7 @@ Examples (for clarity without training/validation data paths): ...@@ -24,7 +24,7 @@ Examples (for clarity without training/validation data paths):
* train on gpu 0 * train on gpu 0
```bash ```bash
combo --mode train --cuda_davice 0 combo --mode train --cuda_device 0
``` ```
* use pretrained embeddings: * use pretrained embeddings:
...@@ -42,13 +42,13 @@ Examples (for clarity without training/validation data paths): ...@@ -42,13 +42,13 @@ Examples (for clarity without training/validation data paths):
* predict only dependency tree: * predict only dependency tree:
```bash ```bash
combo --mode train --targets head --targets deprel combo --mode train --targets head,deprel
``` ```
* use part-of-speech tags for predicting only dependency tree * use part-of-speech tags for predicting only dependency tree
```bash ```bash
combo --mode train --targets head --targets deprel --features token --features char --features upostag combo --mode train --targets head,deprel --features token,char,upostag
``` ```
Advanced configuration: [Configuration](#configuration) Advanced configuration: [Configuration](#configuration)
......
...@@ -69,7 +69,7 @@ class UniversalDependenciesDatasetReader(allen_data.DatasetReader): ...@@ -69,7 +69,7 @@ class UniversalDependenciesDatasetReader(allen_data.DatasetReader):
@overrides @overrides
def _read(self, file_path: str) -> Iterable[allen_data.Instance]: def _read(self, file_path: str) -> Iterable[allen_data.Instance]:
file_path = [file_path] if len(file_path.split(":")) == 0 else file_path.split(":") file_path = [file_path] if len(file_path.split(",")) == 0 else file_path.split(",")
for conllu_file in file_path: for conllu_file in file_path:
with open(conllu_file, "r") as file: with open(conllu_file, "r") as file:
......
...@@ -17,6 +17,8 @@ from combo.data import dataset ...@@ -17,6 +17,8 @@ from combo.data import dataset
from combo.utils import checks from combo.utils import checks
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_FEATURES = ["token", "char", "upostag", "xpostag", "lemma", "feats"]
_TARGETS = ["deprel", "feats", "head", "lemma", "upostag", "xpostag", "semrel", "sent"]
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
flags.DEFINE_enum(name="mode", default=None, enum_values=["train", "predict"], flags.DEFINE_enum(name="mode", default=None, enum_values=["train", "predict"],
...@@ -30,9 +32,9 @@ flags.DEFINE_string(name="output_file", default="output.log", ...@@ -30,9 +32,9 @@ flags.DEFINE_string(name="output_file", default="output.log",
# Training flags # Training flags
flags.DEFINE_list(name="training_data_path", default="./tests/fixtures/example.conllu", flags.DEFINE_list(name="training_data_path", default="./tests/fixtures/example.conllu",
help="Training data path") help="Training data path(s)")
flags.DEFINE_list(name="validation_data_path", default="", flags.DEFINE_list(name="validation_data_path", default="",
help="Validation data path") help="Validation data path(s)")
flags.DEFINE_string(name="pretrained_tokens", default="", flags.DEFINE_string(name="pretrained_tokens", default="",
help="Pretrained tokens embeddings path") help="Pretrained tokens embeddings path")
flags.DEFINE_integer(name="embedding_dim", default=300, flags.DEFINE_integer(name="embedding_dim", default=300,
...@@ -42,14 +44,12 @@ flags.DEFINE_integer(name="num_epochs", default=400, ...@@ -42,14 +44,12 @@ flags.DEFINE_integer(name="num_epochs", default=400,
flags.DEFINE_integer(name="word_batch_size", default=2500, flags.DEFINE_integer(name="word_batch_size", default=2500,
help="Minimum words in batch") help="Minimum words in batch")
flags.DEFINE_string(name="pretrained_transformer_name", default="", flags.DEFINE_string(name="pretrained_transformer_name", default="",
help="Pretrained transformer model name (see transformers from HuggingFace library for list of" help="Pretrained transformer model name (see transformers from HuggingFace library for list of "
"available models) for transformers based embeddings.") "available models) for transformers based embeddings.")
flags.DEFINE_multi_enum(name="features", default=["token", "char"], flags.DEFINE_list(name="features", default=["token", "char"],
enum_values=["token", "char", "upostag", "xpostag", "lemma", "feats"], help=f"Features used to train model (required 'token' and 'char'). Possible values: {_FEATURES}.")
help="Features used to train model (required 'token' and 'char')") flags.DEFINE_list(name="targets", default=["deprel", "feats", "head", "lemma", "upostag", "xpostag"],
flags.DEFINE_multi_enum(name="targets", default=["deprel", "feats", "head", "lemma", "upostag", "xpostag"], help=f"Targets of the model (required `deprel` and `head`). Possible values: {_TARGETS}.")
enum_values=["deprel", "feats", "head", "lemma", "upostag", "xpostag", "semrel", "sent"],
help="Targets of the model (required `deprel` and `head`)")
flags.DEFINE_string(name="serialization_dir", default=None, flags.DEFINE_string(name="serialization_dir", default=None,
help="Model serialization directory (default - system temp dir).") help="Model serialization directory (default - system temp dir).")
flags.DEFINE_boolean(name="tensorboard", default=False, flags.DEFINE_boolean(name="tensorboard", default=False,
...@@ -189,10 +189,22 @@ def _get_ext_vars(finetuning: bool = False) -> Dict: ...@@ -189,10 +189,22 @@ def _get_ext_vars(finetuning: bool = False) -> Dict:
def main(): def main():
"""Parse flags.""" """Parse flags."""
flags.register_validator(
"features",
lambda values: all(
value in _FEATURES for value in values),
message="Flags --features contains unknown value(s)."
)
flags.register_validator( flags.register_validator(
"mode", "mode",
lambda value: value is not None, lambda value: value is not None,
message="Flag --mode must be set with either `predict` or `train` value") message="Flag --mode must be set with either `predict` or `train` value")
flags.register_validator(
"targets",
lambda values: all(
value in _TARGETS for value in values),
message="Flag --targets contains unknown value(s)."
)
app.run(run) app.run(run)
......
...@@ -3,10 +3,10 @@ ...@@ -3,10 +3,10 @@
######################################################################################## ########################################################################################
# Training data path, str # Training data path, str
# Must be in CoNLL-U format (or its extended version with semantic relation field). # Must be in CoNLL-U format (or its extended version with semantic relation field).
# Can accept multiple paths when concatenated with ':', "path1:path2" # Can accept multiple paths when concatenated with ',', "path1,path2"
local training_data_path = std.extVar("training_data_path"); local training_data_path = std.extVar("training_data_path");
# Validation data path, str # Validation data path, str
# Can accept multiple paths when concatenated with ':', "path1:path2" # Can accept multiple paths when concatenated with ',', "path1,path2"
local validation_data_path = if std.length(std.extVar("validation_data_path")) > 0 then std.extVar("validation_data_path"); local validation_data_path = if std.length(std.extVar("validation_data_path")) > 0 then std.extVar("validation_data_path");
# Path to pretrained tokens, str or null # Path to pretrained tokens, str or null
local pretrained_tokens = if std.length(std.extVar("pretrained_tokens")) > 0 then std.extVar("pretrained_tokens"); local pretrained_tokens = if std.length(std.extVar("pretrained_tokens")) > 0 then std.extVar("pretrained_tokens");
...@@ -36,13 +36,13 @@ local embedding_dim = std.parseInt(std.extVar("embedding_dim")); ...@@ -36,13 +36,13 @@ local embedding_dim = std.parseInt(std.extVar("embedding_dim"));
local predictors_dropout = 0.25; local predictors_dropout = 0.25;
# Xpostag embedding dimension, int # Xpostag embedding dimension, int
# (discarded if xpostag not in features) # (discarded if xpostag not in features)
local xpostag_dim = 100; local xpostag_dim = 32;
# Upostag embedding dimension, int # Upostag embedding dimension, int
# (discarded if upostag not in features) # (discarded if upostag not in features)
local upostag_dim = 100; local upostag_dim = 32;
# Feats embedding dimension, int # Feats embedding dimension, int
# (discarded if feats not in features) # (discarded if feats not in features)
local feats_dim = 100; local feats_dim = 32;
# Lemma embedding dimension, int # Lemma embedding dimension, int
# (discarded if lemma not in features) # (discarded if lemma not in features)
local lemma_char_dim = 64; local lemma_char_dim = 64;
......
...@@ -3,11 +3,11 @@ from setuptools import find_packages, setup ...@@ -3,11 +3,11 @@ from setuptools import find_packages, setup
REQUIREMENTS = [ REQUIREMENTS = [
'absl-py==0.9.0', 'absl-py==0.9.0',
'allennlp==1.0.0rc4', 'allennlp==1.0.0rc5',
'conllu==2.3.2', 'conllu==2.3.2',
'joblib==0.14.1', 'joblib==0.14.1',
'jsonnet==0.15.0', 'jsonnet==0.15.0',
'overrides==2.8.0', 'overrides==3.0.0',
'tensorboard==2.1.0', 'tensorboard==2.1.0',
'torch==1.5.0', 'torch==1.5.0',
'torchvision==0.6.0', 'torchvision==0.6.0',
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment