diff --git a/combo/data/dataset.py b/combo/data/dataset.py index 0b53df3284cde5d11e95ecd496059b6f55921e35..2877a65c8f64af347c96807dab9687a39c73b81b 100644 --- a/combo/data/dataset.py +++ b/combo/data/dataset.py @@ -124,7 +124,7 @@ class UniversalDependenciesDatasetReader(allen_data.DatasetReader): enhanced_deprels.append(deprels[idx]) t_deps = t["deps"] if t_deps and t_deps != "_": - t_heads, t_deprels = zip(*[tuple(d.split(":")) for d in t_deps.split("|")]) + t_deprels, t_heads = zip(*t_deps) enhanced_heads.extend([(idx, t) for t in t_heads]) enhanced_deprels.extend(t_deprels) fields_["enhanced_heads"] = allen_fields.AdjacencyField( @@ -137,7 +137,8 @@ class UniversalDependenciesDatasetReader(allen_data.DatasetReader): indices=enhanced_heads, sequence_field=text_field, labels=enhanced_deprels, - label_namespace="enhanced_deprels_labels", + # Label namespace should match regular tree parsing. + label_namespace="deprel_labels", padding_value=0, ) else: