Overview
A Python package that provides an easy way to test the correctness of annotations in a CCL document.
Description
assert_has_ann
is the main function. It checks whether the given tokens are annotated
with the specified annotation and whether the annotation is set correctly.
For details, see the docstrings.
Dependencies
- deb packages
corpus2
- pip packages
corpus-ccl
Usage
AnnCfg = namedtuple(
    "AnnCfg",
    [
        "tokens",  # token spec, expanded with ccl_ann_test.expand_tok_idx
        "ann_names",  # forwarded to ccl_ann_test.assert_has_ann
        "base_ann_value",  # forwarded to ccl_ann_test.assert_has_ann
    ],
)
@pytest.mark.parametrize(
    "ccl_file, wccl_dict, out_file, tagset, expected_anns",
    [
        pytest.param(
            f"{TEST_ROOT_DIR}/doc_01.xml",
            f"{TEST_ROOT_DIR}/wccl_dict_01.xml",
            f"{TEST_OUT_DIR}/doc_01_annotated.xml",
            "nkjp",
            [
                AnnCfg((0, 0, (0, 1)), TEST_ANNS, "plac zabaw"),
                # NOTE(review): this entry wraps its token spec in a list,
                # while the other two pass a bare tuple -- confirm that
                # expand_tok_idx accepts both forms.
                AnnCfg([(0, 0, 3)], TEST_ANNS, "tenis"),
                AnnCfg((0, 0, (6, 7)), TEST_ANNS, "all inclusive"),
            ],
            id="basic-test",
        ),
    ]
)
def test_annotations(
    ccl_file: str,
    wccl_dict: str,
    out_file: str,
    tagset: str,
    expected_anns: List[AnnCfg],
):
    """Check that the document at ``out_file`` carries every expected annotation.

    The document is read with ``ccl.read`` and wrapped in a
    ``ccl_ann_test.CclIndex``. Each ``AnnCfg`` in ``expected_anns`` is then
    verified with ``ccl_ann_test.assert_has_ann``.

    ``ccl_file``, ``wccl_dict`` and ``tagset`` are supplied by the
    parametrization but are not read in this body.
    NOTE(review): presumably they feed an annotation step that this example
    omits -- confirm.
    """
    index = ccl_ann_test.CclIndex(ccl.read(out_file))
    for expected in expected_anns:
        # Expand the token spec first, then assert against the indexed document.
        ccl_ann_test.assert_has_ann(
            ccl_ann_test.expand_tok_idx(expected.tokens),
            index,
            expected.ann_names,
            expected.base_ann_value,
        )