Skip to content
Snippets Groups Projects
Name Last commit Last update
ccl_ann_test
README.md
requirements.txt
setup.py

Overview

A Python package that provides an easy way to test the correctness of annotations in a CCL document.

Description

`assert_has_ann` is the main function: it checks whether the given tokens are annotated with the specified annotation and whether that annotation is set correctly. For details, see the docstrings.

Dependencies

  1. deb packages
    • corpus2
  2. pip packages
    • corpus-ccl

Usage

# One expected annotation for a test case:
#   tokens         - token index spec, passed to ccl_ann_test.expand_tok_idx
#   ann_names      - annotation name(s), passed to ccl_ann_test.assert_has_ann
#   base_ann_value - expected base value, passed to ccl_ann_test.assert_has_ann
AnnCfg = namedtuple("AnnCfg", ["tokens", "ann_names", "base_ann_value"])

@pytest.mark.parametrize(
    "ccl_file, wccl_dict, out_file, tagset, expected_anns",
    [
        pytest.param(
            f"{TEST_ROOT_DIR}/doc_01.xml",
            f"{TEST_ROOT_DIR}/wccl_dict_01.xml",
            f"{TEST_OUT_DIR}/doc_01_annotated.xml",
            "nkjp",
            # NOTE(review): token specs look like (paragraph, sentence,
            # token-or-token-range) -- confirm against the docstring of
            # ccl_ann_test.expand_tok_idx.
            [
                AnnCfg(
                    tokens=(0, 0, (0, 1)),
                    ann_names=TEST_ANNS,
                    base_ann_value="plac zabaw",
                ),
                AnnCfg(
                    tokens=[(0, 0, 3)],
                    ann_names=TEST_ANNS,
                    base_ann_value="tenis",
                ),
                AnnCfg(
                    tokens=(0, 0, (6, 7)),
                    ann_names=TEST_ANNS,
                    base_ann_value="all inclusive",
                ),
            ],
            id="basic-test",
        ),
    ]
)
def test_annotations(
    ccl_file: str,
    wccl_dict: str,
    out_file: str,
    tagset: str,
    expected_anns: List[AnnCfg],
):
    """Check that the document at ``out_file`` carries every expected annotation.

    The document is read with ``ccl.read`` and wrapped in a
    ``ccl_ann_test.CclIndex``. For each entry of ``expected_anns`` the token
    spec is expanded with ``ccl_ann_test.expand_tok_idx`` and then checked
    with ``ccl_ann_test.assert_has_ann``.

    ``ccl_file``, ``wccl_dict`` and ``tagset`` are not used in this snippet;
    presumably they feed an annotation step that produces ``out_file`` and
    is omitted here -- TODO confirm.
    """
    index = ccl_ann_test.CclIndex(ccl.read(out_file))
    for expected in expected_anns:
        ccl_ann_test.assert_has_ann(
            ccl_ann_test.expand_tok_idx(expected.tokens),
            index,
            expected.ann_names,
            expected.base_ann_value,
        )