Skip to content
Snippets Groups Projects
Select Git revision
  • master
1 result

README.md

Blame
  • Overview

    A Python package that provides an easy way to test the correctness of annotations in a CCL document.

    Description

    assert_has_ann is the main function: it checks whether the given tokens are annotated with the specified annotation and whether that annotation is set correctly. For details, see the docstrings.

    Dependencies

    1. deb packages
      • corpus2
    2. pip packages
      • corpus-ccl

    Usage

    # One expected annotation: which tokens, which annotation names, and the
    # expected base value for that annotation.
    AnnCfg = namedtuple("AnnCfg", ["tokens", "ann_names", "base_ann_value"])
    
    @pytest.mark.parametrize(
        "ccl_file, wccl_dict, out_file, tagset, expected_anns",
        [
            pytest.param(
                f"{TEST_ROOT_DIR}/doc_01.xml",
                f"{TEST_ROOT_DIR}/wccl_dict_01.xml",
                f"{TEST_OUT_DIR}/doc_01_annotated.xml",
                "nkjp",
                [
                    AnnCfg((0, 0, (0, 1)), TEST_ANNS, "plac zabaw"),
                    AnnCfg([(0, 0, 3)], TEST_ANNS, "tenis"),
                    AnnCfg((0, 0, (6, 7)), TEST_ANNS, "all inclusive"),
                ],
                id="basic-test",
            ),
        ]
    )
    def test_annotations(
        ccl_file: str,
        wccl_dict: str,
        out_file: str,
        tagset: str,
        expected_anns: List[AnnCfg],
    ):
        """Assert that each expected annotation is present in ``out_file``.

        The document at ``out_file`` is read and indexed once; every entry of
        ``expected_anns`` is then checked with ``ccl_ann_test.assert_has_ann``.
        """
        # NOTE(review): ccl_file, wccl_dict and tagset are not used in this
        # snippet; presumably the step that produces out_file from them is
        # omitted from the example -- confirm.
        index = ccl_ann_test.CclIndex(ccl.read(out_file))
        for expected in expected_anns:
            ccl_ann_test.assert_has_ann(
                # Token spec from the config, expanded by the library helper.
                ccl_ann_test.expand_tok_idx(expected.tokens),
                index,
                expected.ann_names,
                expected.base_ann_value,
            )