# from src.annotation_types_old import AnnotationTypes
import os
from tempfile import NamedTemporaryFile

from src.annotations import MorphosyntacticAnnotation, NerAnnotation
from src.input_parsers.ccl import CCLInputParser

# CCL (XML) fixture: one chunk containing one sentence of six tokens.
# Every token has "nam_liv" and "nam_loc" annotation channels; a non-zero
# channel value marks the token as part of an entity in that channel
# ("Marek Kowalski" -> nam_liv, "Wrocławia" -> nam_loc).  The <ns/> element
# sits between "Wrocławia" and "." and the expected text has no space there.
# The first token carries two disambiguated <lex> entries with the same ctag.
#
# The literal is split into adjacent string pieces purely for readability;
# the concatenated value is kept byte-for-byte (single spaces between
# elements, trailing space at the end).
example_ccl = (
    '<?xml version="1.0" encoding="UTF-8"?> '
    '<!DOCTYPE chunkList SYSTEM "ccl.dtd"> '
    '<chunkList> '
    '<chunk type="p" id="ch1"> '
    '<sentence id="s1"> '
    '<tok> '
    '<orth>Marek</orth> '
    '<lex disamb="1"><base>Marek</base><ctag>subst:sg:nom:m1</ctag></lex> '
    '<lex disamb="1"><base>marek</base><ctag>subst:sg:nom:m1</ctag></lex> '
    '<ann chan="nam_liv" head="1">1</ann> '
    '<ann chan="nam_loc">0</ann> '
    '</tok> '
    '<tok> '
    '<orth>Kowalski</orth> '
    '<lex disamb="1"><base>Kowalski</base><ctag>subst:sg:nom:m1</ctag></lex> '
    '<ann chan="nam_liv">1</ann> '
    '<ann chan="nam_loc">0</ann> '
    '</tok> '
    '<tok> '
    '<orth>pojechał</orth> '
    '<lex disamb="1"><base>pojechać</base><ctag>praet:sg:m1:perf</ctag></lex> '
    '<ann chan="nam_liv">0</ann> '
    '<ann chan="nam_loc">0</ann> '
    '</tok> '
    '<tok> '
    '<orth>do</orth> '
    '<lex disamb="1"><base>do</base><ctag>prep:gen</ctag></lex> '
    '<ann chan="nam_liv">0</ann> '
    '<ann chan="nam_loc">0</ann> '
    '</tok> '
    '<tok> '
    '<orth>Wrocławia</orth> '
    '<lex disamb="1"><base>Wrocław</base><ctag>subst:sg:gen:m3</ctag></lex> '
    '<ann chan="nam_liv">0</ann> '
    '<ann chan="nam_loc" head="1">1</ann> '
    '</tok> '
    '<ns/> '
    '<tok> '
    '<orth>.</orth> '
    '<lex disamb="1"><base>.</base><ctag>interp</ctag></lex> '
    '<ann chan="nam_liv">0</ann> '
    '<ann chan="nam_loc">0</ann> '
    '</tok> '
    '</sentence> '
    '</chunk> '
    '</chunkList> '
)


def test_ccl_input_parser():
    """Check that CCLInputParser.parse yields the expected text and spans.

    The fixture is written to a temporary file and parsed by path.  The
    result is expected to be a ``(text, annotations)`` pair where
    ``annotations`` contains ``(start, end, annotation)`` tuples.  The
    expected offsets below are end-exclusive character positions in the
    returned text.
    """
    parser = CCLInputParser()

    # delete=False + explicit close: reopening a still-open
    # NamedTemporaryFile by name works on POSIX but not on Windows, so the
    # file is closed (which also flushes it) before the parser opens it.
    with NamedTemporaryFile(delete=False) as f:
        f.write(example_ccl.encode("utf-8"))

    try:
        text, annotations = parser.parse(f.name)
    finally:
        # Manual cleanup replaces the automatic delete-on-close.
        os.unlink(f.name)

    assert text == "Marek Kowalski pojechał do Wrocławia."

    # 2 NER spans + 6 morphosyntactic tags (one per token, even though the
    # first token lists two <lex> entries).
    assert len(annotations) == 8

    # Named-entity spans.
    assert (0, 14, NerAnnotation("nam_liv")) in annotations
    assert (27, 36, NerAnnotation("nam_loc")) in annotations

    # One morphosyntactic tag per token.
    assert (0, 5, MorphosyntacticAnnotation("subst:sg:nom:m1")) in annotations
    assert (6, 14, MorphosyntacticAnnotation("subst:sg:nom:m1")) in annotations
    assert (15, 23, MorphosyntacticAnnotation("praet:sg:m1:perf")) in annotations
    assert (24, 26, MorphosyntacticAnnotation("prep:gen")) in annotations
    assert (27, 36, MorphosyntacticAnnotation("subst:sg:gen:m3")) in annotations
    assert (36, 37, MorphosyntacticAnnotation("interp")) in annotations