# test_ccl.py
# Test for CCLInputParser using an inline CCL (XML) sample document.
# from src.annotation_types_old import AnnotationTypes
from src.input_parsers.ccl import CCLInputParser
from tempfile import NamedTemporaryFile
# Annotation classes that the assertions below expect in the parser output.
from src.annotations import NerAnnotation, MorphosyntacticAnnotation

# Sample CCL (XML) document used as parser input by the test in this module.
# It holds one chunk with one sentence of six <tok> elements:
#   Marek, Kowalski, pojechał, do, Wrocławia, "."
# Each token carries an <orth> surface form, one or more <lex> entries
# (base form + ctag) and two <ann> channels, "nam_liv" and "nam_loc".
# "Marek" and "Kowalski" have value 1 in the nam_liv channel, and "Wrocławia"
# has value 1 in the nam_loc channel; all other channel values are 0.
# The <ns/> element sits between "Wrocławia" and "."; the test expects no
# space between them in the parsed text.
# NOTE(review): the first token has two disamb="1" <lex> entries that differ
# only in the base form ("Marek" / "marek") — presumably intentional, to
# exercise duplicate handling; confirm against the parser.
example_ccl = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE chunkList SYSTEM "ccl.dtd">
<chunkList>
 <chunk type="p" id="ch1">
  <sentence id="s1">
   <tok>
    <orth>Marek</orth>
    <lex disamb="1"><base>Marek</base><ctag>subst:sg:nom:m1</ctag></lex>
    <lex disamb="1"><base>marek</base><ctag>subst:sg:nom:m1</ctag></lex>
    <ann chan="nam_liv" head="1">1</ann>
    <ann chan="nam_loc">0</ann>
   </tok>
   <tok>
    <orth>Kowalski</orth>
    <lex disamb="1"><base>Kowalski</base><ctag>subst:sg:nom:m1</ctag></lex>
    <ann chan="nam_liv">1</ann>
    <ann chan="nam_loc">0</ann>
   </tok>
   <tok>
    <orth>pojechał</orth>
    <lex disamb="1"><base>pojechać</base><ctag>praet:sg:m1:perf</ctag></lex>
    <ann chan="nam_liv">0</ann>
    <ann chan="nam_loc">0</ann>
   </tok>
   <tok>
    <orth>do</orth>
    <lex disamb="1"><base>do</base><ctag>prep:gen</ctag></lex>
    <ann chan="nam_liv">0</ann>
    <ann chan="nam_loc">0</ann>
   </tok>
   <tok>
    <orth>Wrocławia</orth>
    <lex disamb="1"><base>Wrocław</base><ctag>subst:sg:gen:m3</ctag></lex>
    <ann chan="nam_liv">0</ann>
    <ann chan="nam_loc" head="1">1</ann>
   </tok>
   <ns/>
   <tok>
    <orth>.</orth>
    <lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
    <ann chan="nam_liv">0</ann>
    <ann chan="nam_loc">0</ann>
   </tok>
  </sentence>
 </chunk>
</chunkList>
"""

# Checks the text and annotation spans parsed from example_ccl.
def test_ccl_input_parser():
    """Parse ``example_ccl`` and verify the extracted text and annotations.

    ``CCLInputParser.parse`` is expected to return a ``(text, annotations)``
    pair, where ``annotations`` is a sized container of
    ``(start, end, annotation)`` tuples. The start/end values asserted here
    are character offsets into ``text`` with an exclusive end.
    """
    parser = CCLInputParser()
    text, annotations = parser.parse(example_ccl)

    assert text == "Marek Kowalski pojechał do Wrocławia."
    # Two NER spans plus one morphosyntactic tag for each of the six tokens.
    assert len(annotations) == 8

    # NER: "Marek Kowalski" is a single nam_liv span; "Wrocławia" is nam_loc.
    assert (0, 14, NerAnnotation("nam_liv")) in annotations
    assert (27, 36, NerAnnotation("nam_loc")) in annotations

    # Morphosyntactic tags, one per token, in document order.
    assert (0, 5, MorphosyntacticAnnotation("subst:sg:nom:m1")) in annotations
    assert (6, 14, MorphosyntacticAnnotation("subst:sg:nom:m1")) in annotations
    assert (15, 23, MorphosyntacticAnnotation("praet:sg:m1:perf")) in annotations
    assert (24, 26, MorphosyntacticAnnotation("prep:gen")) in annotations
    assert (27, 36, MorphosyntacticAnnotation("subst:sg:gen:m3")) in annotations
    # "." directly follows "Wrocławia" with no separating space.
    assert (36, 37, MorphosyntacticAnnotation("interp")) in annotations