Skip to content
Snippets Groups Projects
Select Git revision
  • ee349a1234894251fc0ece8baee48d4f71aac752
  • main default protected
  • ud_training_script
  • fix_seed
  • merged-with-ner
  • multiword_fix_transformer
  • transformer_encoder
  • combo3
  • save_deprel_matrix_to_npz
  • master protected
  • combo-lambo
  • lambo-sent-attributes
  • adding_lambo
  • develop
  • update_allenlp2
  • develop_tmp
  • tokens_truncation
  • LR_test
  • eud_iwpt
  • iob
  • eud_iwpt_shared_task_bert_finetuning
  • 3.3.1
  • list
  • 3.2.1
  • 3.0.3
  • 3.0.1
  • 3.0.0
  • v1.0.6
  • v1.0.5
  • v1.0.4
  • v1.0.3
  • v1.0.2
  • v1.0.1
  • v1.0.0
34 results

evaluate_iwpt21.py

Blame
  • sample_polem.py 1.27 KiB
    import time
    from poldeepner2.models import PolDeepNer2, ModelFactory

    # Demo script: load a pretrained PolDeepNer2 model and run named-entity
    # recognition (with lemmas) over a few sample Polish texts, printing each
    # annotation plus load/processing timings and a token count.

    resources_path = "../poldeepner2_models"
    t0 = time.time()
    model = ModelFactory.get_resource("pdn2_cen_n82_roberta_large_sq_krnnt_cuda"
                                      ".pdn2", resources_path)
    ner = PolDeepNer2.load(model)
    time_model = time.time() - t0  # model-loading wall time in seconds

    # Read the long sample text via a context manager so the file handle is
    # closed deterministically (the original `open(...).read()` leaked it).
    with open("tests/resources/text_krakow.txt", "r",
              encoding="utf-8") as text_file:
        text_krakow = text_file.read()

    sentences = ["Spotkałem Marka Nowaka na Politechnice Wrocławskiej, który "
                 "pracuje w Intelu.",
                 "Wczoraj mieliśmy kontrolę Naczelnej Izby Skarbowej.",
                 text_krakow]

    token_count = 0

    t0 = time.time()
    for sentence in sentences:
        print("-" * 20)
        print(sentence.strip())
        doc = ner.process_document(sentence)
        token_count += len(doc.tokens)

        # One line per annotation: token span, character span (taken from the
        # first/last covered tokens), label, surface text, and lemma.
        for name in doc.annotations:
            name_range = "%d:%d" % (name.start, name.end)
            char_range = "%d:%d" % (doc.tokens[name.start].start,
                                    doc.tokens[name.end - 1].end)
            print(f"{name_range:<8} {char_range:<12} {name.label:<25} "
                  f"{name.get_text():<25}  {name.lemma}")
        print()

    print()
    print(f"Model loaded in   : {time_model} seconds")
    print(f"Texts processed in: {time.time()-t0} seconds")
    print(f"Number of tokens  : {token_count}")