Skip to content
Snippets Groups Projects
Commit c7a903ad authored by Michał Pogoda's avatar Michał Pogoda
Browse files

Added option to run from cli

parent 7eb47376
2 merge requests: !10 Anonimizer v2, !7 Better coverage
Pipeline #6745 failed with stage
in 31 seconds
...@@ -139,3 +139,5 @@ cython_debug/ ...@@ -139,3 +139,5 @@ cython_debug/
.vscode .vscode
*.ipynb *.ipynb
/test.txt
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE chunkList SYSTEM "ccl.dtd">
<chunkList>
<chunk type="p" id="ch1">
<sentence id="s1">
<tok>
<orth>Marek</orth>
<lex disamb="1"><base>Marek</base><ctag>subst:sg:nom:m1</ctag></lex>
<lex disamb="1"><base>marek</base><ctag>subst:sg:nom:m1</ctag></lex>
<ann chan="person_first_nam" head="1">1</ann>
<ann chan="person_last_nam">0</ann>
<ann chan="city_nam">0</ann>
</tok>
<tok>
<orth>Kowalski</orth>
<lex disamb="1"><base>Kowalski</base><ctag>subst:sg:nom:m1</ctag></lex>
<ann chan="person_first_nam">0</ann>
<ann chan="person_last_nam" head="1">1</ann>
<ann chan="city_nam">0</ann>
</tok>
<tok>
<orth>pojechał</orth>
<lex disamb="1"><base>pojechać</base><ctag>praet:sg:m1:perf</ctag></lex>
<ann chan="person_first_nam">0</ann>
<ann chan="person_last_nam">0</ann>
<ann chan="city_nam">0</ann>
</tok>
<tok>
<orth>do</orth>
<lex disamb="1"><base>do</base><ctag>prep:gen</ctag></lex>
<ann chan="person_first_nam">0</ann>
<ann chan="person_last_nam">0</ann>
<ann chan="city_nam">0</ann>
</tok>
<tok>
<orth>Wrocławia</orth>
<lex disamb="1"><base>Wrocław</base><ctag>subst:sg:gen:m3</ctag></lex>
<ann chan="person_first_nam">0</ann>
<ann chan="person_last_nam">0</ann>
<ann chan="city_nam" head="1">1</ann>
</tok>
</sentence>
</chunk>
</chunkList>
\ No newline at end of file
--index-url https://pypi.clarin-pl.eu/simple/
nlp-ws nlp-ws
regex==2020.10.28 regex==2020.10.28
Babel==2.8.0 Babel==2.8.0
\ No newline at end of file
"""Implementation of anonymizer service."""
import argparse
from src.worker import Worker
from src.anonymizers.polish_anonymizer import PolishAnonymizer
from src.anonymizers.english_anonymizer import EnglishAnonymizer
from src.anonymizers.russian_anonymizer import RussianAnonymizer
def get_args(argv=None):
    """Parse the anonymizer's command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so a plain
            ``get_args()`` call behaves exactly as before.

    Returns:
        The ``argparse.Namespace`` holding ``language``, ``method``,
        ``input_file`` and ``output_file``.
    """
    parser = argparse.ArgumentParser(description="anonymizer")
    parser.add_argument(
        "-l", "--language", help="Language of the input text", default="pl"
    )
    parser.add_argument(
        "-m",
        "--method",
        help="Anonymization method",
        default="tag",
        choices=["delete", "tag", "pseudo"],
    )
    parser.add_argument("input_file", help="Path to input file")
    parser.add_argument("output_file", help="Path to output file")
    # Forwarding argv lets callers and tests supply arguments explicitly
    # instead of mutating sys.argv.
    return parser.parse_args(argv)
def main():
    """Run the anonymizer selected by the command line arguments.

    Reads the arguments via ``get_args``, builds the anonymizer class
    matching the requested language with the chosen method as its task
    options, and passes the input and output paths to its ``process``
    method.
    """
    cli = get_args()
    # Language codes without an entry here fall back to PolishAnonymizer.
    anonymizer_cls = {
        "pl": PolishAnonymizer,
        "en": EnglishAnonymizer,
        "ru": RussianAnonymizer,
    }.get(cli.language, PolishAnonymizer)
    anonymizer = anonymizer_cls({"method": cli.method})
    anonymizer.process(cli.input_file, cli.output_file)


# Only run the CLI when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
...@@ -79,7 +79,7 @@ class PolishAnonymizer(BaseAnonymizer): ...@@ -79,7 +79,7 @@ class PolishAnonymizer(BaseAnonymizer):
self._pseudo_ann_list = list() self._pseudo_ann_list = list()
self._load_file() self._load_file()
def _load_file(self, filename='pl_dict.txt'): def _load_file(self, filename='dictionaries/pl_dict.txt'):
with open(filename, 'r', encoding='utf-8') as f: with open(filename, 'r', encoding='utf-8') as f:
for line in f.readlines(): for line in f.readlines():
l_list = line.split() l_list = line.split()
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment