Commit aa49c7f5 authored by Grzegorz Kostkowski

Create converters for different tagsets

parent 20f14741
......@@ -57,7 +57,7 @@ class WcclAnnotator(object):
(to avoid recreation which is time-consuming).
"""
def __init__(self):
self.converter = None
self.converters = {}
def process(self, ccl_file_or_list, out_file, wccl_dict='', annotation='mwe',
tagset='nkjp', ann_2_wccl_dict=None,
......@@ -76,17 +76,17 @@ class WcclAnnotator(object):
ann_2_wccl_dict = read_dict(ann_2_wccl_dict, separator)
else:
ann_2_wccl_dict = {annotation: wccl_dict}
if not self.converter:
self.converter = MWEConverter(ann_2_wccl_dict, tagset=tagset)
if tagset not in self.converters:
self.converters[tagset] = MWEConverter(ann_2_wccl_dict, tagset=tagset)
if batch_mode:
self._run_batch(ccl_file_or_list, avoid_merging, out_list=out_file,
verbose=verbose)
tagset=tagset, verbose=verbose)
else:
assert out_file
self.converter.convert(ccl_file_or_list, out_file, avoid_merging)
self.converters[tagset].convert(ccl_file_or_list, out_file, avoid_merging)
def _run_batch(self, ccls_list_path, avoid_merging, out_list=None,
verbose=False):
tagset='nkjp', verbose=False):
out_files = []
with open(ccls_list_path, 'r') as ifile:
for ccl_file in ifile:
......@@ -95,7 +95,7 @@ class WcclAnnotator(object):
if verbose:
print(f"Processing file '{ccl_file}' into '{out_file}' ...")
try:
self.converter.convert(ccl_file, out_file, avoid_merging)
self.converters[tagset].convert(ccl_file, out_file, avoid_merging)
except Exception as e:
print(f"Cannot process file '{ccl_file}':")
print(e)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment