Skip to content
Snippets Groups Projects

Resolve "NKJP parameter tuning"

Files

@@ -311,12 +311,22 @@ def convert_examples_to_features_nosq(examples, label_list, max_seq_length, enco
assert len(valid) == max_seq_length
assert len(label_mask) == max_seq_length
features.append(
InputFeatures(input_ids=token_ids,
input_mask=input_mask,
label_id=label_ids,
valid_ids=valid,
label_mask=label_mask))
if ex_index < 2:
logging.debug("*** Example ***")
logging.debug("guid: %s" % example.guid)
logging.debug("tokens: %s" % " ".join([str(x) for x in token_ids]))
logging.debug("input_ids: %s" % " ".join([str(x) for x in token_ids]))
logging.debug("input_mask: %s" % " ".join([str(x) for x in input_mask]))
logging.debug("label: %s (id = %s)" % (example.label, " ".join(map(str, label_ids))))
logging.debug("label_mask: %s" % " ".join([str(x) for x in label_mask]))
logging.debug("valid mask: %s" % " ".join([str(x) for x in valid]))
features.append(InputFeatures(input_ids=token_ids,
input_mask=input_mask,
label_id=label_ids,
valid_ids=valid,
label_mask=label_mask))
return features
@@ -463,17 +473,15 @@ def read_tsv(filename, with_labels=False):
return data
def save_tsv(filename, outfilename, predictions):
    """Write per-token predictions back into a CoNLL-style TSV file.

    Reads `filename` line by line, copies blank lines and '-DOCSTART'
    marker lines through verbatim, and replaces the label column of every
    token line with the next prediction.

    Args:
        filename: input TSV path (token lines are tab-separated, first
            column is the token).
        outfilename: output TSV path; overwritten if it exists.
        predictions: list of per-sentence label lists; flattened and
            consumed one label per token line, in order.

    Raises:
        IndexError: if there are fewer flattened predictions than token
            lines in the input file.
    """
    # Flatten the per-sentence predictions into one token-level stream.
    flat_predictions = [item for sublist in predictions for item in sublist]
    i = 0
    with codecs.open(outfilename, "w", "utf8") as fout:
        # Fix: open the input with a context manager too — the original
        # iterated an unclosed codecs.open() handle (resource leak).
        with codecs.open(filename, "r", "utf-8") as fin:
            for line in fin:
                cols = line.split("\t")
                if len(line.strip()) == 0 or line.startswith('-DOCSTART'):
                    # Preserve sentence separators / document markers as-is.
                    fout.write(line)
                else:
                    fout.write('%s\t%s\n' % (cols[0], flat_predictions[i]))
                    i += 1
def save_tsv(output_path, sentences, predictions):
    """Write tokens and their predicted labels as a CoNLL-style TSV file.

    Each sentence is emitted as one `token<TAB>label` line per token,
    with a blank line between sentences.

    Args:
        output_path: output TSV path; overwritten if it exists.
        sentences: list of token lists, one list per sentence.
        predictions: list of label lists, aligned with `sentences`.

    Raises:
        ValueError: if `sentences` and `predictions` differ in length.
    """
    # Validate BEFORE opening the file: the original used `assert`, which
    # is stripped under -O (zip would then silently truncate), and which
    # fired only after the output file had already been truncated/created.
    if len(sentences) != len(predictions):
        raise ValueError(
            "sentences and predictions differ in length: %d != %d"
            % (len(sentences), len(predictions)))
    with codecs.open(output_path, "w", "utf8") as fout:
        for tokens, labels in zip(sentences, predictions):
            for token, label in zip(tokens, labels):
                fout.write(f'{token}\t{label}\n')
            # Blank line separates sentences (CoNLL convention).
            fout.write("\n")
def get_dict_for_record(json_ann):
@@ -483,7 +491,8 @@ def get_dict_for_record(json_ann):
if ann.find('derivType') < 0:
if ann.strip() != '':
annotation = ann.split('\t')[1].split(' ')[0]
token = ann.split('\t')[-1]
token = ann.split('\t')[-1]
if token in token_dict.keys():
token_dict[token] = ''.join([token_dict[token],'#',annotation])
else:
Loading