Commit 3bed04eb authored by Grzegorz Kostkowski

Add possibility to set an annotation for an expression (more than one token)

parent 9328f692
Pipeline #2684 passed in 29 seconds
@@ -13,6 +13,7 @@ __all__ = [
     'get_annotations',
     'get_annotation',
     'set_annotation_for_token',
+    'set_annotation_for_tokens',
     'is_head_of'
 ]
@@ -195,12 +196,34 @@ def set_annotation_for_token(sentence, token, key, value=None, set_head=False):
         value (int, bool): annotation number (convertible to integer)
     """
+    idx = 0 if set_head else None
+    set_annotation_for_tokens(sentence, [token], key, value=value, head_index=idx)
+
+
+def set_annotation_for_tokens(sentence, tokens, key, value=None, head_index=None):
+    """
+    Set annotation for a group of tokens from the same sentence. The tokens are
+    treated as a representation of a single expression and thus they all get
+    the same annotation number (value).
+
+    Args:
+        sentence (Corpus2.Sentence)
+        tokens (list of Corpus2.Token)
+        key (str): a name for the annotation channel
+        value (int, bool): annotation number (convertible to integer)
+        head_index (int): index of the token in the passed list (counting starts
+            from 0) which will be marked as the head of the annotation. If not
+            given, the head will not be set.
+    """
+    if not tokens or not isinstance(tokens, list):
+        raise ValueError(f"List of tokens not given or invalid format: "
+                         f"{tokens} (type: {type(tokens)})")
     ann_sentence = annotate_sentence(sentence)
     if key not in ann_sentence.all_channels():
         ann_sentence.create_channel(key)
     channel = ann_sentence.get_channel(key)
-    token_index = _find_token(sentence, token)
     if value is not None:
         try:
             segment = int(value)
@@ -208,9 +231,16 @@ def set_annotation_for_token(sentence, token, key, value=None, set_head=False):
             raise Exception("Wrong value type - should be an integer.")
     else:
         segment = channel.get_new_segment_index()
-    channel.set_segment_at(token_index, segment)
-    if set_head:
-        channel.set_head_at(token_index, True)
+    for i, token in enumerate(tokens):
+        token_index = _find_token(sentence, token)
+        channel.set_segment_at(token_index, segment)
+        if head_index is not None:
+            if head_index < 0 or head_index >= len(tokens):
+                raise ValueError(f"head_index ({head_index}) does not match "
+                                 f"passed list of tokens.")
+            if i == head_index:
+                channel.set_head_at(token_index, True)


 def is_head_of(sentence, token, key):
......
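For context, a minimal usage sketch of the new function; the `sentence` object, the 'expr' channel name, and the use of sentence.tokens() are illustrative assumptions, not part of this commit:

    # Hypothetical usage (not from the commit): annotate a two-token expression
    # and mark its first token as the head of the annotation.
    # `sentence` is assumed to be a Corpus2.Sentence obtained elsewhere (e.g.
    # from a corpus2 reader); sentence.tokens() is assumed to return its tokens
    # in order.
    expression = list(sentence.tokens())[:2]
    set_annotation_for_tokens(sentence, expression, 'expr', head_index=0)

    # The single-token helper is now a thin wrapper around the same call:
    set_annotation_for_token(sentence, expression[0], 'expr', set_head=True)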