Commit 64a66811 authored by Grzegorz Kostkowski

Move function sentence2str, add missing names to __all__

parent f9038678
......@@ -12,7 +12,10 @@ __all__ = [
'read',
'write',
'read_from_directory',
'read_chunks_it',
'read_sentences_it',
'get_tagset',
'sentence2str'
]
......@@ -206,3 +209,30 @@ def read_sentences_it(filepath, tagset='nkjp'):
yield sentence
del reader
def sentence2str(sentence, use_bases=False, tagset='nkjp'):
    """Return corpus2.Sentence as a string.

    Tokens are concatenated in order. A single space is emitted before
    every token whose ``after_space()`` is true, and leading/trailing
    whitespace is stripped from the final result.

    Args:
        sentence: a sentence object (corpus2.Sentence).
        use_bases: if set to True, the base form (lemma) of each token's
            preferred lexeme is used instead of the token's orth.
        tagset: a tagset name (str), which is resolved with
            ``corpus2.get_named_tagset``, or any other object, which is
            passed unchanged to ``get_preferred_lexeme``. Only consulted
            when ``use_bases`` is True.

    Returns:
        a string representation of the input sentence object.
    """
    # The tagset is needed only to pick the preferred lexeme, so resolve a
    # tagset name only when lemmas were actually requested.
    if use_bases and isinstance(tagset, str):
        tagset = corpus2.get_named_tagset(tagset)

    parts = []
    for token in sentence.tokens():
        if token.after_space():
            parts.append(" ")
        if use_bases:
            parts.append(token.get_preferred_lexeme(tagset).lemma_utf8())
        else:
            parts.append(token.orth_utf8())
    # A leading space contributed by the first token is removed here.
    return "".join(parts).strip()
......@@ -9,8 +9,7 @@ ENCODING = "utf-8"
# Names exported by a star-import of this module.
# NOTE(review): this list appears in diff form - 'sentence2str' looks like the
# removed side and the trailing 'copy_relation' like the added side. Read as
# plain Python, the two adjacent literals would be concatenated into a single
# string. Confirm against the actual file before relying on this list.
__all__ = [
'copy_chunk',
'copy_sentence',
'copy_relation',
'sentence2str'
'copy_relation'
]
......@@ -119,31 +118,3 @@ def _copy_chunk_attributes(source_chunk, target_chunk):
"""
for key, value in list(source_chunk.attributes().items()):
target_chunk.set_attribute(key, value)
# todo: move somewhere else!
def sentence2str(sentence, use_bases=False, tagset='nkjp'):
    """Return corpus2.Sentence as a string.

    Tokens are concatenated in order. A single space is emitted before
    every token whose ``after_space()`` is true, and leading/trailing
    whitespace is stripped from the final result.

    Args:
        sentence: a sentence object (corpus2.Sentence).
        use_bases: if set to True, the base form (lemma) of each token's
            preferred lexeme is used instead of the token's orth.
        tagset: a tagset name (str), which is resolved with
            ``corpus2.get_named_tagset``, or any other object, which is
            passed unchanged to ``get_preferred_lexeme``. Only consulted
            when ``use_bases`` is True.

    Returns:
        a string representation of the input sentence object.
    """
    # The tagset is needed only to pick the preferred lexeme, so resolve a
    # tagset name only when lemmas were actually requested.
    if use_bases and isinstance(tagset, str):
        tagset = corpus2.get_named_tagset(tagset)

    parts = []
    for token in sentence.tokens():
        if token.after_space():
            parts.append(" ")
        if use_bases:
            parts.append(token.get_preferred_lexeme(tagset).lemma_utf8())
        else:
            parts.append(token.orth_utf8())
    # A leading space contributed by the first token is removed here.
    return "".join(parts).strip()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment