Commit 038e8a00 authored by Arkadiusz Janz

Merge branch 'master' into 'develop'

Master merged

See merge request !2
parents 72657010 e5564824
......@@ -87,7 +87,7 @@ def set_attribute(token, key, value):
if not token.has_metadata():
token.create_metadata()
metadata = token.get_metadata()
metadata.set_attribute(_try_encode(key), _try_encode(value))
metadata.set_attribute(key, value)
def set_attributes(token, items):
......
......@@ -162,21 +162,21 @@ def get_tagset(tagset):
def read_chunks_it(filepath, tagset='nkjp'):
""" Returns a iterable chunk generator.
""" Returns a iterable chunk generator.
Args:
filepath: a path to CCL file
tagset: the name of the tagset that is used in the document or a tagset object itself.
Returns:
a iterable chunk generator.
an iterable chunk generator.
"""
tagset = get_tagset(tagset)
reader = corpus2.TokenReader_create_path_reader('ccl', tagset, filepath)
while True:
chunk = reader.get_next_chunk()
if not chunk:
break
yield chunk
......@@ -190,14 +190,14 @@ def read_sentences_it(filepath, tagset='nkjp'):
Args:
filepath: a path to CCL file
tagset: the name of the tagset that is used in the document or a tagset object itself.
Returns:
a iterable sentence generator.
"""
tagset = get_tagset(tagset)
reader = corpus2.TokenReader_create_path_reader('ccl', tagset, filepath)
while True:
sentence = reader.get_next_sentence()
......@@ -205,6 +205,4 @@ def read_sentences_it(filepath, tagset='nkjp'):
break
yield sentence
del reader
del reader
......@@ -6,7 +6,7 @@ setup(
author='Arkadiusz Janz, Anna Gut, Dominik Kaszewski',
description='''A convenient API based on Corpus2 library for analyzing textual
corpora in CCL format.''',
version='1.0.0',
version='1.0.2',
packages=['cclutils'],
zip_safe=False
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment