From 64a66811a0c4a52292e8a67d1b7c17e381643421 Mon Sep 17 00:00:00 2001 From: Grzegorz Kostkowski <grzegorz.kostkowski@pwr.edu.pl> Date: Thu, 25 Feb 2021 18:28:22 +0100 Subject: [PATCH 1/3] Move method sentence2str, add missed methods to _all --- cclutils/_base.py | 30 ++++++++++++++++++++++++++++++ cclutils/_copies.py | 31 +------------------------------ 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/cclutils/_base.py b/cclutils/_base.py index 8e49df1..7dd0e7e 100644 --- a/cclutils/_base.py +++ b/cclutils/_base.py @@ -12,7 +12,10 @@ __all__ = [ 'read', 'write', 'read_from_directory', + 'read_chunks_it', + 'read_sentences_it', 'get_tagset', + 'sentence2str' ] @@ -206,3 +209,30 @@ def read_sentences_it(filepath, tagset='nkjp'): yield sentence del reader + + +def sentence2str(sentence, use_bases=False, tagset='nkjp'): + """ Return corpus2.Sentence as a string. + + Args: + sentence: a sentence object (corpus2.Sentence). + use_bases: if set to True, then we take base forms + instead of taking the orths. + + Returns: + a string representation of the input sentence object. + + """ + if isinstance(tagset, str): + tagset = corpus2.get_named_tagset(tagset) + + text = [] + for token in sentence.tokens(): + text.append(" " if token.after_space() else "") + if not use_bases: + token_string = token.orth_utf8() + else: + token_string = token.get_preferred_lexeme(tagset).lemma_utf8() + text.append(token_string) + + return "".join(text).strip() diff --git a/cclutils/_copies.py b/cclutils/_copies.py index 9f033b2..ac92d92 100644 --- a/cclutils/_copies.py +++ b/cclutils/_copies.py @@ -9,8 +9,7 @@ ENCODING = "utf-8" __all__ = [ 'copy_chunk', 'copy_sentence', - 'copy_relation', - 'sentence2str' + 'copy_relation' ] @@ -119,31 +118,3 @@ def _copy_chunk_attributes(source_chunk, target_chunk): """ for key, value in list(source_chunk.attributes().items()): target_chunk.set_attribute(key, value) - - -# todo: move somewhere else! 
-def sentence2str(sentence, use_bases=False, tagset='nkjp'): - """ Return corpus2.Sentence as a string. - - Args: - sentence: a sentence object (corpus2.Sentence). - use_bases: if set to True, the we take base forms - instead of taking the orths. - - Returns: - a string representation of the input sentence object. - - """ - if isinstance(tagset, str): - tagset = corpus2.get_named_tagset(tagset) - - text = [] - for token in sentence.tokens(): - text.append(" " if token.after_space() else "") - if not use_bases: - token_string = token.orth_utf8() - else: - token_string = token.get_preferred_lexeme(tagset).lemma_utf8() - text.append(token_string) - - return "".join(text).strip() -- GitLab From 7f72cc26670f917e94245bc7ed274bed2092de14 Mon Sep 17 00:00:00 2001 From: Grzegorz Kostkowski <grzegorz.kostkowski@pwr.edu.pl> Date: Fri, 26 Feb 2021 06:22:13 +0100 Subject: [PATCH 2/3] Add gitlab CI config --- .gitlab-ci.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..174d777 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,37 @@ +image: clarinpl/python:3.6 + +before_script: + - pip install tox==2.9.1 + +cache: + paths: + - .tox + +stages: + - check_style + - push_wheel + +pep8: + stage: check_style + script: + - tox -v -e pep8 + +docstyle: + stage: check_style + script: + - tox -v -e docstyle + +push_wheel: + before_script: + - pip install twine + only: + - master + stage: push_wheel + when: on_success + script: + - python setup.py sdist bdist_wheel + - python -m twine upload + --repository-url https://pypi.clarin-pl.eu/ + -u $PIPY_USER -p $PIPY_PASS dist/cclutils*.whl + + -- GitLab From c1f5d0e30fec9890206ba734a146512511606763 Mon Sep 17 00:00:00 2001 From: Grzegorz Kostkowski <grzegorz.kostkowski@pwr.edu.pl> Date: Fri, 26 Feb 2021 09:02:44 +0100 Subject: [PATCH 3/3] Add missing tox.ini --- tox.ini | 45 
+++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tox.ini diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..70f02ca --- /dev/null +++ b/tox.ini @@ -0,0 +1,45 @@ +[tox] +envlist = pep8,docstyle +skipsdist = True + +[testenv:pep8] +deps = + flake8 +basepython = python3 +commands = + flake8 {posargs} + +[testenv:docstyle] +deps = + pydocstyle +basepython = python3 +commands = + pydocstyle --verbose {posargs} + +[flake8] +# W504 skipped because it is overeager and unnecessary +ignore = W504 +show-source = True +exclude = .git,.venv,.tox,dist,doc,*egg,build,venv +import-order-style = pep8 +max-line-length = 80 + + +[pydocstyle] +# D104 Missing docstring in public package +# D203 1 blank line required before class docstring +# D213 Multi-line docstring summary should start at the second line +# D214 Section is over-indented +# D215 Section underline is over-indented +# D401 First line should be in imperative mood; try rephrasing +# D405 Section name should be properly capitalized +# D406 Section name should end with a newline +# D407 Missing dashed underline after section +# D408 Section underline should be in the line following the section’s name +# D409 Section underline should match the length of its name +# D410 Missing blank line after section +# D411 Missing blank line before section +ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411 +match-dir = ^(?!\.tox|venv).* +match = ^(?!setup).*\.py + -- GitLab