From 818616282dd93c901305651db15c5495334376ae Mon Sep 17 00:00:00 2001 From: Arkadiusz Janz <arkadiusz.janz@pwr.edu.pl> Date: Tue, 7 Jul 2020 11:54:30 +0000 Subject: [PATCH] Update README.md --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index 3b00726..3889816 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,17 @@ echo 'deb https://apt.clarin-pl.eu/ /' > /etc/apt/sources.list.d/clarin.list apt-get update && apt-get install corpus2-python3.6 ``` +It is also possible to use Docker: + +``` +FROM clarinpl/python:3.6 + +RUN apt-get update && apt-get install -y \ + corpus2-python3.6 + +RUN pip install --upgrade pip && pip install cclutils +``` + Install ======= @@ -168,4 +179,19 @@ True >>> token.set_wa(False) >>> token.after_space() False +``` + +Sentence manipulation +===================== + +1. Print out the sentences of a given document + +```python +document = cclutils.read('./example.xml') + +sentences = (sentence for paragraph in document.paragraphs() + for sentence in paragraph.sentences()) + +for sentence in sentences: + print(cclutils.sentence2str(sentence)) ``` \ No newline at end of file -- GitLab