Skip to content
Snippets Groups Projects
Commit fdebbc6f authored by Paweł Walkowiak's avatar Paweł Walkowiak
Browse files

Merge branch 'tests' into 'master'

Tests

See merge request !1
parents f8cdf0a1 da034cf0
Branches
1 merge request!1Tests
Pipeline #7351 passed with stages
in 2 minutes and 14 seconds
Showing
with 417 additions and 16 deletions
......@@ -6,6 +6,8 @@ cache:
stages:
- check_style
- tests
- pages
- build_master
- build_develop
......@@ -23,6 +25,30 @@ docstyle:
script:
- tox -v -e docstyle
# CI job for the "tests" stage: installs a pinned tox and runs the
# "pytest" tox environment.
tests:
stage: tests
before_script:
- pip install tox==2.9.1
script:
- tox -v -e pytest
artifacts:
# The htmlcov directory is kept as a job artifact for one week.
paths:
- htmlcov
expire_in: 1 week
# report.xml is registered as a JUnit report.
reports:
junit:
- report.xml
# CI job for the "pages" stage: copies the contents of htmlcov into
# public/coverage and exposes the public directory as an artifact.
# NOTE(review): presumably htmlcov comes from the artifacts of the "tests"
# job and "public" is served by GitLab Pages - confirm.
pages:
stage: pages
script:
- mkdir -p public/coverage
- cp -r htmlcov/* public/coverage/
artifacts:
name: coverage
paths:
- public
build_develop:
except:
- master
......
{
"posconverter_lone": {"task": [{"posconverter": {"input_format": "json", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": false}}], "input": "remove_json_text.json", "expected": "remove_json_text.json"},
"post_spacy_tagger_json_text": {"task": [{"spacy": {"lang": "en", "method": "tagger"}}, {"posconverter": {"input_format": "json", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": true}}], "input": "post_spacy_input", "expected": "post_spacy_tagger_json_text_expected.json"},
"post_spacy_tagger_no_json_text": {"task": [{"spacy": {"lang": "en", "method": "tagger"}}, {"posconverter": {"input_format": "json", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": false}}], "input": "post_spacy_input", "expected": "post_spacy_tagger_no_json_text_expected.json"},
"post_spacy_ner_no_json_text": {"task": [{"spacy": {"lang": "en", "method": "ner"}}, {"posconverter": {"input_format": "json", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": false}}], "input": "post_spacy_input", "expected": "post_spacy_ner_no_json_text_expected.json"},
"post_postagger_json_json_text": {"task": [{"postagger": {"lang": "pl", "output": "json"}}, {"posconverter": {"input_format": "json", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": true}}], "input": "post_postagger_input", "expected": "post_postagger_json_json_text_expected.json"},
"post_postagger_ccl_json_text": {"task": [{"postagger": {"lang": "pl", "output": "ccl"}}, {"posconverter": {"input_format": "ccl", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": true}}], "input": "post_postagger_input", "expected": "post_postagger_ccl_json_text_expected.json"},
"post_postagger_ccl_no_json_text": {"task": [{"postagger": {"lang": "pl", "output": "ccl"}}, {"posconverter": {"input_format": "ccl", "output_format": "json", "input_tagset": "ud", "output_tagset": "identical", "json_text": false}}], "input": "post_postagger_input", "expected": "post_postagger_ccl_no_json_text_expected.json"},
"pre_fextor3": {"task": [{"posconverter": {"input_format": "json", "output_format": "json", "input_tagset": "nkjp", "output_tagset": "identical", "json_text": false}}, "fextor3"], "input": "pre_fextor3_input.json", "expected": "pre_fextor3_expected.json"}
}
......@@ -5,7 +5,7 @@ import os
import nlp_ws
import json
import pathlib
from xml.sax import handler, make_parser
from xml.sax import handler, make_parser, SAXParseException
from ccl2json.parse import CCLhandler
from xml.dom import pulldom
import configparser
......@@ -76,18 +76,43 @@ def ccl2json(path_in, path_out, tagset='nkjp', json_text=True):
parser = make_parser()
parser.setFeature(handler.feature_external_ges, False)
parser.setContentHandler(CCLhandler())
parser.parse(path_in)
with open(path_out, 'w', encoding='utf-8') as file_out:
data_out = {
"filename": pathlib.Path(path_in).stem,
'tagset': tagset,
'tokens': [_convert_tagset_in_token(token.as_dict(), tagset)
for token in parser.getContentHandler().get_tokens()]
}
if json_text:
data_out['text'] = parser.getContentHandler().get_text()
json.dump(data_out, file_out, ensure_ascii=False)
try:
parser.parse(path_in)
except SAXParseException:
raise Exception("Error parsing file, check file syntax") from None
else:
_log.log(logging.INFO, "Parser initialised and input data read ")
with open(path_out, 'w', encoding='utf-8') as file_out:
data_out = {
"filename": pathlib.Path(path_in).stem,
'tagset': tagset,
'tokens': [_convert_tagset_in_token(token.as_dict(), tagset)
for token in parser.getContentHandler().get_tokens()]
}
_log.log(logging.INFO, "Data to write prepared ")
if json_text:
data_out['text'] = parser.getContentHandler().get_text()
_log.log(logging.INFO, "Original text attached to output data ")
json.dump(data_out, file_out, ensure_ascii=False)
_log.log(logging.INFO, "Data dumped to output file ")
def remove_attribute(input_path, output_path, attribute):
    """Remove a single key from json file and write it to another one.

    Both files are read and written as UTF-8, so non-ASCII characters
    (emitted verbatim because of ``ensure_ascii=False``) survive regardless
    of the platform's default encoding. When the key is absent the content
    is written unchanged.

    :param input_path: path to input json file
    :type input_path: str
    :param output_path: path to output json file
    :type output_path: str
    :param attribute: key being removed
    :type attribute: str
    """
    with open(input_path, encoding='utf-8') as input_file:
        json_obj = json.load(input_file)
    # A default of None makes a missing key a no-op instead of a KeyError.
    json_obj.pop(attribute, None)
    with open(output_path, 'wt', encoding='utf-8') as output_file:
        json.dump(json_obj, output_file, ensure_ascii=False)
class ConverterWorker(nlp_ws.NLPWorker):
......@@ -128,9 +153,12 @@ class ConverterWorker(nlp_ws.NLPWorker):
(default = True)
:type json_text: bool
"""
if input_format == output_format and \
(output_format == "identical" or
input_format == output_format):
if input_format == "json" and output_format == "json" and \
not json_text:
remove_attribute(input_path, output_path, "text")
elif input_format == output_format and \
(output_tagset == "identical" or
input_tagset == output_tagset):
shutil.copyfile(input_path, output_path)
elif input_format == "ccl" and output_format == "lemmas":
ccl_2_lemmas(input_path, output_path)
......
from os.path import join
import pytest
from src.converter import ConverterWorker
@pytest.fixture
def dir_testdata():
    """Return the relative path of the directory holding the test data."""
    testdata_root = join("tests", "testdata")
    return testdata_root
@pytest.fixture
def input_dir(dir_testdata):
    """Return the path of the ``input`` sub-directory of the test data."""
    subdirectory = 'input'
    return join(dir_testdata, subdirectory)
@pytest.fixture
def output_dir(dir_testdata):
    """Return the path of the ``output`` sub-directory of the test data."""
    subdirectory = 'output'
    return join(dir_testdata, subdirectory)
@pytest.fixture
def expected_dir(dir_testdata):
    """Return the path of the ``expected`` sub-directory of the test data."""
    subdirectory = 'expected'
    return join(dir_testdata, subdirectory)
@pytest.fixture
def identical_input_output_formats_input():
    """Return the input file name of the identical-formats scenario."""
    file_name = 'identical_input_output_formats.json'
    return file_name
@pytest.fixture
def identical_input_output_formats_output():
    """Return the output file name of the identical-formats scenario."""
    file_name = 'identical_input_output_formats.json'
    return file_name
@pytest.fixture
def identical_input_output_formats_expected():
    """Return the expected-result file name of the identical-formats scenario."""
    file_name = 'identical_input_output_formats.json'
    return file_name
@pytest.fixture
def identical_input_output_formats_task_options():
    """Return task options: json in, json out, ud tagset, ``json_text`` on."""
    return dict(
        input_format='json',
        output_format='json',
        input_tagset='ud',
        output_tagset='identical',
        json_text=True,
    )
@pytest.fixture
def remove_json_text_input():
    """Return the input file name of the remove-json-text scenario."""
    file_name = 'remove_json_text.json'
    return file_name
@pytest.fixture
def remove_json_text_output():
    """Return the output file name of the remove-json-text scenario."""
    file_name = 'remove_json_text.json'
    return file_name
@pytest.fixture
def remove_json_text_expected():
    """Return the expected-result file name of the remove-json-text scenario."""
    file_name = 'remove_json_text.json'
    return file_name
@pytest.fixture
def remove_json_text_task_options():
    """Return task options: json in, json out, ud tagset, ``json_text`` off."""
    return dict(
        input_format='json',
        output_format='json',
        input_tagset='ud',
        output_tagset='identical',
        json_text=False,
    )
@pytest.fixture
def simple_ccl2json_conversion_input():
    """Return the input file name of the ccl-to-json scenario."""
    file_name = 'simple_ccl2json_conversion.xml'
    return file_name
@pytest.fixture
def simple_ccl2json_conversion_output():
    """Return the output file name of the ccl-to-json scenario."""
    file_name = 'simple_ccl2json_conversion.json'
    return file_name
@pytest.fixture
def simple_ccl2json_conversion_expected():
    """Return the expected-result file name of the ccl-to-json scenario."""
    file_name = 'simple_ccl2json_conversion.json'
    return file_name
@pytest.fixture
def simple_ccl2json_conversion_task_options():
    """Return task options: ccl in, json out, nkjp tagset kept, ``json_text`` off."""
    return dict(
        input_format='ccl',
        output_format='json',
        input_tagset='nkjp',
        output_tagset='identical',
        json_text=False,
    )
@pytest.fixture
def simple_nkjp2ud_conversion_input():
    """Return the input file name of the nkjp-to-ud scenario."""
    file_name = 'simple_nkjp2ud_conversion.xml'
    return file_name
@pytest.fixture
def simple_nkjp2ud_conversion_output():
    """Return the output file name of the nkjp-to-ud scenario."""
    file_name = 'simple_nkjp2ud_conversion.json'
    return file_name
@pytest.fixture
def simple_nkjp2ud_conversion_expected():
    """Return the expected-result file name of the nkjp-to-ud scenario."""
    file_name = 'simple_nkjp2ud_conversion.json'
    return file_name
@pytest.fixture
def simple_nkjp2ud_conversion_task_options():
    """Return task options: ccl in, json out, nkjp to ud tagset, ``json_text`` off."""
    return dict(
        input_format='ccl',
        output_format='json',
        input_tagset='nkjp',
        output_tagset='ud',
        json_text=False,
    )
@pytest.fixture
def simple_ccl2lemmas_conversion_input():
    """Return the input file name of the ccl-to-lemmas scenario."""
    file_name = 'simple_ccl2lemmas_conversion.xml'
    return file_name
@pytest.fixture
def simple_ccl2lemmas_conversion_output():
    """Return the output file name of the ccl-to-lemmas scenario."""
    file_name = 'simple_ccl2lemmas_conversion'
    return file_name
@pytest.fixture
def simple_ccl2lemmas_conversion_expected():
    """Return the expected-result file name of the ccl-to-lemmas scenario."""
    file_name = 'simple_ccl2lemmas_conversion'
    return file_name
@pytest.fixture
def simple_ccl2lemmas_conversion_task_options():
    """Return task options: ccl in, lemmas out, nkjp tagset kept, ``json_text`` off."""
    return dict(
        input_format='ccl',
        output_format='lemmas',
        input_tagset='nkjp',
        output_tagset='identical',
        json_text=False,
    )
@pytest.fixture
def undefined_input_format_input(identical_input_output_formats_input):
    """Return the input file name used by the undefined-input-format test.

    The ``identical_input_output_formats_input`` fixture is requested as a
    parameter so that its *value* (a file name) is returned. Referring to
    the bare module-level name would return the fixture function object,
    which makes ``os.path.join`` fail with ``TypeError`` before the worker
    is ever called.
    """
    return identical_input_output_formats_input
@pytest.fixture
def undefined_input_format_task_options():
    """Return task options whose ``input_format`` is the value ``'foo'``."""
    return dict(
        input_format='foo',
        output_format='json',
        input_tagset='ud',
        output_tagset='identical',
        json_text=False,
    )
@pytest.fixture
def undefined_output_format_input(identical_input_output_formats_input):
    """Return the input file name used by the undefined-output-format test.

    The ``identical_input_output_formats_input`` fixture is requested as a
    parameter so that its *value* (a file name) is returned. Referring to
    the bare module-level name would return the fixture function object,
    which makes ``os.path.join`` fail with ``TypeError`` before the worker
    is ever called.
    """
    return identical_input_output_formats_input
@pytest.fixture
def undefined_output_format_task_options():
    """Return task options whose ``output_format`` is the value ``'foobar'``."""
    return dict(
        input_format='json',
        output_format='foobar',
        input_tagset='ud',
        output_tagset='identical',
        json_text=False,
    )
@pytest.fixture
def config():
    """Return an empty configuration mapping for the worker."""
    empty_config = dict()
    return empty_config
@pytest.fixture
def worker(config):
    """Return a ``ConverterWorker`` on which ``static_init(config)`` was called."""
    converter = ConverterWorker()
    converter.static_init(config)
    return converter
import os
from filecmp import cmp
from os.path import join
import pytest
from src.converter import ConverterWorker
def test_init():
    """Check that a freshly built worker is of the class ``ConverterWorker``."""
    instance = ConverterWorker()
    class_name = type(instance).__name__
    assert class_name == 'ConverterWorker'
def test_identical_input_output_formats(input_dir, output_dir, expected_dir,
                                        identical_input_output_formats_input,
                                        identical_input_output_formats_task_options,
                                        identical_input_output_formats_output,
                                        identical_input_output_formats_expected,
                                        worker):
    """Run the identical-formats scenario and compare output with expectation.

    The produced file is removed afterwards even when processing or the
    comparison fails, so a failed run leaves nothing behind in the output
    directory.
    """
    input_path = join(input_dir, identical_input_output_formats_input)
    output_path = join(output_dir, identical_input_output_formats_output)
    expected_path = join(expected_dir, identical_input_output_formats_expected)
    try:
        worker.process(
            input_path,
            identical_input_output_formats_task_options,
            output_path
        )
        # shallow=False compares file contents instead of trusting matching
        # os.stat() signatures.
        assert cmp(output_path, expected_path, shallow=False)
    finally:
        if os.path.exists(output_path):
            os.remove(output_path)
def test_remove_json_text(input_dir, output_dir, expected_dir,
                          remove_json_text_input,
                          remove_json_text_task_options,
                          remove_json_text_output,
                          remove_json_text_expected,
                          worker):
    """Run the remove-json-text scenario and compare output with expectation.

    The produced file is removed afterwards even when processing or the
    comparison fails, so a failed run leaves nothing behind in the output
    directory.
    """
    input_path = join(input_dir, remove_json_text_input)
    output_path = join(output_dir, remove_json_text_output)
    expected_path = join(expected_dir, remove_json_text_expected)
    try:
        worker.process(
            input_path,
            remove_json_text_task_options,
            output_path
        )
        # shallow=False compares file contents instead of trusting matching
        # os.stat() signatures.
        assert cmp(output_path, expected_path, shallow=False)
    finally:
        if os.path.exists(output_path):
            os.remove(output_path)
def test_simple_ccl2json_conversion(input_dir, output_dir, expected_dir,
                                    simple_ccl2json_conversion_input,
                                    simple_ccl2json_conversion_task_options,
                                    simple_ccl2json_conversion_output,
                                    simple_ccl2json_conversion_expected,
                                    worker):
    """Run the ccl-to-json scenario and compare output with expectation.

    The produced file is removed afterwards even when processing or the
    comparison fails, so a failed run leaves nothing behind in the output
    directory.
    """
    input_path = join(input_dir, simple_ccl2json_conversion_input)
    output_path = join(output_dir, simple_ccl2json_conversion_output)
    expected_path = join(expected_dir, simple_ccl2json_conversion_expected)
    try:
        worker.process(
            input_path,
            simple_ccl2json_conversion_task_options,
            output_path
        )
        # shallow=False compares file contents instead of trusting matching
        # os.stat() signatures.
        assert cmp(output_path, expected_path, shallow=False)
    finally:
        if os.path.exists(output_path):
            os.remove(output_path)
def test_simple_nkjp2ud_conversion(input_dir, output_dir, expected_dir,
                                   simple_nkjp2ud_conversion_input,
                                   simple_nkjp2ud_conversion_task_options,
                                   simple_nkjp2ud_conversion_output,
                                   simple_nkjp2ud_conversion_expected,
                                   worker):
    """Run the nkjp-to-ud scenario and compare output with expectation.

    The produced file is removed afterwards even when processing or the
    comparison fails, so a failed run leaves nothing behind in the output
    directory.
    """
    input_path = join(input_dir, simple_nkjp2ud_conversion_input)
    output_path = join(output_dir, simple_nkjp2ud_conversion_output)
    expected_path = join(expected_dir, simple_nkjp2ud_conversion_expected)
    try:
        worker.process(
            input_path,
            simple_nkjp2ud_conversion_task_options,
            output_path
        )
        # shallow=False compares file contents instead of trusting matching
        # os.stat() signatures.
        assert cmp(output_path, expected_path, shallow=False)
    finally:
        if os.path.exists(output_path):
            os.remove(output_path)
def test_simple_ccl2lemmas_conversion(input_dir, output_dir, expected_dir,
                                      simple_ccl2lemmas_conversion_input,
                                      simple_ccl2lemmas_conversion_task_options,
                                      simple_ccl2lemmas_conversion_output,
                                      simple_ccl2lemmas_conversion_expected,
                                      worker):
    """Run the ccl-to-lemmas scenario and compare output with expectation.

    The produced file is removed afterwards even when processing or the
    comparison fails, so a failed run leaves nothing behind in the output
    directory.
    """
    input_path = join(input_dir, simple_ccl2lemmas_conversion_input)
    output_path = join(output_dir, simple_ccl2lemmas_conversion_output)
    expected_path = join(expected_dir, simple_ccl2lemmas_conversion_expected)
    try:
        worker.process(
            input_path,
            simple_ccl2lemmas_conversion_task_options,
            output_path
        )
        # shallow=False compares file contents instead of trusting matching
        # os.stat() signatures.
        assert cmp(output_path, expected_path, shallow=False)
    finally:
        if os.path.exists(output_path):
            os.remove(output_path)
#raises exception
def test_undefined_input_format(input_dir, output_dir,
                                undefined_input_format_input,
                                undefined_input_format_task_options,
                                worker):
    """Expect ``worker.process`` to raise for the undefined-input-format options.

    NOTE(review): ``pytest.raises(Exception)`` also accepts errors raised
    while the paths are being built; consider narrowing it to the exception
    type ``process`` is meant to raise.
    """
    with pytest.raises(Exception):
        source_path = join(input_dir, undefined_input_format_input)
        target_path = join(output_dir, undefined_input_format_input)
        worker.process(
            source_path,
            undefined_input_format_task_options,
            target_path
        )
# Negative case: processing is expected to raise an exception.
def test_undefined_output_format(input_dir, output_dir,
                                 undefined_output_format_input,
                                 undefined_output_format_task_options,
                                 worker):
    """Check that ``worker.process`` raises for the undefined-output-format case.

    The input file name is reused as the output file name inside
    ``output_dir``; this case has no separate output-name fixture.

    Paths are built before entering ``pytest.raises`` so that only a failure
    of ``worker.process`` itself can satisfy the expectation -- an error while
    constructing the paths now fails the test instead of passing it.
    """
    input_path = join(input_dir, undefined_output_format_input)
    output_path = join(output_dir, undefined_output_format_input)
    with pytest.raises(Exception):
        worker.process(
            input_path,
            undefined_output_format_task_options,
            output_path,
        )
{"filename": "simple_text_pl", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 7], "orth": "Nazywam", "lexemes": [{"lemma": "Nazywam", "mstag": "VERB", "disamb": true}]}, {"index": 2, "position": [8, 11], "orth": "się", "lexemes": [{"lemma": "się", "mstag": "PRON", "disamb": true}]}, {"index": 3, "position": [12, 15], "orth": "Jan", "lexemes": [{"lemma": "Jan", "mstag": "PROPN", "disamb": true}]}, {"index": 4, "position": [16, 24], "orth": "Kowalski", "lexemes": [{"lemma": "Kowalski", "mstag": "PROPN", "disamb": true}]}, {"index": 5, "position": [25, 26], "orth": "i", "lexemes": [{"lemma": "i", "mstag": "CCONJ", "disamb": true}]}, {"index": 6, "position": [27, 35], "orth": "mieszkam", "lexemes": [{"lemma": "mieszkać", "mstag": "VERB", "disamb": true}]}, {"index": 7, "position": [36, 38], "orth": "we", "lexemes": [{"lemma": "w", "mstag": "ADP", "disamb": true}]}, {"index": 8, "position": [39, 48], "orth": "Wrocławiu", "lexemes": [{"lemma": "Wrocław", "mstag": "PROPN", "disamb": true}]}, {"index": 9, "position": [49, 49], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 10, "position": [50, 53], "orth": "Mój", "lexemes": [{"lemma": "mój", "mstag": "DET", "disamb": true}]}, {"index": 11, "position": [54, 62], "orth": "rodzinny", "lexemes": [{"lemma": "rodzinny", "mstag": "ADJ", "disamb": true}]}, {"index": 12, "position": [63, 66], "orth": "dom", "lexemes": [{"lemma": "dom", "mstag": "NOUN", "disamb": true}]}, {"index": 13, "position": [67, 73], "orth": "mieści", "lexemes": [{"lemma": "mieścić", "mstag": "VERB", "disamb": true}]}, {"index": 14, "position": [74, 77], "orth": "się", "lexemes": [{"lemma": "się", "mstag": "PRON", "disamb": true}]}, {"index": 15, "position": [78, 82], "orth": "przy", "lexemes": [{"lemma": "przy", "mstag": "ADP", "disamb": true}]}, {"index": 16, "position": [83, 88], "orth": "aleji", "lexemes": [{"lemma": "aleji", "mstag": "NOUN", "disamb": true}]}, {"index": 17, "position": [89, 93], "orth": 
"Jana", "lexemes": [{"lemma": "Jan", "mstag": "PROPN", "disamb": true}]}, {"index": 18, "position": [94, 99], "orth": "Pawła", "lexemes": [{"lemma": "Paweł", "mstag": "PROPN", "disamb": true}]}, {"index": 19, "position": [100, 102], "orth": "II", "lexemes": [{"lemma": "II", "mstag": "ADJ", "disamb": true}]}, {"index": 20, "position": [103, 103], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}], "text": "Nazywam się Jan Kowalski i mieszkam we Wrocławiu. Mój rodzinny dom mieści się przy aleji Jana Pawła II."}
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
{"filename": "post_spacy_input", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 4], "orth": "When", "lexemes": [{"lemma": "when", "mstag": "SCONJ", "disamb": true}]}, {"index": 2, "position": [5, 14], "orth": "Sebastian", "lexemes": [{"lemma": "Sebastian", "mstag": "PROPN", "disamb": true}]}, {"index": 3, "position": [15, 20], "orth": "Thrun", "lexemes": [{"lemma": "Thrun", "mstag": "PROPN", "disamb": true}]}, {"index": 4, "position": [21, 28], "orth": "started", "lexemes": [{"lemma": "start", "mstag": "VERB", "disamb": true}]}, {"index": 5, "position": [29, 36], "orth": "working", "lexemes": [{"lemma": "work", "mstag": "VERB", "disamb": true}]}, {"index": 6, "position": [37, 39], "orth": "on", "lexemes": [{"lemma": "on", "mstag": "ADP", "disamb": true}]}, {"index": 7, "position": [40, 44], "orth": "self", "lexemes": [{"lemma": "self", "mstag": "NOUN", "disamb": true}]}, {"index": 8, "position": [45, 45], "orth": "-", "lexemes": [{"lemma": "-", "mstag": "PUNCT", "disamb": true}]}, {"index": 9, "position": [46, 52], "orth": "driving", "lexemes": [{"lemma": "drive", "mstag": "VERB", "disamb": true}]}, {"index": 10, "position": [53, 57], "orth": "cars", "lexemes": [{"lemma": "car", "mstag": "NOUN", "disamb": true}]}, {"index": 11, "position": [58, 60], "orth": "at", "lexemes": [{"lemma": "at", "mstag": "ADP", "disamb": true}]}, {"index": 12, "position": [61, 67], "orth": "Google", "lexemes": [{"lemma": "Google", "mstag": "PROPN", "disamb": true}]}, {"index": 13, "position": [68, 70], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 14, "position": [71, 75], "orth": "2007", "lexemes": [{"lemma": "2007", "mstag": "NUM", "disamb": true}]}, {"index": 15, "position": [76, 76], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 16, "position": [77, 80], "orth": "few", "lexemes": [{"lemma": "few", "mstag": "ADJ", "disamb": true}]}, {"index": 17, "position": [81, 87], "orth": "people", 
"lexemes": [{"lemma": "people", "mstag": "NOUN", "disamb": true}]}, {"index": 18, "position": [88, 95], "orth": "outside", "lexemes": [{"lemma": "outside", "mstag": "ADV", "disamb": true}]}, {"index": 19, "position": [96, 98], "orth": "of", "lexemes": [{"lemma": "of", "mstag": "ADP", "disamb": true}]}, {"index": 20, "position": [99, 102], "orth": "the", "lexemes": [{"lemma": "the", "mstag": "DET", "disamb": true}]}, {"index": 21, "position": [103, 110], "orth": "company", "lexemes": [{"lemma": "company", "mstag": "NOUN", "disamb": true}]}, {"index": 22, "position": [111, 115], "orth": "took", "lexemes": [{"lemma": "take", "mstag": "VERB", "disamb": true}]}, {"index": 23, "position": [116, 119], "orth": "him", "lexemes": [{"lemma": "he", "mstag": "PRON", "disamb": true}]}, {"index": 24, "position": [120, 129], "orth": "seriously", "lexemes": [{"lemma": "seriously", "mstag": "ADV", "disamb": true}]}, {"index": 25, "position": [130, 130], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 26, "position": [131, 132], "orth": "“", "lexemes": [{"lemma": "\"", "mstag": "PUNCT", "disamb": true}]}, {"index": 27, "position": [133, 133], "orth": "I", "lexemes": [{"lemma": "I", "mstag": "PRON", "disamb": true}]}, {"index": 28, "position": [134, 137], "orth": "can", "lexemes": [{"lemma": "can", "mstag": "AUX", "disamb": true}]}, {"index": 29, "position": [138, 142], "orth": "tell", "lexemes": [{"lemma": "tell", "mstag": "VERB", "disamb": true}]}, {"index": 30, "position": [143, 146], "orth": "you", "lexemes": [{"lemma": "you", "mstag": "PRON", "disamb": true}]}, {"index": 31, "position": [147, 151], "orth": "very", "lexemes": [{"lemma": "very", "mstag": "ADV", "disamb": true}]}, {"index": 32, "position": [152, 158], "orth": "senior", "lexemes": [{"lemma": "senior", "mstag": "ADJ", "disamb": true}]}, {"index": 33, "position": [159, 163], "orth": "CEOs", "lexemes": [{"lemma": "ceo", "mstag": "NOUN", "disamb": true}]}, {"index": 34, "position": 
[164, 166], "orth": "of", "lexemes": [{"lemma": "of", "mstag": "ADP", "disamb": true}]}, {"index": 35, "position": [167, 172], "orth": "major", "lexemes": [{"lemma": "major", "mstag": "ADJ", "disamb": true}]}, {"index": 36, "position": [173, 181], "orth": "American", "lexemes": [{"lemma": "american", "mstag": "ADJ", "disamb": true}]}, {"index": 37, "position": [182, 185], "orth": "car", "lexemes": [{"lemma": "car", "mstag": "NOUN", "disamb": true}]}, {"index": 38, "position": [186, 195], "orth": "companies", "lexemes": [{"lemma": "company", "mstag": "NOUN", "disamb": true}]}, {"index": 39, "position": [196, 201], "orth": "would", "lexemes": [{"lemma": "would", "mstag": "AUX", "disamb": true}]}, {"index": 40, "position": [202, 207], "orth": "shake", "lexemes": [{"lemma": "shake", "mstag": "VERB", "disamb": true}]}, {"index": 41, "position": [208, 210], "orth": "my", "lexemes": [{"lemma": "my", "mstag": "PRON", "disamb": true}]}, {"index": 42, "position": [211, 215], "orth": "hand", "lexemes": [{"lemma": "hand", "mstag": "NOUN", "disamb": true}]}, {"index": 43, "position": [216, 219], "orth": "and", "lexemes": [{"lemma": "and", "mstag": "CCONJ", "disamb": true}]}, {"index": 44, "position": [220, 224], "orth": "turn", "lexemes": [{"lemma": "turn", "mstag": "VERB", "disamb": true}]}, {"index": 45, "position": [225, 229], "orth": "away", "lexemes": [{"lemma": "away", "mstag": "ADV", "disamb": true}]}, {"index": 46, "position": [230, 237], "orth": "because", "lexemes": [{"lemma": "because", "mstag": "SCONJ", "disamb": true}]}, {"index": 47, "position": [238, 239], "orth": "I", "lexemes": [{"lemma": "I", "mstag": "PRON", "disamb": true}]}, {"index": 48, "position": [240, 243], "orth": "was", "lexemes": [{"lemma": "be", "mstag": "AUX", "disamb": true}]}, {"index": 49, "position": [244, 246], "orth": "n’t", "lexemes": [{"lemma": "not", "mstag": "PART", "disamb": true}]}, {"index": 50, "position": [247, 252], "orth": "worth", "lexemes": [{"lemma": "worth", "mstag": "ADJ", 
"disamb": true}]}, {"index": 51, "position": [253, 260], "orth": "talking", "lexemes": [{"lemma": "talk", "mstag": "VERB", "disamb": true}]}, {"index": 52, "position": [261, 263], "orth": "to", "lexemes": [{"lemma": "to", "mstag": "ADP", "disamb": true}]}, {"index": 53, "position": [264, 264], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 54, "position": [265, 265], "orth": "”", "lexemes": [{"lemma": "\"", "mstag": "PUNCT", "disamb": true}]}, {"index": 55, "position": [266, 270], "orth": "said", "lexemes": [{"lemma": "say", "mstag": "VERB", "disamb": true}]}, {"index": 56, "position": [271, 276], "orth": "Thrun", "lexemes": [{"lemma": "Thrun", "mstag": "PROPN", "disamb": true}]}, {"index": 57, "position": [277, 277], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 58, "position": [278, 280], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 59, "position": [281, 283], "orth": "an", "lexemes": [{"lemma": "an", "mstag": "DET", "disamb": true}]}, {"index": 60, "position": [284, 293], "orth": "interview", "lexemes": [{"lemma": "interview", "mstag": "NOUN", "disamb": true}]}, {"index": 61, "position": [294, 298], "orth": "with", "lexemes": [{"lemma": "with", "mstag": "ADP", "disamb": true}]}, {"index": 62, "position": [299, 305], "orth": "Recode", "lexemes": [{"lemma": "Recode", "mstag": "PROPN", "disamb": true}]}, {"index": 63, "position": [306, 313], "orth": "earlier", "lexemes": [{"lemma": "early", "mstag": "ADV", "disamb": true}]}, {"index": 64, "position": [314, 318], "orth": "this", "lexemes": [{"lemma": "this", "mstag": "DET", "disamb": true}]}, {"index": 65, "position": [319, 323], "orth": "week", "lexemes": [{"lemma": "week", "mstag": "NOUN", "disamb": true}]}, {"index": 66, "position": [324, 324], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 67, "position": [325, 325], "orth": "\n", "lexemes": [{"lemma": 
"\n", "mstag": "SPACE", "disamb": true}]}], "entities": [{"text": "Sebastian Thrun", "type": "PERSON", "tokens": [1, 3], "position": [5, 20]}, {"text": "Google", "type": "ORG", "tokens": [11, 12], "position": [61, 67]}, {"text": "2007", "type": "DATE", "tokens": [13, 14], "position": [71, 75]}, {"text": "American", "type": "NORP", "tokens": [35, 36], "position": [173, 181]}, {"text": "Thrun", "type": "PERSON", "tokens": [55, 56], "position": [271, 276]}, {"text": "Recode", "type": "ORG", "tokens": [61, 62], "position": [299, 305]}, {"text": "earlier this week", "type": "DATE", "tokens": [62, 65], "position": [306, 323]}]}
\ No newline at end of file
{"filename": "post_spacy_input", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 4], "orth": "When", "lexemes": [{"lemma": "when", "mstag": "SCONJ", "disamb": true}]}, {"index": 2, "position": [5, 14], "orth": "Sebastian", "lexemes": [{"lemma": "Sebastian", "mstag": "PROPN", "disamb": true}]}, {"index": 3, "position": [15, 20], "orth": "Thrun", "lexemes": [{"lemma": "Thrun", "mstag": "PROPN", "disamb": true}]}, {"index": 4, "position": [21, 28], "orth": "started", "lexemes": [{"lemma": "start", "mstag": "VERB", "disamb": true}]}, {"index": 5, "position": [29, 36], "orth": "working", "lexemes": [{"lemma": "work", "mstag": "VERB", "disamb": true}]}, {"index": 6, "position": [37, 39], "orth": "on", "lexemes": [{"lemma": "on", "mstag": "ADP", "disamb": true}]}, {"index": 7, "position": [40, 44], "orth": "self", "lexemes": [{"lemma": "self", "mstag": "NOUN", "disamb": true}]}, {"index": 8, "position": [45, 45], "orth": "-", "lexemes": [{"lemma": "-", "mstag": "PUNCT", "disamb": true}]}, {"index": 9, "position": [46, 52], "orth": "driving", "lexemes": [{"lemma": "drive", "mstag": "VERB", "disamb": true}]}, {"index": 10, "position": [53, 57], "orth": "cars", "lexemes": [{"lemma": "car", "mstag": "NOUN", "disamb": true}]}, {"index": 11, "position": [58, 60], "orth": "at", "lexemes": [{"lemma": "at", "mstag": "ADP", "disamb": true}]}, {"index": 12, "position": [61, 67], "orth": "Google", "lexemes": [{"lemma": "Google", "mstag": "PROPN", "disamb": true}]}, {"index": 13, "position": [68, 70], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 14, "position": [71, 75], "orth": "2007", "lexemes": [{"lemma": "2007", "mstag": "NUM", "disamb": true}]}, {"index": 15, "position": [76, 76], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 16, "position": [77, 80], "orth": "few", "lexemes": [{"lemma": "few", "mstag": "ADJ", "disamb": true}]}, {"index": 17, "position": [81, 87], "orth": "people", 
"lexemes": [{"lemma": "people", "mstag": "NOUN", "disamb": true}]}, {"index": 18, "position": [88, 95], "orth": "outside", "lexemes": [{"lemma": "outside", "mstag": "ADV", "disamb": true}]}, {"index": 19, "position": [96, 98], "orth": "of", "lexemes": [{"lemma": "of", "mstag": "ADP", "disamb": true}]}, {"index": 20, "position": [99, 102], "orth": "the", "lexemes": [{"lemma": "the", "mstag": "DET", "disamb": true}]}, {"index": 21, "position": [103, 110], "orth": "company", "lexemes": [{"lemma": "company", "mstag": "NOUN", "disamb": true}]}, {"index": 22, "position": [111, 115], "orth": "took", "lexemes": [{"lemma": "take", "mstag": "VERB", "disamb": true}]}, {"index": 23, "position": [116, 119], "orth": "him", "lexemes": [{"lemma": "he", "mstag": "PRON", "disamb": true}]}, {"index": 24, "position": [120, 129], "orth": "seriously", "lexemes": [{"lemma": "seriously", "mstag": "ADV", "disamb": true}]}, {"index": 25, "position": [130, 130], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 26, "position": [131, 132], "orth": "“", "lexemes": [{"lemma": "\"", "mstag": "PUNCT", "disamb": true}]}, {"index": 27, "position": [133, 133], "orth": "I", "lexemes": [{"lemma": "I", "mstag": "PRON", "disamb": true}]}, {"index": 28, "position": [134, 137], "orth": "can", "lexemes": [{"lemma": "can", "mstag": "AUX", "disamb": true}]}, {"index": 29, "position": [138, 142], "orth": "tell", "lexemes": [{"lemma": "tell", "mstag": "VERB", "disamb": true}]}, {"index": 30, "position": [143, 146], "orth": "you", "lexemes": [{"lemma": "you", "mstag": "PRON", "disamb": true}]}, {"index": 31, "position": [147, 151], "orth": "very", "lexemes": [{"lemma": "very", "mstag": "ADV", "disamb": true}]}, {"index": 32, "position": [152, 158], "orth": "senior", "lexemes": [{"lemma": "senior", "mstag": "ADJ", "disamb": true}]}, {"index": 33, "position": [159, 163], "orth": "CEOs", "lexemes": [{"lemma": "ceo", "mstag": "NOUN", "disamb": true}]}, {"index": 34, "position": 
[164, 166], "orth": "of", "lexemes": [{"lemma": "of", "mstag": "ADP", "disamb": true}]}, {"index": 35, "position": [167, 172], "orth": "major", "lexemes": [{"lemma": "major", "mstag": "ADJ", "disamb": true}]}, {"index": 36, "position": [173, 181], "orth": "American", "lexemes": [{"lemma": "american", "mstag": "ADJ", "disamb": true}]}, {"index": 37, "position": [182, 185], "orth": "car", "lexemes": [{"lemma": "car", "mstag": "NOUN", "disamb": true}]}, {"index": 38, "position": [186, 195], "orth": "companies", "lexemes": [{"lemma": "company", "mstag": "NOUN", "disamb": true}]}, {"index": 39, "position": [196, 201], "orth": "would", "lexemes": [{"lemma": "would", "mstag": "AUX", "disamb": true}]}, {"index": 40, "position": [202, 207], "orth": "shake", "lexemes": [{"lemma": "shake", "mstag": "VERB", "disamb": true}]}, {"index": 41, "position": [208, 210], "orth": "my", "lexemes": [{"lemma": "my", "mstag": "PRON", "disamb": true}]}, {"index": 42, "position": [211, 215], "orth": "hand", "lexemes": [{"lemma": "hand", "mstag": "NOUN", "disamb": true}]}, {"index": 43, "position": [216, 219], "orth": "and", "lexemes": [{"lemma": "and", "mstag": "CCONJ", "disamb": true}]}, {"index": 44, "position": [220, 224], "orth": "turn", "lexemes": [{"lemma": "turn", "mstag": "VERB", "disamb": true}]}, {"index": 45, "position": [225, 229], "orth": "away", "lexemes": [{"lemma": "away", "mstag": "ADV", "disamb": true}]}, {"index": 46, "position": [230, 237], "orth": "because", "lexemes": [{"lemma": "because", "mstag": "SCONJ", "disamb": true}]}, {"index": 47, "position": [238, 239], "orth": "I", "lexemes": [{"lemma": "I", "mstag": "PRON", "disamb": true}]}, {"index": 48, "position": [240, 243], "orth": "was", "lexemes": [{"lemma": "be", "mstag": "AUX", "disamb": true}]}, {"index": 49, "position": [244, 246], "orth": "n’t", "lexemes": [{"lemma": "not", "mstag": "PART", "disamb": true}]}, {"index": 50, "position": [247, 252], "orth": "worth", "lexemes": [{"lemma": "worth", "mstag": "ADJ", 
"disamb": true}]}, {"index": 51, "position": [253, 260], "orth": "talking", "lexemes": [{"lemma": "talk", "mstag": "VERB", "disamb": true}]}, {"index": 52, "position": [261, 263], "orth": "to", "lexemes": [{"lemma": "to", "mstag": "ADP", "disamb": true}]}, {"index": 53, "position": [264, 264], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 54, "position": [265, 265], "orth": "”", "lexemes": [{"lemma": "\"", "mstag": "PUNCT", "disamb": true}]}, {"index": 55, "position": [266, 270], "orth": "said", "lexemes": [{"lemma": "say", "mstag": "VERB", "disamb": true}]}, {"index": 56, "position": [271, 276], "orth": "Thrun", "lexemes": [{"lemma": "Thrun", "mstag": "PROPN", "disamb": true}]}, {"index": 57, "position": [277, 277], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 58, "position": [278, 280], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 59, "position": [281, 283], "orth": "an", "lexemes": [{"lemma": "an", "mstag": "DET", "disamb": true}]}, {"index": 60, "position": [284, 293], "orth": "interview", "lexemes": [{"lemma": "interview", "mstag": "NOUN", "disamb": true}]}, {"index": 61, "position": [294, 298], "orth": "with", "lexemes": [{"lemma": "with", "mstag": "ADP", "disamb": true}]}, {"index": 62, "position": [299, 305], "orth": "Recode", "lexemes": [{"lemma": "Recode", "mstag": "PROPN", "disamb": true}]}, {"index": 63, "position": [306, 313], "orth": "earlier", "lexemes": [{"lemma": "early", "mstag": "ADV", "disamb": true}]}, {"index": 64, "position": [314, 318], "orth": "this", "lexemes": [{"lemma": "this", "mstag": "DET", "disamb": true}]}, {"index": 65, "position": [319, 323], "orth": "week", "lexemes": [{"lemma": "week", "mstag": "NOUN", "disamb": true}]}, {"index": 66, "position": [324, 324], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 67, "position": [325, 325], "orth": "\n", "lexemes": [{"lemma": 
"\n", "mstag": "SPACE", "disamb": true}]}], "text": "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.\n"}
\ No newline at end of file
{"filename": "post_spacy_input", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 4], "orth": "When", "lexemes": [{"lemma": "when", "mstag": "SCONJ", "disamb": true}]}, {"index": 2, "position": [5, 14], "orth": "Sebastian", "lexemes": [{"lemma": "Sebastian", "mstag": "PROPN", "disamb": true}]}, {"index": 3, "position": [15, 20], "orth": "Thrun", "lexemes": [{"lemma": "Thrun", "mstag": "PROPN", "disamb": true}]}, {"index": 4, "position": [21, 28], "orth": "started", "lexemes": [{"lemma": "start", "mstag": "VERB", "disamb": true}]}, {"index": 5, "position": [29, 36], "orth": "working", "lexemes": [{"lemma": "work", "mstag": "VERB", "disamb": true}]}, {"index": 6, "position": [37, 39], "orth": "on", "lexemes": [{"lemma": "on", "mstag": "ADP", "disamb": true}]}, {"index": 7, "position": [40, 44], "orth": "self", "lexemes": [{"lemma": "self", "mstag": "NOUN", "disamb": true}]}, {"index": 8, "position": [45, 45], "orth": "-", "lexemes": [{"lemma": "-", "mstag": "PUNCT", "disamb": true}]}, {"index": 9, "position": [46, 52], "orth": "driving", "lexemes": [{"lemma": "drive", "mstag": "VERB", "disamb": true}]}, {"index": 10, "position": [53, 57], "orth": "cars", "lexemes": [{"lemma": "car", "mstag": "NOUN", "disamb": true}]}, {"index": 11, "position": [58, 60], "orth": "at", "lexemes": [{"lemma": "at", "mstag": "ADP", "disamb": true}]}, {"index": 12, "position": [61, 67], "orth": "Google", "lexemes": [{"lemma": "Google", "mstag": "PROPN", "disamb": true}]}, {"index": 13, "position": [68, 70], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 14, "position": [71, 75], "orth": "2007", "lexemes": [{"lemma": "2007", "mstag": "NUM", "disamb": true}]}, {"index": 15, "position": [76, 76], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 16, "position": [77, 80], "orth": "few", "lexemes": [{"lemma": "few", "mstag": "ADJ", "disamb": true}]}, {"index": 17, "position": [81, 87], "orth": "people", 
"lexemes": [{"lemma": "people", "mstag": "NOUN", "disamb": true}]}, {"index": 18, "position": [88, 95], "orth": "outside", "lexemes": [{"lemma": "outside", "mstag": "ADV", "disamb": true}]}, {"index": 19, "position": [96, 98], "orth": "of", "lexemes": [{"lemma": "of", "mstag": "ADP", "disamb": true}]}, {"index": 20, "position": [99, 102], "orth": "the", "lexemes": [{"lemma": "the", "mstag": "DET", "disamb": true}]}, {"index": 21, "position": [103, 110], "orth": "company", "lexemes": [{"lemma": "company", "mstag": "NOUN", "disamb": true}]}, {"index": 22, "position": [111, 115], "orth": "took", "lexemes": [{"lemma": "take", "mstag": "VERB", "disamb": true}]}, {"index": 23, "position": [116, 119], "orth": "him", "lexemes": [{"lemma": "he", "mstag": "PRON", "disamb": true}]}, {"index": 24, "position": [120, 129], "orth": "seriously", "lexemes": [{"lemma": "seriously", "mstag": "ADV", "disamb": true}]}, {"index": 25, "position": [130, 130], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 26, "position": [131, 132], "orth": "“", "lexemes": [{"lemma": "\"", "mstag": "PUNCT", "disamb": true}]}, {"index": 27, "position": [133, 133], "orth": "I", "lexemes": [{"lemma": "I", "mstag": "PRON", "disamb": true}]}, {"index": 28, "position": [134, 137], "orth": "can", "lexemes": [{"lemma": "can", "mstag": "AUX", "disamb": true}]}, {"index": 29, "position": [138, 142], "orth": "tell", "lexemes": [{"lemma": "tell", "mstag": "VERB", "disamb": true}]}, {"index": 30, "position": [143, 146], "orth": "you", "lexemes": [{"lemma": "you", "mstag": "PRON", "disamb": true}]}, {"index": 31, "position": [147, 151], "orth": "very", "lexemes": [{"lemma": "very", "mstag": "ADV", "disamb": true}]}, {"index": 32, "position": [152, 158], "orth": "senior", "lexemes": [{"lemma": "senior", "mstag": "ADJ", "disamb": true}]}, {"index": 33, "position": [159, 163], "orth": "CEOs", "lexemes": [{"lemma": "ceo", "mstag": "NOUN", "disamb": true}]}, {"index": 34, "position": 
[164, 166], "orth": "of", "lexemes": [{"lemma": "of", "mstag": "ADP", "disamb": true}]}, {"index": 35, "position": [167, 172], "orth": "major", "lexemes": [{"lemma": "major", "mstag": "ADJ", "disamb": true}]}, {"index": 36, "position": [173, 181], "orth": "American", "lexemes": [{"lemma": "american", "mstag": "ADJ", "disamb": true}]}, {"index": 37, "position": [182, 185], "orth": "car", "lexemes": [{"lemma": "car", "mstag": "NOUN", "disamb": true}]}, {"index": 38, "position": [186, 195], "orth": "companies", "lexemes": [{"lemma": "company", "mstag": "NOUN", "disamb": true}]}, {"index": 39, "position": [196, 201], "orth": "would", "lexemes": [{"lemma": "would", "mstag": "AUX", "disamb": true}]}, {"index": 40, "position": [202, 207], "orth": "shake", "lexemes": [{"lemma": "shake", "mstag": "VERB", "disamb": true}]}, {"index": 41, "position": [208, 210], "orth": "my", "lexemes": [{"lemma": "my", "mstag": "PRON", "disamb": true}]}, {"index": 42, "position": [211, 215], "orth": "hand", "lexemes": [{"lemma": "hand", "mstag": "NOUN", "disamb": true}]}, {"index": 43, "position": [216, 219], "orth": "and", "lexemes": [{"lemma": "and", "mstag": "CCONJ", "disamb": true}]}, {"index": 44, "position": [220, 224], "orth": "turn", "lexemes": [{"lemma": "turn", "mstag": "VERB", "disamb": true}]}, {"index": 45, "position": [225, 229], "orth": "away", "lexemes": [{"lemma": "away", "mstag": "ADV", "disamb": true}]}, {"index": 46, "position": [230, 237], "orth": "because", "lexemes": [{"lemma": "because", "mstag": "SCONJ", "disamb": true}]}, {"index": 47, "position": [238, 239], "orth": "I", "lexemes": [{"lemma": "I", "mstag": "PRON", "disamb": true}]}, {"index": 48, "position": [240, 243], "orth": "was", "lexemes": [{"lemma": "be", "mstag": "AUX", "disamb": true}]}, {"index": 49, "position": [244, 246], "orth": "n’t", "lexemes": [{"lemma": "not", "mstag": "PART", "disamb": true}]}, {"index": 50, "position": [247, 252], "orth": "worth", "lexemes": [{"lemma": "worth", "mstag": "ADJ", 
"disamb": true}]}, {"index": 51, "position": [253, 260], "orth": "talking", "lexemes": [{"lemma": "talk", "mstag": "VERB", "disamb": true}]}, {"index": 52, "position": [261, 263], "orth": "to", "lexemes": [{"lemma": "to", "mstag": "ADP", "disamb": true}]}, {"index": 53, "position": [264, 264], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 54, "position": [265, 265], "orth": "”", "lexemes": [{"lemma": "\"", "mstag": "PUNCT", "disamb": true}]}, {"index": 55, "position": [266, 270], "orth": "said", "lexemes": [{"lemma": "say", "mstag": "VERB", "disamb": true}]}, {"index": 56, "position": [271, 276], "orth": "Thrun", "lexemes": [{"lemma": "Thrun", "mstag": "PROPN", "disamb": true}]}, {"index": 57, "position": [277, 277], "orth": ",", "lexemes": [{"lemma": ",", "mstag": "PUNCT", "disamb": true}]}, {"index": 58, "position": [278, 280], "orth": "in", "lexemes": [{"lemma": "in", "mstag": "ADP", "disamb": true}]}, {"index": 59, "position": [281, 283], "orth": "an", "lexemes": [{"lemma": "an", "mstag": "DET", "disamb": true}]}, {"index": 60, "position": [284, 293], "orth": "interview", "lexemes": [{"lemma": "interview", "mstag": "NOUN", "disamb": true}]}, {"index": 61, "position": [294, 298], "orth": "with", "lexemes": [{"lemma": "with", "mstag": "ADP", "disamb": true}]}, {"index": 62, "position": [299, 305], "orth": "Recode", "lexemes": [{"lemma": "Recode", "mstag": "PROPN", "disamb": true}]}, {"index": 63, "position": [306, 313], "orth": "earlier", "lexemes": [{"lemma": "early", "mstag": "ADV", "disamb": true}]}, {"index": 64, "position": [314, 318], "orth": "this", "lexemes": [{"lemma": "this", "mstag": "DET", "disamb": true}]}, {"index": 65, "position": [319, 323], "orth": "week", "lexemes": [{"lemma": "week", "mstag": "NOUN", "disamb": true}]}, {"index": 66, "position": [324, 324], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 67, "position": [325, 325], "orth": "\n", "lexemes": [{"lemma": 
"\n", "mstag": "SPACE", "disamb": true}]}]}
\ No newline at end of file
{"base": {"Nazywam": 1, "się": 2, "Jan": 2, "Kowalski": 1, "i": 1, "mieszkać": 1, "w": 1, "Wrocław": 1, ".": 2, "mój": 1, "rodzinny": 1, "dom": 1, "mieścić": 1, "przy": 1, "aleji": 1, "Paweł": 1, "II": 1}}
\ No newline at end of file
{"filename": "simple_text_pl", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 7], "orth": "Nazywam", "lexemes": [{"lemma": "Nazywam", "mstag": "VERB", "disamb": true}]}, {"index": 2, "position": [8, 11], "orth": "się", "lexemes": [{"lemma": "się", "mstag": "PRON", "disamb": true}]}, {"index": 3, "position": [12, 15], "orth": "Jan", "lexemes": [{"lemma": "Jan", "mstag": "PROPN", "disamb": true}]}, {"index": 4, "position": [16, 24], "orth": "Kowalski", "lexemes": [{"lemma": "Kowalski", "mstag": "PROPN", "disamb": true}]}, {"index": 5, "position": [25, 26], "orth": "i", "lexemes": [{"lemma": "i", "mstag": "CCONJ", "disamb": true}]}, {"index": 6, "position": [27, 35], "orth": "mieszkam", "lexemes": [{"lemma": "mieszkać", "mstag": "VERB", "disamb": true}]}, {"index": 7, "position": [36, 38], "orth": "we", "lexemes": [{"lemma": "w", "mstag": "ADP", "disamb": true}]}, {"index": 8, "position": [39, 48], "orth": "Wrocławiu", "lexemes": [{"lemma": "Wrocław", "mstag": "PROPN", "disamb": true}]}, {"index": 9, "position": [49, 49], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 10, "position": [50, 53], "orth": "Mój", "lexemes": [{"lemma": "mój", "mstag": "DET", "disamb": true}]}, {"index": 11, "position": [54, 62], "orth": "rodzinny", "lexemes": [{"lemma": "rodzinny", "mstag": "ADJ", "disamb": true}]}, {"index": 12, "position": [63, 66], "orth": "dom", "lexemes": [{"lemma": "dom", "mstag": "NOUN", "disamb": true}]}, {"index": 13, "position": [67, 73], "orth": "mieści", "lexemes": [{"lemma": "mieścić", "mstag": "VERB", "disamb": true}]}, {"index": 14, "position": [74, 77], "orth": "się", "lexemes": [{"lemma": "się", "mstag": "PRON", "disamb": true}]}, {"index": 15, "position": [78, 82], "orth": "przy", "lexemes": [{"lemma": "przy", "mstag": "ADP", "disamb": true}]}, {"index": 16, "position": [83, 88], "orth": "aleji", "lexemes": [{"lemma": "aleji", "mstag": "NOUN", "disamb": true}]}, {"index": 17, "position": [89, 93], "orth": 
"Jana", "lexemes": [{"lemma": "Jan", "mstag": "PROPN", "disamb": true}]}, {"index": 18, "position": [94, 99], "orth": "Pawła", "lexemes": [{"lemma": "Paweł", "mstag": "PROPN", "disamb": true}]}, {"index": 19, "position": [100, 102], "orth": "II", "lexemes": [{"lemma": "II", "mstag": "ADJ", "disamb": true}]}, {"index": 20, "position": [103, 103], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}]}
\ No newline at end of file
This diff is collapsed.
woda być jeden z pospolity substancja w wszechświat . cząsteczka woda być trzeci bardzo rozpowszechniony molekuła w ośrodek międzygwiazdowy , po cząsteczkowy wodór i tlenek węgiel .
być również szeroko rozpowszechniony w Układ Słoneczny : stanowić istotny element budowa ceres i księżyc lodowy krążyć wokół planeta - olbrzym , jako domieszka występować w on atmosfera , a przypuszczać się , że duży on ilość znajdować się w wnętrze ten planeta .
jako lód występować także na część planetoida , a zapewne również na obiekt transneptunowych .
woda być bardzo rozpowszechniony także na powierzchnia Ziemia .
występować głównie w ocean , który pokrywać 70 , 8 % powierzchnia glob , ale także w rzeka , jezioro i w postać stały w lodowiec .
część woda znajdować się w atmosfera ( chmura , para wodny ) .
niektóry związek chemiczny zawierać cząsteczka woda w swój budowa ( hydrat – określać się on wówczas miano woda krystalizacyjny ) .
zawartość woda włączyć w struktura minerał w płaszcz Ziemia móc przekraczać łączny zawartość woda w ocean i inny zbiornik powierzchniowy nawet dziesięciokrotnie .
woda występować w przyroda być roztwór sól i gaz .
najwięcej sól mineralny zawierać woda morski i woda mineralny ; najmniej woda z opad atmosferyczny .
woda o mały zawartość składnik mineralny nazywać woda miękki , natomiast zawierać znaczny ilość sól wapń i magnez – woda twardy .
oprócz to woda naturalny zawierać rozpuścić substancja pochodzenie organiczny , na przykład . mocznik , kwas humusowy i tym podobne .
This diff is collapsed.
{"filename": "simple_text_pl", "tagset": "ud", "tokens": [{"index": 1, "position": [0, 7], "orth": "Nazywam", "lexemes": [{"lemma": "Nazywam", "mstag": "VERB", "disamb": true}]}, {"index": 2, "position": [8, 11], "orth": "się", "lexemes": [{"lemma": "się", "mstag": "PRON", "disamb": true}]}, {"index": 3, "position": [12, 15], "orth": "Jan", "lexemes": [{"lemma": "Jan", "mstag": "PROPN", "disamb": true}]}, {"index": 4, "position": [16, 24], "orth": "Kowalski", "lexemes": [{"lemma": "Kowalski", "mstag": "PROPN", "disamb": true}]}, {"index": 5, "position": [25, 26], "orth": "i", "lexemes": [{"lemma": "i", "mstag": "CCONJ", "disamb": true}]}, {"index": 6, "position": [27, 35], "orth": "mieszkam", "lexemes": [{"lemma": "mieszkać", "mstag": "VERB", "disamb": true}]}, {"index": 7, "position": [36, 38], "orth": "we", "lexemes": [{"lemma": "w", "mstag": "ADP", "disamb": true}]}, {"index": 8, "position": [39, 48], "orth": "Wrocławiu", "lexemes": [{"lemma": "Wrocław", "mstag": "PROPN", "disamb": true}]}, {"index": 9, "position": [49, 49], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}, {"index": 10, "position": [50, 53], "orth": "Mój", "lexemes": [{"lemma": "mój", "mstag": "DET", "disamb": true}]}, {"index": 11, "position": [54, 62], "orth": "rodzinny", "lexemes": [{"lemma": "rodzinny", "mstag": "ADJ", "disamb": true}]}, {"index": 12, "position": [63, 66], "orth": "dom", "lexemes": [{"lemma": "dom", "mstag": "NOUN", "disamb": true}]}, {"index": 13, "position": [67, 73], "orth": "mieści", "lexemes": [{"lemma": "mieścić", "mstag": "VERB", "disamb": true}]}, {"index": 14, "position": [74, 77], "orth": "się", "lexemes": [{"lemma": "się", "mstag": "PRON", "disamb": true}]}, {"index": 15, "position": [78, 82], "orth": "przy", "lexemes": [{"lemma": "przy", "mstag": "ADP", "disamb": true}]}, {"index": 16, "position": [83, 88], "orth": "aleji", "lexemes": [{"lemma": "aleji", "mstag": "NOUN", "disamb": true}]}, {"index": 17, "position": [89, 93], "orth": 
"Jana", "lexemes": [{"lemma": "Jan", "mstag": "PROPN", "disamb": true}]}, {"index": 18, "position": [94, 99], "orth": "Pawła", "lexemes": [{"lemma": "Paweł", "mstag": "PROPN", "disamb": true}]}, {"index": 19, "position": [100, 102], "orth": "II", "lexemes": [{"lemma": "II", "mstag": "ADJ", "disamb": true}]}, {"index": 20, "position": [103, 103], "orth": ".", "lexemes": [{"lemma": ".", "mstag": "PUNCT", "disamb": true}]}], "text": "Nazywam się Jan Kowalski i mieszkam we Wrocławiu. Mój rodzinny dom mieści się przy aleji Jana Pawła II."}
\ No newline at end of file
Woda jest jedną z najpospolitszych substancji we Wszechświecie.
Cząsteczka wody jest trzecią najbardziej rozpowszechnioną molekułą w ośrodku międzygwiazdowym, po cząsteczkowym wodorze i tlenku węgla. Jest również szeroko rozpowszechniona w Układzie Słonecznym: stanowi istotny element budowy Ceres i księżyców lodowych krążących wokół planet-olbrzymów, jako domieszka występuje w ich atmosferach, a przypuszcza się, że duże jej ilości znajdują się we wnętrzach tych planet. Jako lód występuje także na części planetoid, a zapewne również na obiektach transneptunowych. Woda jest bardzo rozpowszechniona także na powierzchni Ziemi. Występuje głównie w oceanach, które pokrywają 70,8% powierzchni globu, ale także w rzekach, jeziorach i w postaci stałej w lodowcach. Część wody znajduje się w atmosferze (chmury, para wodna). Niektóre związki chemiczne zawierają cząsteczki wody w swojej budowie (hydraty – określa się ją wówczas mianem wody krystalizacyjnej). Zawartość wody włączonej w strukturę minerałów w płaszczu Ziemi może przekraczać łączną zawartość wody w oceanach i innych zbiornikach powierzchniowych nawet dziesięciokrotnie.
Woda występująca w przyrodzie jest roztworem soli i gazów. Najwięcej soli mineralnych zawiera woda morska i wody mineralne; najmniej woda z opadów atmosferycznych. Wodę o małej zawartości składników mineralnych nazywamy wodą miękką, natomiast zawierającą znaczne ilości soli wapnia i magnezu – wodą twardą. Oprócz tego wody naturalne zawierają rozpuszczone substancje pochodzenia organicznego, np. mocznik, kwasy humusowe itp.
When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. “I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment