From dfac19a73f203a83496aae50731d95c12cf299a1 Mon Sep 17 00:00:00 2001 From: Maja Jablonska <majajjablonska@gmail.com> Date: Fri, 26 Jan 2024 15:55:14 +0100 Subject: [PATCH] Add multiwords --- combo/data/api.py | 6 +- .../universal_dependencies_dataset_reader.py | 2 +- combo/data/tokenizers/character_tokenizer.py | 4 +- combo/data/tokenizers/lambo_tokenizer.py | 85 +- combo/data/tokenizers/token.py | 6 +- combo/predict.py | 16 +- setup.py | 2 +- tests/data/data_readers/en_test.conllu | 116 ++ ...t_universal_dependencies_dataset_reader.py | 18 +- tests/data/data_readers/tl_trg-ud-test.conllu | 1343 ----------------- 10 files changed, 222 insertions(+), 1376 deletions(-) create mode 100644 tests/data/data_readers/en_test.conllu delete mode 100644 tests/data/data_readers/tl_trg-ud-test.conllu diff --git a/combo/data/api.py b/combo/data/api.py index ab00da5..240a8b3 100644 --- a/combo/data/api.py +++ b/combo/data/api.py @@ -41,7 +41,11 @@ class _TokenList(conllu.models.TokenList): def sentence2conllu(sentence: Sentence, keep_semrel: bool = True) -> conllu.models.TokenList: tokens = [] + used_multiwords = set() for token in sentence.tokens: + if token.multiword and token.multiword not in used_multiwords: + tokens.append(collections.OrderedDict(Token(idx=token.multiword[1], text=token.multiword[0]).as_dict(keep_semrel))) + used_multiwords.add(token.multiword) token_dict = collections.OrderedDict(token.as_dict(keep_semrel)) tokens.append(token_dict) # Range tokens must be tuple not list, this is conllu library requirement @@ -76,7 +80,7 @@ def serialize_field(field: Any) -> str: return '|'.join(fields) if isinstance(field, tuple): - return "".join([serialize_field(item) for item in field]) + return "-".join([serialize_field(item) for item in field]) if isinstance(field, list): if len(field[0]) != 2: diff --git a/combo/data/dataset_readers/universal_dependencies_dataset_reader.py b/combo/data/dataset_readers/universal_dependencies_dataset_reader.py index a3b0408..d8f9d0e 100644 --- a/combo/data/dataset_readers/universal_dependencies_dataset_reader.py +++ b/combo/data/dataset_readers/universal_dependencies_dataset_reader.py @@ -133,9 +133,9 @@ class UniversalDependenciesDatasetReader(DatasetReader, ABC): def text_to_instance(self, tree: conllu.models.TokenList) -> Instance: fields_: Dict[str, Field] = {} - tokens = [Token.from_conllu_token(t) for t in tree if isinstance(t["id"], int)] # features + tokens = tree text_field = TextField(tokens, self.token_indexers) fields_["sentence"] = text_field diff --git a/combo/data/tokenizers/character_tokenizer.py b/combo/data/tokenizers/character_tokenizer.py index b407e6f..2b3779b 100644 --- a/combo/data/tokenizers/character_tokenizer.py +++ b/combo/data/tokenizers/character_tokenizer.py @@ -53,7 +53,9 @@ class CharacterTokenizer(Tokenizer): self._start_tokens.reverse() self._end_tokens = end_tokens or [] - def tokenize(self, text: str) -> List[Token]: + def tokenize(self, text: Union[str, Token]) -> List[Token]: + if isinstance(text, Token): + text = text.text if self._lowercase_characters: text = text.lower() if self._byte_encoding is not None: diff --git a/combo/data/tokenizers/lambo_tokenizer.py b/combo/data/tokenizers/lambo_tokenizer.py index 6842958..c5bb322 100644 --- a/combo/data/tokenizers/lambo_tokenizer.py +++ b/combo/data/tokenizers/lambo_tokenizer.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Iterator, List, Optional from lambo.segmenter.lambo import Lambo @@ -7,6 +7,34 @@ from combo.config.from_parameters import register_arguments from combo.data.tokenizers.token import Token from combo.data.tokenizers.tokenizer import Tokenizer +SPLIT_LEVELS = ["TURN", "SENTENCE", "NONE"] +DEFAULT_SPLIT_LEVEL = "SENTENCE" + +IDX = 0 + + +def _token_idx() -> Iterator[int]: + global IDX + IDX += 1 + yield IDX + + +def _reset_idx(): + global IDX + IDX = 0 + + +def _sentence_tokens(token: Token, + split_subwords: Optional[bool] = None) -> List[Token]: + if split_subwords and len(token.subwords) > 0: + subword_idxs = [next(_token_idx()) for _ in range(len(token.subwords))] + multiword = (token.text, (subword_idxs[0], subword_idxs[1])) + tokens = [Token(idx=s_idx, text=subword, multiword=multiword) for (s_idx, subword) + in zip(subword_idxs, token.subwords)] + return tokens + else: + return [Token(idx=next(_token_idx()), text=token.text)] + @Registry.register('lambo_tokenizer') class LamboTokenizer(Tokenizer): @@ -14,40 +42,71 @@ class LamboTokenizer(Tokenizer): def __init__( self, language: str = "English", - default_turns: bool = False, - default_split_subwords: bool = False + default_split_level: str = DEFAULT_SPLIT_LEVEL, + default_split_subwords: bool = True ): self._language = language self.__tokenizer = Lambo.get(language) - self.__default_turns = default_turns + self.__default_split_level = default_split_level.upper() + self.__default_split_subwords = default_split_subwords - def tokenize(self, text: str) -> List[Token]: + def tokenize(self, + text: str, + split_level: Optional[str] = None, + split_subwords: Optional[bool] = None, + multiwords: Optional[bool] = None) -> List[List[Token]]: """ Simple tokenization - ignoring the sentence splits :param text: + :param split_level: split on turns, sentences, or no splitting (return one list of tokens) + :param split_subwords: split subwords into separate tokens (e.g. can't into ca, n't) :return: """ + _reset_idx() document = self.__tokenizer.segment(text) tokens = [] - for turn in document.turns: - for sentence in turn.sentences: - for token in sentence.tokens: - tokens.append(Token(token.text, subwords=token.subwords)) + split_level = split_level if split_level is not None else self.__default_split_level + split_subwords = split_subwords if split_subwords is not None else self.__default_split_subwords + + if split_level.upper() == "TURN": + for turn in document.turns: + sentence_tokens = [] + for sentence in turn.sentences: + for token in sentence.tokens: + sentence_tokens.extend(_sentence_tokens(token, split_subwords)) + tokens.append(sentence_tokens) + elif split_level.upper() == "SENTENCE": + for turn in document.turns: + for sentence in turn.sentences: + sentence_tokens = [] + for token in sentence.tokens: + sentence_tokens.extend(_sentence_tokens(token, split_subwords)) + tokens.append(sentence_tokens) + else: + for turn in document.turns: + for sentence in turn.sentences: + for token in sentence.tokens: + tokens.extend(_sentence_tokens(token, split_subwords)) + tokens = [tokens] return tokens - def segment(self, text: str, turns: Optional[bool] = None, split_subwords: Optional[bool] = None) -> List[List[str]]: + def segment(self, + text: str, + turns: Optional[bool] = None, + split_subwords: Optional[bool] = None) -> List[List[str]]: """ - Full segmentation - segment into sentences. + Full segmentation - segment into sentences and return a list of strings. :param text: :param turns: segment into sentences by splitting on sentences or on turns. Default: sentences. + :param split_subwords: split subwords into separate tokens (e.g. can't into ca, n't) :return: """ - turns = turns or self.__default_turns - split_subwords = split_subwords or self.__default_split_subwords + turns = turns if turns is not None else self.__default_split_level.upper() == "TURNS" + split_subwords = split_subwords if split_subwords is not None else self.__default_split_subwords document = self.__tokenizer.segment(text) sentences = [] diff --git a/combo/data/tokenizers/token.py b/combo/data/tokenizers/token.py index 16c3a8e..319a5bd 100644 --- a/combo/data/tokenizers/token.py +++ b/combo/data/tokenizers/token.py @@ -29,6 +29,7 @@ class Token: "deps", "misc", "subwords", + "multiword", "semrel", "embeddings", "text_id", @@ -47,6 +48,7 @@ class Token: deps: Optional[str] misc: Optional[str] subwords: Optional[List[str]] + multiword: Optional[Tuple[str, Tuple[int, int]]] semrel: Optional[str] embeddings: Dict[str, List[float]] text_id: Optional[int] @@ -65,6 +67,7 @@ class Token: deps: str = None, misc: str = None, subwords: List[str] = None, + multiword: Tuple[str, Tuple[int, int]] = None, semrel: str = None, embeddings: Dict[str, List[float]] = None, text_id: int = None, @@ -83,6 +86,7 @@ class Token: self.deps = deps self.misc = misc self.subwords = subwords if subwords else [] + self.multiword = multiword self.semrel = semrel if embeddings is None: @@ -109,7 +113,7 @@ class Token: return repr def __str__(self): - return self.text + return str(self.text) def __repr__(self): return self.__str__() diff --git a/combo/predict.py b/combo/predict.py index 74c1ca5..c4f507b 100644 --- a/combo/predict.py +++ b/combo/predict.py @@ -11,7 +11,7 @@ from combo import data from combo.common import util from combo.config import Registry from combo.config.from_parameters import register_arguments -from combo.data import Instance, conllu2sentence, tokens2conllu, sentence2conllu +from combo.data import Instance, conllu2sentence, sentence2conllu from combo.data.dataset_loaders.dataset_loader import TensorDict from combo.data.dataset_readers.dataset_reader import DatasetReader from combo.data.instance import JsonDict @@ -20,6 +20,7 @@ from combo.modules.archival import load_archive from combo.predictors import PredictorModule from combo.utils import download, graph from combo.modules.model import Model +from combo.data import Token logger = logging.getLogger(__name__) @@ -72,16 +73,18 @@ class COMBO(PredictorModule): def configure_optimizers(self) -> Any: pass - def predict(self, sentence: Union[str, List[str], List[List[str]], List[data.Sentence]]): + def predict(self, + sentence: Union[str, List[str], List[List[str]], List[data.Sentence]], + **kwargs): if isinstance(sentence, str): - sentence = self.dataset_reader.tokenizer.segment(sentence) + sentence = self.dataset_reader.tokenizer.tokenize(sentence, **kwargs) if isinstance(sentence, list): if len(sentence) == 0: return [] example = sentence[0] sentences = sentence - if isinstance(example, str) or isinstance(example, list): + if isinstance(example, Token) or (isinstance(example, list) and isinstance(example[0], Token)): result = [] sentences = [self._to_input_json(s) for s in sentences] for sentences_batch in util.lazy_groups_of(sentences, self.batch_size): @@ -140,13 +143,14 @@ class COMBO(PredictorModule): @overrides def _json_to_instance(self, json_dict) -> Instance: sentence = json_dict["sentence"] + # TODO: tokenize EVERYTHING, even if a list is passed? if isinstance(sentence, str): tokens = [t.text for t in self.tokenizer.tokenize(json_dict["sentence"])] elif isinstance(sentence, list): tokens = sentence else: raise ValueError("Input must be either string or list of strings.") - return self.dataset_reader.text_to_instance(tokens2conllu(tokens)) + return self.dataset_reader.text_to_instance(tokens) @overrides def load_line(self, line: str) -> JsonDict: @@ -174,7 +178,7 @@ class COMBO(PredictorModule): def _predictions_as_tree(self, predictions: Dict[str, Any], instance: Instance): tree = instance.fields["metadata"]["input"] field_names = instance.fields["metadata"]["field_names"] - tree_tokens = [t for t in tree if isinstance(t["idx"], int)] + tree_tokens = [t for t in tree if isinstance(t["idx"], int) or isinstance(t["idx"], tuple)] embeddings = {t["idx"]: {} for t in tree} deprel_tree_distribution = None deprel_label_distribution = None diff --git a/setup.py b/setup.py index 80e89b4..0aeb7ee 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ REQUIREMENTS = [ setup( name="combo-nlp", - version="3.0.4", + version="3.1.0", author="Maja Jablonska", author_email="maja.jablonska@ipipan.waw.pl", install_requires=REQUIREMENTS, diff --git a/tests/data/data_readers/en_test.conllu b/tests/data/data_readers/en_test.conllu new file mode 100644 index 0000000..9618de1 --- /dev/null +++ b/tests/data/data_readers/en_test.conllu @@ -0,0 +1,116 @@ +# newdoc id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200 +# sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0001 +# newpar id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-p0001 +# text = What if Google Morphed Into GoogleOS? +1 What what PRON WP PronType=Int 0 root 0:root _ +2 if if SCONJ IN _ 4 mark 4:mark _ +3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ +4 Morphed morph VERB VBD Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ +5 Into into ADP IN _ 6 case 6:case _ +6 GoogleOS GoogleOS PROPN NNP Number=Sing 4 obl 4:obl:into SpaceAfter=No +7 ? ? PUNCT . _ 4 punct 4:punct _ + +# sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0002 +# text = What if Google expanded on its search-engine (and now e-mail) wares into a full-fledged operating system? +1 What what PRON WP PronType=Int 0 root 0:root _ +2 if if SCONJ IN _ 4 mark 4:mark _ +3 Google Google PROPN NNP Number=Sing 4 nsubj 4:nsubj _ +4 expanded expand VERB VBD Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin 1 advcl 1:advcl:if _ +5 on on ADP IN _ 15 case 15:case _ +6 its its PRON PRP$ Case=Gen|Gender=Neut|Number=Sing|Person=3|Poss=Yes|PronType=Prs 15 nmod:poss 15:nmod:poss _ +7 search search NOUN NN Number=Sing 9 compound 9:compound SpaceAfter=No +8 - - PUNCT HYPH _ 7 punct 7:punct SpaceAfter=No +9 engine engine NOUN NN Number=Sing 15 compound 15:compound _ +10 ( ( PUNCT -LRB- _ 13 punct 13:punct SpaceAfter=No +11 and and CCONJ CC _ 13 cc 13:cc _ +12 now now ADV RB _ 13 advmod 13:advmod _ +13 e-mail e-mail NOUN NN Number=Sing 9 conj 9:conj:and|15:compound SpaceAfter=No +14 ) ) PUNCT -RRB- _ 13 punct 13:punct _ +15 wares wares NOUN NNS Number=Plur 4 obl 4:obl:on _ +16 into into ADP IN _ 22 case 22:case _ +17 a a DET DT Definite=Ind|PronType=Art 22 det 22:det _ +18 full full ADV RB _ 20 advmod 20:advmod SpaceAfter=No +19 - - PUNCT HYPH _ 18 punct 18:punct SpaceAfter=No +20 fledged fledged ADJ JJ Degree=Pos 22 amod 22:amod _ +21 operating operating NOUN NN Number=Sing 22 compound 22:compound _ +22 system system NOUN NN Number=Sing 4 obl 4:obl:into SpaceAfter=No +23 ? ? PUNCT . _ 4 punct 4:punct _ + +# sent_id = weblog-blogspot.com_zentelligence_20040423000200_ENG_20040423_000200-0003 +# text = [via Microsoft Watch from Mary Jo Foley ] +1 [ [ PUNCT -LRB- _ 4 punct 4:punct SpaceAfter=No +2 via via ADP IN _ 4 case 4:case _ +3 Microsoft Microsoft PROPN NNP Number=Sing 4 compound 4:compound _ +4 Watch Watch PROPN NNP Number=Sing 0 root 0:root _ +5 from from ADP IN _ 6 case 6:case _ +6 Mary Mary PROPN NNP Number=Sing 4 nmod 4:nmod:from _ +7 Jo Jo PROPN NNP Number=Sing 6 flat 6:flat _ +8 Foley Foley PROPN NNP Number=Sing 6 flat 6:flat _ +9 ] ] PUNCT -RRB- _ 4 punct 4:punct _ + +# newdoc id = weblog-blogspot.com_marketview_20050511222700_ENG_20050511_222700 +# sent_id = weblog-blogspot.com_marketview_20050511222700_ENG_20050511_222700-0001 +# newpar id = weblog-blogspot.com_marketview_20050511222700_ENG_20050511_222700-p0001 +# text = (And, by the way, is anybody else just a little nostalgic for the days when that was a good thing?) +1 ( ( PUNCT -LRB- _ 14 punct 14:punct SpaceAfter=No +2 And and CCONJ CC _ 14 cc 14:cc SpaceAfter=No +3 , , PUNCT , _ 2 punct 2:punct _ +4 by by ADP IN _ 6 case 6:case _ +5 the the DET DT Definite=Def|PronType=Art 6 det 6:det _ +6 way way NOUN NN Number=Sing 14 obl 14:obl:by SpaceAfter=No +7 , , PUNCT , _ 6 punct 6:punct _ +8 is be AUX VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 14 cop 14:cop _ +9 anybody anybody PRON NN Number=Sing|PronType=Ind 14 nsubj 14:nsubj _ +10 else else ADJ JJ Degree=Pos 9 amod 9:amod _ +11 just just ADV RB _ 13 advmod 13:advmod _ +12 a a DET DT Definite=Ind|PronType=Art 13 det 13:det _ +13 little little ADJ JJ Degree=Pos 14 obl:npmod 14:obl:npmod _ +14 nostalgic nostalgic NOUN NN Number=Sing 0 root 0:root _ +15 for for ADP IN _ 17 case 17:case _ +16 the the DET DT Definite=Def|PronType=Art 17 det 17:det _ +17 days day NOUN NNS Number=Plur 14 nmod 14:nmod:for|23:obl:npmod _ +18 when when ADV WRB PronType=Rel 23 advmod 17:ref _ +19 that that PRON DT Number=Sing|PronType=Dem 23 nsubj 23:nsubj _ +20 was be AUX VBD Mood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin 23 cop 23:cop _ +21 a a DET DT Definite=Ind|PronType=Art 23 det 23:det _ +22 good good ADJ JJ Degree=Pos 23 amod 23:amod _ +23 thing thing NOUN NN Number=Sing 17 acl:relcl 17:acl:relcl SpaceAfter=No +24 ? ? PUNCT . _ 14 punct 14:punct SpaceAfter=No +25 ) ) PUNCT -RRB- _ 14 punct 14:punct _ + +# sent_id = weblog-blogspot.com_marketview_20050511222700_ENG_20050511_222700-0002 +# text = This BuzzMachine post argues that Google's rush toward ubiquity might backfire -- which we've all heard before, but it's particularly well-put in this post. +1 This this DET DT Number=Sing|PronType=Dem 3 det 3:det _ +2 BuzzMachine BuzzMachine PROPN NNP Number=Sing 3 compound 3:compound _ +3 post post NOUN NN Number=Sing 4 nsubj 4:nsubj _ +4 argues argue VERB VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 0 root 0:root _ +5 that that SCONJ IN _ 12 mark 12:mark _ +6-7 Google's _ _ _ _ _ _ _ _ +6 Google Google PROPN NNP Number=Sing 8 nmod:poss 8:nmod:poss _ +7 's 's PART POS _ 6 case 6:case _ +8 rush rush NOUN NN Number=Sing 12 nsubj 12:nsubj _ +9 toward toward ADP IN _ 10 case 10:case _ +10 ubiquity ubiquity NOUN NN Number=Sing 8 nmod 8:nmod:toward _ +11 might might AUX MD VerbForm=Fin 12 aux 12:aux _ +12 backfire backfire VERB VB VerbForm=Inf 4 ccomp 4:ccomp|18:ccomp _ +13 -- -- PUNCT , _ 18 punct 18:punct _ +14 which which PRON WDT PronType=Rel 18 obj 12:ref _ +15-16 we've _ _ _ _ _ _ _ _ +15 we we PRON PRP Case=Nom|Number=Plur|Person=1|PronType=Prs 18 nsubj 18:nsubj _ +16 've have AUX VBP Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin 18 aux 18:aux _ +17 all all ADV RB _ 18 advmod 18:advmod _ +18 heard hear VERB VBN Tense=Past|VerbForm=Part 12 advcl:relcl 12:advcl:relcl _ +19 before before ADV RB _ 18 advmod 18:advmod SpaceAfter=No +20 , , PUNCT , _ 27 punct 27:punct _ +21 but but CCONJ CC _ 27 cc 27:cc _ +22-23 it's _ _ _ _ _ _ _ _ +22 it it PRON PRP Case=Nom|Gender=Neut|Number=Sing|Person=3|PronType=Prs 27 nsubj:pass 27:nsubj:pass _ +23 's be AUX VBZ Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin 27 aux:pass 27:aux:pass _ +24 particularly particularly ADV RB _ 27 advmod 27:advmod _ +25 well well ADV RB Degree=Pos 27 advmod 27:advmod SpaceAfter=No +26 - - PUNCT HYPH _ 25 punct 25:punct SpaceAfter=No +27 put put VERB VBN Tense=Past|VerbForm=Part|Voice=Pass 4 conj 4:conj:but _ +28 in in ADP IN _ 30 case 30:case _ +29 this this DET DT Number=Sing|PronType=Dem 30 det 30:det _ +30 post post NOUN NN Number=Sing 27 obl 27:obl:in SpaceAfter=No +31 . . PUNCT . _ 4 punct 4:punct _ diff --git a/tests/data/data_readers/test_universal_dependencies_dataset_reader.py b/tests/data/data_readers/test_universal_dependencies_dataset_reader.py index 39cb0fa..b3292ee 100644 --- a/tests/data/data_readers/test_universal_dependencies_dataset_reader.py +++ b/tests/data/data_readers/test_universal_dependencies_dataset_reader.py @@ -1,30 +1,30 @@ import unittest import os -from combo.data import UniversalDependenciesDatasetReader +from combo.data import UniversalDependenciesDatasetReader, LamboTokenizer class UniversalDependenciesDatasetReaderTest(unittest.TestCase): def test_read_all_tokens(self): t = UniversalDependenciesDatasetReader() - tokens = [token for token in t.read(os.path.join(os.path.dirname(__file__), 'tl_trg-ud-test.conllu'))] - self.assertEqual(len(tokens), 128) + tokens = [token for token in t.read(os.path.join(os.path.dirname(__file__), 'en_test.conllu'))] + self.assertEqual(len(tokens), 5) def test_read_text(self): t = UniversalDependenciesDatasetReader() - token = next(iter(t.read(os.path.join(os.path.dirname(__file__), 'tl_trg-ud-test.conllu')))) + token = next(iter(t.read(os.path.join(os.path.dirname(__file__), 'en_test.conllu')))) self.assertListEqual([t.text for t in token['sentence'].tokens], - ['Gumising', 'ang', 'bata', '.']) + ['What', 'if', 'Google', 'Morphed', 'Into', 'GoogleOS', '?']) def test_read_deprel(self): t = UniversalDependenciesDatasetReader() - token = next(iter(t.read(os.path.join(os.path.dirname(__file__), 'tl_trg-ud-test.conllu')))) + token = next(iter(t.read(os.path.join(os.path.dirname(__file__), 'en_test.conllu')))) self.assertListEqual(token['deprel'].labels, - ['root', 'case', 'nsubj', 'punct']) + ['root', 'mark', 'nsubj', 'advcl', 'case', 'obl', 'punct']) def test_read_upostag(self): t = UniversalDependenciesDatasetReader() - token = next(iter(t.read(os.path.join(os.path.dirname(__file__), 'tl_trg-ud-test.conllu')))) + token = next(iter(t.read(os.path.join(os.path.dirname(__file__), 'en_test.conllu')))) self.assertListEqual(token['upostag'].labels, - ['VERB', 'ADP', 'NOUN', 'PUNCT']) + ['PRON', 'SCONJ', 'PROPN', 'VERB', 'ADP', 'PROPN', 'PUNCT']) diff --git a/tests/data/data_readers/tl_trg-ud-test.conllu b/tests/data/data_readers/tl_trg-ud-test.conllu deleted file mode 100644 index 4feec58..0000000 --- a/tests/data/data_readers/tl_trg-ud-test.conllu +++ /dev/null @@ -1,1343 +0,0 @@ -# sent_id = schachter-otanes-60-0 -# text = Gumising ang bata. -# text_en = The child awoke. -1 Gumising gising VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=awakened -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 bata bata NOUN _ _ 1 nsubj _ Gloss=child|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-60-1 -# text = Ginising ng ingay ang bata. -# text_en = A noise awakened the child. -1 Ginising gising VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=awakened -2 ng ng ADP _ Case=Gen 3 case _ _ -3 ingay ingay NOUN _ _ 1 obj:agent _ Gloss=noise -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 bata bata NOUN _ _ 1 nsubj:pass _ Gloss=child|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-60-2 -# text = Sinulat ko ang liham. -# text_en = I wrote the letter. -1 Sinulat sulat VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=written -2 ko ako PRON _ Case=Gen|Number=Sing|Person=1|PronType=Prs 1 obj:agent _ Gloss=I -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 liham liham NOUN _ _ 1 nsubj:pass _ Gloss=letter|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-60-3 -# text = Sinulatan ko ang titser. -# text_en = I wrote to the teacher. -1 Sinulatan sulat VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=written -2 ko ako PRON _ Case=Gen|Number=Sing|Person=1|PronType=Prs 1 obj:agent _ Gloss=I -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 titser titser NOUN _ _ 1 nsubj:pass _ Gloss=teacher|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-61-2 -# text = Dumarating na ang bus. -# text_en = The bus is coming now. -1 Dumarating dating VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=coming -2 na na ADV _ PronType=Dem 1 advmod _ Gloss=now -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 bus bus NOUN _ Foreign=Yes 1 nsubj _ Gloss=bus|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-61-3 -# text = Artista ang babae. -# text_en = The woman is an actress. -1 Artista artista NOUN _ _ 0 root _ Gloss=actress -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 babae babae NOUN _ _ 1 nsubj _ Gloss=woman|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-61-4 -# text = Maganda ang babae. -# text_en = The woman is beautiful. -1 Maganda ganda ADJ _ Degree=Pos 0 root _ Gloss=beautiful -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 babae babae NOUN _ _ 1 nsubj _ Gloss=woman|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-61-5 -# text = Yumaman ang babae. -# text_en = The woman got rich. -1 Yumaman yaman VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=got-rich -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 babae babae NOUN _ _ 1 nsubj _ Gloss=woman|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-62-0 -# text = Artista ang yumaman. -# text_en = The one who got rich is an actress. -1 Artista artista NOUN _ _ 0 root _ Gloss=actress -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 yumaman yaman VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 csubj _ Gloss=got-rich|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-62-1 -# text = Artista ang nagluto ng pagkain. -# text_en = The one who cooked some food is an actress. -1 Artista artista NOUN _ _ 3 nsubj _ Gloss=actress -2 ang ang ADP _ Case=Nom 1 case _ Gloss=the -3 nagluto luto VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=cooked -4 ng ng ADP _ Case=Gen 5 case _ _ -5 pagkain pagkain NOUN _ _ 3 obj _ Gloss=food|SpaceAfter=No -6 . . PUNCT _ _ 3 punct _ _ - -# sent_id = schachter-otanes-64-0 -# text = Siya ang Amerikano. -# text_en = He's the American. -1 Siya siya PRON _ Case=Nom|Number=Sing|Person=3|PronType=Prs 0 root _ Gloss=he/she -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 Amerikano amerikano NOUN _ _ 1 nsubj _ Gloss=American|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-64-1 -# text = Iyan ang bahay. -# text_en = That's the house. -1 Iyan iyan PRON _ Case=Nom|Deixis=Med|Number=Sing|PronType=Dem 0 root _ Gloss=that -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 bahay bahay NOUN _ _ 1 obj _ Gloss=house|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-64-2 -# text = Si Juan ang bunso. -# text_en = Juan's the youngest child. -1 Si si ADP _ Case=Nom 2 case _ Gloss=the -2 Juan Juan PROPN _ Gender=Masc 0 root _ Gloss=Juan -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 bunso bunso NOUN _ _ 2 nsubj _ Gloss=youngest|SpaceAfter=No -5 . . PUNCT _ _ 2 punct _ _ - -# sent_id = schachter-otanes-64-3 -# text = Isda ang bakalaw. -# text_en = The cod is a fish. -1 Isda isda NOUN _ _ 0 root _ Gloss=fish -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 bakalaw bakalaw NOUN _ _ 1 nsubj _ Gloss=cod|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-64-4 -# text = Isda ang pagkain niya. -# text_en = His meal is some fish. -1 Isda isda NOUN _ _ 0 root _ Gloss=fish -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 pagkain pagkain NOUN _ _ 1 nsubj _ Gloss=food -4 niya siya PRON _ Case=Gen|Number=Sing|Person=3|PronType=Prs 3 nmod:poss _ Gloss=his/her|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-64-5 -# text = Isda ang paborito niya. -# text_en = His favorite is fish. -1 Isda isda NOUN _ _ 0 root _ Gloss=fish -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 paborito paborito NOUN _ _ 1 nsubj _ Gloss=favorite -4 niya siya PRON _ Case=Gen|Number=Sing|Person=3|PronType=Prs 3 nmod:poss _ Gloss=his/her|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-64-6 -# text = Bato ang bahay. -# text_en = The house is stone. -1 Bato bato NOUN _ _ 0 root _ Gloss=stone -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 bahay bahay NOUN _ _ 1 nsubj _ Gloss=house|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-64-7 -# text = Bago ang bahay. -# text_en = The house is new. -1 Bago bago ADJ _ Degree=Pos 0 root _ Gloss=new -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 bahay bahay NOUN _ _ 1 nsubj _ Gloss=house|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-65-0 -# text = Bagong-bago ang bahay. -# text_en = The house is very new. -1 Bagong bago ADJ _ Degree=Pos|Link=Yes 3 compound:redup _ Gloss=new|SpaceAfter=No -2 - - PUNCT _ _ 3 punct _ SpaceAfter=No -3 bago bago ADJ _ Degree=Pos 0 root _ Gloss=new -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 bahay bahay NOUN _ _ 3 nsubj _ Gloss=house|SpaceAfter=No -6 . . PUNCT _ _ 3 punct _ _ - -# sent_id = schachter-otanes-65-2 -# text = Mabuti ang panahon. -# text_en = The weather is good. -1 Mabuti buti ADJ _ Degree=Pos 0 root _ Gloss=good -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 panahon panahon NOUN _ _ 1 nsubj _ Gloss=weather|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-65-3 -# text = Matamis ang kendi. -# text_en = The candy is sweet. -1 Matamis tamis ADJ _ Degree=Pos 0 root _ Gloss=sweet -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 kendi kendi NOUN _ _ 1 nsubj _ Gloss=candy|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-65-4 -# text = Pagod ang bata. -# text_en = The child is tired. -1 Pagod pagod ADJ _ Degree=Pos 0 root _ Gloss=tired -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 bata bata NOUN _ _ 1 nsubj _ Gloss=child|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-65-5 -# text = Nasa kusina ang mesa. -# text_en = The table is in the kitchen. -1 Nasa nasa ADP _ _ 2 case _ Gloss=in -2 kusina kusina NOUN _ _ 0 root _ Gloss=kitchen -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 mesa mesa NOUN _ _ 2 nsubj _ Gloss=table|SpaceAfter=No -5 . . PUNCT _ _ 2 punct _ _ - -# sent_id = schachter-otanes-65-6 -# text = Para sa bata ang laruan. -# text_en = The toy is for the child. -1 Para para ADP _ _ 3 case _ Gloss=for -2 sa sa ADP _ Case=Dat 3 case _ Gloss=to -3 bata bata NOUN _ _ 0 root _ Gloss=child -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 laruan laruan NOUN _ _ 3 nsubj _ Gloss=toy|SpaceAfter=No -6 . . PUNCT _ _ 3 punct _ _ - -# sent_id = schachter-otanes-65-7 -# text = Tungkol sa giyera ang kuwento. -# text_en = The story is about the war. -1 Tungkol tungkol ADP _ _ 3 case _ Gloss=about -2 sa sa ADP _ Case=Dat 3 case _ Gloss=to -3 giyera giyera NOUN _ _ 0 root _ Gloss=war -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 kuwento kuwento NOUN _ _ 3 nsubj _ Gloss=story|SpaceAfter=No -6 . . PUNCT _ _ 3 punct _ _ - -# sent_id = schachter-otanes-65-8 -# text = May gulayan ang babae. -# text_en = The woman has a vegetable garden. -1 May may VERB _ Polarity=Pos 0 root _ Gloss=exist -2 gulayan gulayan NOUN _ _ 1 obj _ Gloss=vegetable-garden -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 babae babae NOUN _ _ 1 nsubj _ Gloss=woman|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-65-9 -# text = Kailangan ko ang kuwalta. -# text_en = I need the money. -1 Kailangan kailangan VERB _ Aspect=Hab 0 root _ Gloss=necessary -2 ko ako PRON _ Case=Gen|Number=Sing|Person=1|PronType=Prs 1 obj _ Gloss=I -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 kuwalta kuwalta NOUN _ _ 1 nsubj _ Gloss=money|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-65-10 -# text = Sa istudyante ang libro. -# text_en = The book belongs to the student. -1 Sa sa ADP _ Case=Dat 2 case _ Gloss=to -2 istudyante istudyante NOUN _ _ 0 root _ Gloss=student -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 libro libro NOUN _ _ 2 nsubj _ Gloss=book|SpaceAfter=No -5 . . PUNCT _ _ 2 punct _ _ - -# sent_id = schachter-otanes-67-0 -# text = Nagluto ng pagkain ang nanay. -# text_en = Mother cooked some food. -1 Nagluto luto VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=cooked -2 ng ng ADP _ Case=Gen 3 case _ _ -3 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-1 -# text = Nagluto na ng pagkain ang nanay. -# text_en = Mother has cooked some food. -1 Nagluto luto VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=cooked -2 na na ADV _ PronType=Dem 1 advmod _ Gloss=now -3 ng ng ADP _ Case=Gen 4 case _ _ -4 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -5 ang ang ADP _ Case=Nom 6 case _ Gloss=the -6 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-2 -# text = Nagluto na ng pagkain ang nanay noong dumating ako. -# text_en = Mother had cooked some food when I arrived. -1 Nagluto luto VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=cooked -2 na na ADV _ PronType=Dem 1 advmod _ Gloss=now -3 ng ng ADP _ Case=Gen 4 case _ _ -4 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -5 ang ang ADP _ Case=Nom 6 case _ Gloss=the -6 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother -7 noong nang SCONJ _ _ 8 mark _ Gloss=when -8 dumating dating VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 advcl _ Gloss=arrived -9 ako ako PRON _ Case=Nom|Number=Sing|Person=1|PronType=Prs 8 nsubj _ Gloss=I|SpaceAfter=No -10 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-3 -# text = Nagluluto ng pagkain ang nanay araw-araw. -# text_en = Mother is cooking some food everyday. -1 Nagluluto luto VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=cooking -2 ng ng ADP _ Case=Gen 3 case _ _ -3 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother -6 araw araw NOUN _ _ 8 compound:redup _ Gloss=day|SpaceAfter=No -7 - - PUNCT _ _ 8 punct _ SpaceAfter=No -8 araw araw NOUN _ _ 1 obl _ Gloss=day|SpaceAfter=No -9 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-4 -# text = Nagluluto na ng pagkain ang nanay. -# text_en = Mother is cooking some food now. -1 Nagluluto luto VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=cooking -2 na na ADV _ PronType=Dem 1 advmod _ Gloss=now -3 ng ng ADP _ Case=Gen 4 case _ _ -4 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -5 ang ang ADP _ Case=Nom 6 case _ Gloss=the -6 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-5 -# text = Nagluluto ng pagkain ang nanay noong dumating ako. -# text_en = Mother was cooking some food when I arrived. -1 Nagluluto luto VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=cooking -2 ng ng ADP _ Case=Gen 3 case _ _ -3 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother -6 noong nang SCONJ _ _ 7 mark _ Gloss=when -7 dumating dating VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 advcl _ Gloss=arrived -8 ako ako PRON _ Case=Nom|Number=Sing|Person=1|PronType=Prs 7 nsubj _ Gloss=I|SpaceAfter=No -9 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-6 -# text = Magluluto ng pagkain ang nanay bukas. -# text_en = Mother will cook some food tomorrow. -1 Magluluto luto VERB _ Aspect=Prosp|Mood=Ind|Voice=Act 0 root _ Gloss=will-cook -2 ng ng ADP _ Case=Gen 3 case _ _ -3 pagkain pagkain NOUN _ _ 1 obj _ Gloss=food -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 nanay nanay NOUN _ _ 1 nsubj _ Gloss=mother -6 bukas bukas ADV _ _ 1 advmod _ Gloss=tomorrow|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-67-7 -# text = Hindi pa nagluluto ng pagkain ang nanay. -# text_en = Mother has not cooked any food yet. -1 Hindi hindi PART _ Polarity=Neg 3 advmod _ Gloss=not -2 pa pa ADV _ _ 3 advmod _ Gloss=yet -3 nagluluto luto VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=cooking -4 ng ng ADP _ Case=Gen 5 case _ _ -5 pagkain pagkain NOUN _ _ 3 obj _ Gloss=food -6 ang ang ADP _ Case=Nom 7 case _ Gloss=the -7 nanay nanay NOUN _ _ 3 nsubj _ Gloss=mother|SpaceAfter=No -8 . . PUNCT _ _ 3 punct _ _ - -# sent_id = schachter-otanes-69-1 -# text = Bumabasa ng diyaryo ang titser. -# text_en = The teacher is reading a newspaper. -1 Bumabasa basa VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=reading -2 ng ng ADP _ Case=Gen 3 case _ _ -3 diyaryo diyaryo NOUN _ _ 1 obj _ Gloss=newspaper -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 titser titser NOUN _ _ 1 nsubj _ Gloss=teacher|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-69-2 -# text = Binabasa ng titser ang diyaryo. -# text_en = The teacher is reading the newspaper. -1 Binabasa basa VERB _ Aspect=Imp|Mood=Ind|Voice=Pass 0 root _ Gloss=being-read -2 ng ng ADP _ Case=Gen 3 case _ _ -3 titser titser NOUN _ _ 1 obj:agent _ Gloss=teacher -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 diyaryo diyaryo NOUN _ _ 1 nsubj:pass _ Gloss=newspaper|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-69-3 -# text = Naghihilik ang lolo. -# text_en = Grandfather is snoring. -1 Naghihilik hilik VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=snoring -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 lolo lolo NOUN _ _ 1 nsubj _ Gloss=grandfather|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-69-4 -# text = Humihinga pa ang pasyente. -# text_en = The patient is still breathing. -1 Humihinga hinga VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=breathing -2 pa pa ADV _ _ 1 advmod _ Gloss=still -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 pasyente pasyente NOUN _ _ 1 nsubj _ Gloss=patient|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-0 -# text = Nauuhaw ang sanggol. -# text_en = The baby is getting thirsty. -1 Nauuhaw uhaw VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=getting-thirsty -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 sanggol sanggol NOUN _ _ 1 nsubj _ Gloss=baby|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-1 -# text = Tumatanda ang aso. -# text_en = The dog is growing old. -1 Tumatanda magtanda VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=getting-older -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 aso aso NOUN _ _ 1 nsubj _ Gloss=dog|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-2 -# text = Ibinigay ng titser sa istudyante ang premyo. -# text_en = The teacher gave the student the prize. -1 Ibinigay bigay VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=given -2 ng ng ADP _ Case=Gen 3 case _ _ -3 titser titser NOUN _ _ 1 obj:agent _ Gloss=teacher -4 sa sa ADP _ Case=Dat 5 case _ Gloss=to -5 istudyante istudyante NOUN _ _ 1 obl _ Gloss=student -6 ang ang ADP _ Case=Nom 7 case _ Gloss=the -7 premyo premyo NOUN _ _ 1 nsubj:pass _ Gloss=prize|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-3 -# text = Binigyan ng titser ng premyo ang istudyante. -# text_en = The teacher gave the student a prize. -1 Binigyan bigay VERB _ Aspect=Perf|Mood=Ind|Voice=Lfoc 0 root _ Gloss=given -2 ng ng ADP _ Case=Gen 3 case _ _ -3 titser titser NOUN _ _ 1 obj:agent _ Gloss=teacher -4 ng ng ADP _ Case=Gen 3 case _ _ -5 premyo premyo NOUN _ _ 1 iobj _ Gloss=prize -6 ang ang ADP _ Case=Nom 7 case _ Gloss=the -7 istudyante istudyante NOUN _ _ 1 nsubj:lfoc _ Gloss=student|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-4 -# text = Binili ng mangingisda ang bangka. -# text_en = The fisherman bought the boat. -1 Binili bili VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=bought -2 ng ng ADP _ Case=Gen 3 case _ _ -3 mangingisda mangingisda NOUN _ _ 1 obj:agent _ Gloss=fisherman -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 bangka bangka NOUN _ _ 1 nsubj:pass _ Gloss=boat|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-5 -# text = Sinalpok ng alon ang bangka. -# text_en = The wave struck the boat. -1 Sinalpok salpok VERB _ Aspect=Perf|Mood=Ind|Voice=Lfoc 0 root _ Gloss=struck -2 ng ng ADP _ Case=Gen 3 case _ _ -3 alon alon NOUN _ _ 1 obj:agent _ Gloss=wave -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 bangka bangka NOUN _ _ 1 nsubj:lfoc _ Gloss=boat|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-6 -# text = Bumili ng bangka ang mangingisda. -# text_en = The fisherman bought a boat. -1 Bumili bili VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=bought -2 ng ng ADP _ Case=Gen 3 case _ _ -3 bangka bangka NOUN _ _ 1 obj _ Gloss=boat -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 mangingisda mangingisda NOUN _ _ 1 nsubj _ Gloss=fisherman|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-70-7 -# text = Sumalpok sa bangka ang alon. -# text_en = The wave struck the boat. -1 Sumalpok salpok VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=struck -2 sa sa ADP _ Case=Dat 3 case _ Gloss=to -3 bangka bangka NOUN _ _ 1 obl _ Gloss=boat -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the -5 alon alon NOUN _ _ 1 nsubj _ Gloss=wave|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-71-0 -# text = Binalikan niya ang Maynila. -# text_en = He returned to Manila. -1 Binalikan balik VERB _ Aspect=Perf|Mood=Ind|Voice=Lfoc 0 root _ Gloss=returned -2 niya siya PRON _ Case=Gen|Number=Sing|Person=3|PronType=Prs 1 obj:agent _ Gloss=he/she -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 Maynila Maynila PROPN _ _ 1 nsubj:lfoc _ Gloss=Manila|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-71-1 -# text = Tinakasan niya ang bilangguan. -# text_en = He escaped from the prison. -1 Tinakasan takas VERB _ Aspect=Perf|Mood=Ind|Voice=Lfoc 0 root _ Gloss=escaped -2 niya siya PRON _ Case=Gen|Number=Sing|Person=3|PronType=Prs 1 obj:agent _ Gloss=he/she -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the -4 bilangguan bilangguan NOUN _ _ 1 nsubj:lfoc _ Gloss=prison|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-95-10 -# text = Mansanas ito. -# text_en = This is an apple. -1 Mansanas mansanas NOUN _ _ 0 root _ Gloss=apple -2 ito ito PRON _ Case=Nom|Deixis=Prox|Number=Sing|PronType=Dem 1 det _ Gloss=this|SpaceAfter=No -3 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-97-0 -# text = Biyudo ang maestro. -# text_en = The teacher is a widower. -1 Biyudo biyudo NOUN _ Gender=Masc 0 root _ Gloss=widower -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 maestro maestro NOUN _ Gender=Masc 1 nsubj _ Gloss=teacher|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-97-1 -# text = Biyuda ang maestra. -# text_en = The teacher is a widow. -1 Biyuda biyuda NOUN _ Gender=Fem 0 root _ Gloss=widow -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 maestra maestra NOUN _ Gender=Fem 1 nsubj _ Gloss=teacher|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-197-0 -# text = Komika si Linda. -# text_en = Linda is funny. -1 Komika komika ADJ _ Degree=Pos|Gender=Fem 0 root _ Gloss=funny -2 si si ADP _ Case=Nom 3 case _ Gloss=the -3 Linda Linda PROPN _ Gender=Fem 1 nsubj _ Gloss=Linda|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = devos-71-0 -# text = Nakita kita. -# text_en = I saw you. -1 Nakita kita VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=seen -2 kita ako PRON _ Case=Nom|Clusivity=In|Number=Dual|Person=1|PronType=Prs 1 nsubj:pass _ Gloss=I|SpaceAfter=No -3 . . PUNCT _ _ 1 punct _ _ - -# sent_id = schachter-otanes-73-0 -# text = Kinain ang pagkain. -# text_en = The food was eaten. -1 Kinain kain VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=ate -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the -3 pagkain pagkain NOUN _ _ 1 nsubj _ Gloss=food|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ _ - -# sent_id = shopen-1.8 -# text = Mga guro sila. -# gloss = PL teacher they -# text_en = They are teachers -1 Mga mga DET _ Number=Plur|PronType=Ind 2 det _ Gloss=PLUR -2 guro guro NOUN _ _ 0 root _ Gloss=teacher -3 sila sila PRON _ Case=Nom|Number=Plur|Person=3|PronType=Prs 2 nsubj _ Gloss=they|SpaceAfter=No -4 . . PUNCT _ _ 2 punct _ Gloss=. - -# sent_id = shopen-1.12 -# text = Malapit sa babae ang bata. -# gloss = near OBLIQ woman TOP child -# text_en = The child is near the woman -1 Malapit malapit ADJ _ Degree=Pos 0 root _ Gloss=near -2 sa sa ADP _ Case=Dat 3 case _ Gloss=to -3 babae babae NOUN _ _ 1 obl _ Gloss=woman -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 bata bata NOUN _ _ 1 nsubj _ Gloss=child|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.13 -# text = Malapit kay Maria si Juan. -# gloss = near OBLIQ Maria TOP Juan -# text_en = Juan is near Maria -1 Malapit malapit ADJ _ Degree=Pos 0 root _ Gloss=near -2 kay kay ADP _ Case=Dat 3 case _ Gloss=to -3 Maria Maria PROPN _ Gender=Fem 1 obl _ Gloss=Maria -4 si si ADP _ Case=Nom 5 case _ Gloss=the -5 Juan Juan PROPN _ Gender=Masc 1 nsubj _ Gloss=Juan|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.14 -# text = Malapit sa Maynila ang Pasay City. -# gloss = near OBL Manila TOP Pasay City -# text_en = Pasay City is near Manila -1 Malapit malapit ADJ _ Degree=Pos 0 root _ Gloss=near -2 sa sa ADP _ Case=Dat 3 case _ Gloss=to -3 Maynila Maynila PROPN _ _ 1 obl _ Gloss=Manila -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 Pasay Pasay PROPN _ _ 1 nsubj _ Gloss=Pasay -6 City City PROPN _ _ 5 flat _ Gloss=City|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.16a -# text = Pinanood ko ang mga sumasayaw. -# gloss = watch I TOP PL were.dancing -# text_en = I watched the ones who were dancing -# http://www.seasite.niu.edu/Tagalog/tagalog_verbs.htm -1 Pinanood nood VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=watch -2 ko ako PRON _ Case=Gen|Number=Sing|Person=1|PronType=Prs 1 nsubj _ Gloss=me -3 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -4 mga mga DET _ Number=Plur|PronType=Ind 5 det _ Gloss=PLUR -5 sumasayaw sayaw VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 obj _ Gloss=were-dancing|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.16b -# text = Sumasayaw ang mga tao. -# gloss = were.dancing TOP PL person -# text_en = The people were dancing -1 Sumasayaw sayaw VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=were-dancing -2 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -3 mga mga DET _ Number=Plur|PronType=Ind 4 det _ Gloss=PLUR -4 tao tao NOUN _ _ 1 nsubj _ Gloss=person|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.24 -# text = Nagtatrabaho ang lalaki. -# gloss = is.working TOP man -# text_en = The man is working -1 Nagtatrabaho trabaho VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=working|MGloss=is.working -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 lalaki lalaki NOUN _ _ 1 nsubj _ Gloss=man|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.25 -# text = Lalaki ang nagtatrabaho. -# gloss = man TOP is.working -# text_en = The one who is working is a man -1 Lalaki lalaki NOUN _ _ 0 root _ Gloss=man -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 nagtatrabaho trabaho VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 nsubj _ Gloss=working|MGloss=is.working|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.55a -# text = Hindi ko siya nakita. -# gloss = not I(AG) him(TOP) saw -# text_en = I didn't see him -1 Hindi hindi PART _ Polarity=Neg 4 advmod _ Gloss=not -2 ko ako PRON _ Case=Gen|Number=Sing|Person=1|PronType=Prs 4 nsubj _ Gloss=me -3 siya siya PRON _ Case=Nom|Number=Sing|Person=3|PronType=Prs 4 obj _ Gloss=he -4 nakita kita VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=saw|SpaceAfter=No -5 . . PUNCT _ _ 4 punct _ Gloss=. - -# sent_id = shopen-1.55b -# text = Hindi nakita ni Pedro si Juan. -# gloss = not saw AG Pedro TOP Juan -# text_en = Pedro didn't see Juan -1 Hindi hindi PART _ Polarity=Neg 2 advmod _ Gloss=not -2 nakita kita VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=saw -3 ni ni ADP _ Case=Gen 4 case _ Gloss=DET -4 Pedro Pedro PROPN _ Gender=Masc 2 nsubj _ Gloss=Pedro -5 si si ADP _ Case=Nom 6 case _ Gloss=the -6 Juan Juan PROPN _ Gender=Masc 2 obj _ Gloss=Juan|SpaceAfter=No -7 . . PUNCT _ _ 2 punct _ Gloss=. - -# sent_id = shopen-1.63 -# text = Inahit ni John ang sarili niya. -# gloss = shaved AG John TOP self his -# text_en = John shaved himself -1 Inahit ahit VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=shaved -2 ni ni ADP _ Case=Gen 3 case _ Gloss=DET -3 John John PROPN _ Gender=Masc 1 nsubj _ Gloss=John -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 sarili sarili PRON _ PronType=Prs|Reflex=Yes 1 obj _ Gloss=self -6 niya siya PRON _ Case=Gen|Number=Sing|Person=3|PronType=Prs 5 nmod _ Gloss=him|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.64 -# text = Inahit ni John mismo si Bill. -# gloss = shaved AG John EMPH TOP Bill -# text_en = John himself shaved Bill -1 Inahit ahit VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=shaved -2 ni ni ADP _ Case=Gen 3 case _ Gloss=DET -3 John John PROPN _ Gender=Masc 1 nsubj _ Gloss=John -4 mismo mismo DET _ Gender=Masc|PronType=Emp 3 nmod _ Gloss=EMPH -5 si si ADP _ Case=Nom 6 case _ Gloss=the -6 Bill Bill PROPN _ Gender=Masc 1 obj _ Gloss=Bill|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.84 -# text = Sino ang batang pumunta sa tindahan? -# gloss = who TOP child went OBL store -# text_en = Who is the child who went to the store? -1 Sino sino PRON _ Case=Nom|PronType=Int 0 root _ Gloss=who -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 batang bata NOUN _ Link=Yes 1 nsubj _ Gloss=child|MSeg=bata-ng|MGloss=child-LINK -4 pumunta punta VERB _ Aspect=Perf|Mood=Ind|Voice=Act 3 acl:relcl _ Gloss=went -5 sa sa ADP _ Case=Dat 6 case _ Gloss=to -6 tindahan tindahan NOUN _ _ 4 obl _ Gloss=store|SpaceAfter=No -7 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.89a -# text = Umuulan ba? -# gloss = is.raining Q -# text_en = Is it raining? -1 Umuulan ulan VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=is-raining -2 ba ba PART _ PartType=Int 1 advmod _ Gloss=QUESTION|SpaceAfter=No -3 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.89b -# text = Oo. -# gloss = yes -# text_en = Yes -1 Oo oo INTJ _ Polarity=Pos 0 root _ Gloss=yes|SpaceAfter=No -2 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.89c -# text = Hindi. -# gloss = no -# text_en = No -1 Hindi hindi INTJ _ Polarity=Pos 0 root _ Gloss=no|SpaceAfter=No -2 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.90a -# text = Mayroon bang pagkain? -# gloss = EXIST Q-LINK food -# text_en = Is there any food? -1 Mayroon mayroon VERB _ Polarity=Pos 0 root _ Gloss=exists -2 bang ba PART _ Link=Yes|PartType=Int 1 advmod _ Gloss=QUESTION|MSeg=ba-ng|MGloss=QUESTION-LINK -3 pagkain pagkain NOUN _ _ 1 nsubj _ Gloss=food|SpaceAfter=No -4 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.90b -# text = Mayroon. -# gloss = EXIST -# text_en = Yes (answer to existential questions) -1 Mayroon mayroon VERB _ Polarity=Pos 0 root _ Gloss=exists|SpaceAfter=No -2 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.90c -# text = Wala. -# gloss = NEG-EXIST -# text_en = No (answer to existential questions) -1 Wala wala VERB _ Polarity=Neg 0 root _ Gloss=does-not-exist|SpaceAfter=No -2 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.92 -# text = Umuulan, ano? -# gloss = is.raining CONFIRMATION TAG -# text_en = It's raining, isn't it? -1 Umuulan ulan VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=is-raining|SpaceAfter=No -2 , , PUNCT _ _ 3 punct _ Gloss=, -3 ano ano PART _ PartType=Int 1 advmod _ Gloss=CONFIRMATION-TAG|SpaceAfter=No -4 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.98a -# text = Napakaano nila? -# gloss = very.what they -# text_en = What are they very much like? -# "Napaka-" is an intensifying prefix meaning "very". For example, "napakabuti" means "so very good". -# We consider "napakaano" an adjective derived from the interrogative pronoun "ano" (what). -1 Napakaano ano ADJ _ Degree=Pos|PronType=Int 0 root _ Gloss=very-much-like|MSeg=napaka-ano|MGloss=very-what -2 nila sila PRON _ Case=Gen|Number=Plur|Person=3|PronType=Prs 1 nsubj _ Gloss=them|SpaceAfter=No -3 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.98b -# text = Napakataas nila. -# gloss = very.tall they -# text_en = They are very tall -1 Napakataas taas ADJ _ Degree=Pos 0 root _ Gloss=very-tall|MSeg=napaka-taas|MGloss=very-tall -2 nila sila PRON _ Case=Gen|Number=Plur|Person=3|PronType=Prs 1 nsubj _ Gloss=them|SpaceAfter=No -3 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.98c -# text = Nagano ka? -# gloss = (PERF.ACTIVE)what you -# text_en = What did you do? -# We consider "ano" a verb, resulting from a conversion from the pronoun "ano" (what), acquiring verbal morphology. -1 Nagano ano VERB _ Aspect=Perf|Mood=Ind|PronType=Int|Voice=Act 0 root _ Gloss=what-did|MSeg=nag-ano|MGloss=PERF+ACTIVE-what -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you|SpaceAfter=No -3 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.98d -# text = Nagsalita ka. -# gloss = (PERF.ACTIVE)speak you -# text_en = You spoke -1 Nagsalita salita VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=spoke|MSeg=nag-salita|MGloss=PERF+ACTIVE-speak -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you|SpaceAfter=No -3 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.98e -# text = Naano ka? -# gloss = (PERF.INVOL)what you -# text_en = What happened to you? -# We consider "ano" a verb, resulting from a conversion from the pronoun "ano" (what), acquiring verbal morphology. -1 Naano ano VERB _ Aspect=Perf|Mood=Ind|PronType=Int|Voice=Pass 0 root _ Gloss=what-happened|MSeg=na-ano|MGloss=PERF+INVOL-what -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you|SpaceAfter=No -3 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.98f -# text = Natalisod ka. -# gloss = (PERF.INVOL)trip you -# text_en = You tripped -1 Natalisod tisod VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=tripped|MSeg=na-talisod|MGloss=PERF+INVOL-trip -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you|SpaceAfter=No -3 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.100 -# text = Ipinansulat ni John ng liham kay Mary ang makinilya. -# gloss = wrote.with AG John OBJ letter IO Mary TOP typewriter -# text_en = John wrote Mary a letter on the typewriter -1 Ipinansulat sulat VERB _ Aspect=Perf|Mood=Ind|Voice=Ifoc 0 root _ Gloss=wrote-with -2 ni ni ADP _ Case=Gen 3 case _ Gloss=DET -3 John John PROPN _ Gender=Masc 1 obj:agent _ Gloss=John -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -5 liham liham NOUN _ _ 1 obj _ Gloss=letter -6 kay kay ADP _ Case=Dat 7 case _ Gloss=to -7 Mary Mary PROPN _ Gender=Fem 1 obl _ Gloss=Mary -8 ang ang ADP _ Case=Nom 9 case _ Gloss=the|MGloss=PIV -9 makinilya makinilya NOUN _ _ 1 nsubj:ifoc _ Gloss=typewriter|SpaceAfter=No -10 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.109 -# text = Nasaan ang mga pinggan? -# gloss = where TOP PL dish -# text_en = Where are the dishes? -1 Nasaan nasaan ADV _ PronType=Int 0 root _ Gloss=where -2 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -3 mga mga DET _ Number=Plur|PronType=Ind 4 det _ Gloss=PLUR -4 pinggan pinggan NOUN _ _ 1 nsubj _ Gloss=dish|SpaceAfter=No -5 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.141 -# text = Itinanong ko kung nasaan sila. -# gloss = asked I COMP where they -# text_en = I asked where they were -1 Itinanong tanong VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=asked -2 ko ako PRON _ Case=Gen|Number=Sing|Person=1|PronType=Prs 1 nsubj _ Gloss=me -3 kung kung SCONJ _ _ 4 mark _ Gloss=that -4 nasaan nasaan ADV _ PronType=Int 1 ccomp _ Gloss=where -5 sila sila PRON _ Case=Nom|Number=Plur|Person=3|PronType=Prs 4 nsubj _ Gloss=they|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.164a -# text = Darating daw si Pedro bukas. -# gloss = will.arrive they.say TOP Pedro tomorrow -# text_en = They say Pedro will arrive tomorrow -1 Darating dating VERB _ Aspect=Prog|Mood=Ind|Voice=Act 0 root _ Gloss=will-arrive -2 daw daw PART _ PartType=Nfh 1 advmod _ Gloss=they-say -3 si si ADP _ Case=Nom 4 case _ Gloss=the -4 Pedro Pedro PROPN _ Gender=Masc 1 nsubj _ Gloss=Pedro -5 bukas bukas ADV _ _ 1 advmod _ Gloss=tomorrow|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.164b -# text = Hindi daw darating si Pedro bukas. -# gloss = NEG they.say will.arrive TOP Pedro tomorrow -# text_en = They say Pedro won't arrive tomorrow -1 Hindi hindi PART _ Polarity=Neg 3 advmod _ Gloss=not -2 daw daw PART _ PartType=Nfh 3 advmod _ Gloss=they-say -3 darating dating VERB _ Aspect=Prog|Mood=Ind|Voice=Act 0 root _ Gloss=will-arrive -4 si si ADP _ Case=Nom 5 case _ Gloss=the -5 Pedro Pedro PROPN _ Gender=Masc 3 nsubj _ Gloss=Pedro -6 bukas bukas ADV _ _ 3 advmod _ Gloss=tomorrow|SpaceAfter=No -7 . . PUNCT _ _ 3 punct _ Gloss=. - -# sent_id = shopen-1.164c -# text = Bakit daw hindi darating si Pedro bukas? -# gloss = why they.say NEG will.arrive TOP Pedro tomorrow -# text_en = Why do they say Pedro won't arrive tomorrow? -1 Bakit bakit ADV _ PronType=Int 4 advmod _ Gloss=why -2 daw daw PART _ PartType=Nfh 4 advmod _ Gloss=they-say -3 hindi hindi PART _ Polarity=Neg 4 advmod _ Gloss=not -4 darating dating VERB _ Aspect=Prog|Mood=Ind|Voice=Act 0 root _ Gloss=will-arrive -5 si si ADP _ Case=Nom 6 case _ Gloss=the -6 Pedro Pedro PROPN _ Gender=Masc 4 nsubj _ Gloss=Pedro -7 bukas bukas ADV _ _ 4 advmod _ Gloss=tomorrow|SpaceAfter=No -8 ? ? PUNCT _ _ 4 punct _ Gloss=? - -# sent_id = shopen-1.167a -# text = Nagtatrabaho ka na ba daw roon? -# gloss = are.working you now Q they.say there -# text_en = Do they say you are working there now? -1 Nagtatrabaho trabaho VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=working|MGloss=are.working -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you -3 na na ADV _ PronType=Dem 1 advmod _ Gloss=now -4 ba ba PART _ PartType=Int 1 advmod _ Gloss=QUESTION -5 daw daw PART _ PartType=Nfh 1 advmod _ Gloss=they-say -6 roon roon ADV _ Deixis=Remt|PronType=Dem 1 advmod _ Gloss=there|SpaceAfter=No -7 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.167b -# text = Nagtatrabaho ka na daw ba roon? -# gloss = are.working you now they.say Q there -# text_en = Do they say you are working there now? -1 Nagtatrabaho trabaho VERB _ Aspect=Imp|Mood=Ind|Voice=Act 0 root _ Gloss=working|MGloss=are.working -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you -3 na na ADV _ PronType=Dem 1 advmod _ Gloss=now -4 daw daw PART _ PartType=Nfh 1 advmod _ Gloss=they-say -5 ba ba PART _ PartType=Int 1 advmod _ Gloss=QUESTION -6 roon roon ADV _ Deixis=Remt|PronType=Dem 1 advmod _ Gloss=there|SpaceAfter=No -7 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.168 -# text = Hindi pa man lamang tuloy siya nakakapagalmusal. -# gloss = NEG yet even just as.a.result he can.have.breakfast -# text_en = As a result, he hasn't even been able to have breakfast yet -1 Hindi hindi PART _ Polarity=Neg 7 advmod _ Gloss=not -2 pa pa ADV _ _ 7 advmod _ Gloss=yet -3 man man ADV _ _ 7 advmod _ Gloss=even -4 lamang lamang ADV _ _ 7 advmod _ Gloss=just -5 tuloy tuloy ADV _ _ 7 advmod _ Gloss=as-a-result -6 siya siya PRON _ Case=Nom|Number=Sing|Person=3|PronType=Prs 7 nsubj _ Gloss=he -7 nakakapagalmusal almusal VERB _ Aspect=Imp|Mood=Pot|Voice=Act 0 root _ Gloss=can-have-breakfast|SpaceAfter=No -8 . . PUNCT _ _ 7 punct _ Gloss=. - -# sent_id = shopen-1.183a -# text = Mayroong libro sa mesa. -# gloss = EXIST/POSS-LINK book on table -# text_en = There is a book on the table -1 Mayroong mayroon VERB _ Link=Yes|Polarity=Pos 0 root _ Gloss=there-is|MSeg=mayroon-g|MGloss=exists-POSSESSIVE -2 libro libro NOUN _ _ 1 obj _ Gloss=book -3 sa sa ADP _ Case=Dat 4 case _ Gloss=to -4 mesa mesa NOUN _ _ 1 obl _ Gloss=table|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.183b -# text = Walang libro sa mesa. -# gloss = EXIST/POSS(NEG)-LINK book on table -# text_en = There isn't a book on the table -1 Walang wala VERB _ Link=Yes|Polarity=Neg 0 root _ Gloss=there-is-not|MSeg=wala-ng|MGloss=does+not+exist-POSSESSIVE -2 libro libro NOUN _ _ 1 obj _ Gloss=book -3 sa sa ADP _ Case=Dat 4 case _ Gloss=to -4 mesa mesa NOUN _ _ 1 obl _ Gloss=table|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.183c -# text = Mayroong libro ang bata. -# gloss = EXIST/POSS-LINK book TOP child -# text_en = The child has a book -1 Mayroong mayroon VERB _ Link=Yes|Polarity=Pos 0 root _ Gloss=there-is|MSeg=mayroon-g|MGloss=exists-POSSESSIVE -2 libro libro NOUN _ _ 1 obj _ Gloss=book -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 bata bata NOUN _ _ 1 nsubj _ Gloss=child|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.183d -# text = Walang libro ang bata. -# gloss = EXIST/POSS(NEG)-LINK book TOP child -# text_en = The child doesn't have a book -1 Walang wala VERB _ Link=Yes|Polarity=Neg 0 root _ Gloss=there-is-not|MSeg=wala-ng|MGloss=does+not+exist-POSSESSIVE -2 libro libro NOUN _ _ 1 obj _ Gloss=book -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 bata bata NOUN _ _ 1 nsubj _ Gloss=child|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.185a -# text = Mabuti ba ang ani? -# The actual example in Shopen (2007) is "Mabuti a ang ani?". According to Ann Tan, "a" is a typo and it should be "ba". -# gloss = good Q TOP harvest -# text_en = Is the harvest good? -1 Mabuti mabuti ADJ _ Degree=Pos 0 root _ Gloss=good -2 ba ba PART _ PartType=Int 1 advmod _ Gloss=QUESTION -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 ani ani NOUN _ _ 1 nsubj _ Gloss=harvest|SpaceAfter=No -5 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.185b -# text = Mabuti kaya ang ani? -# gloss = good Q.SPECULATIVE TOP harvest -# text_en = Do you suppose the harvest will be good? -1 Mabuti mabuti ADJ _ Degree=Pos 0 root _ Gloss=good -2 kaya kaya PART _ PartType=Int 1 advmod _ Gloss=QUESTION-SPECULATIVE -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 ani ani NOUN _ _ 1 nsubj _ Gloss=harvest|SpaceAfter=No -5 ? ? PUNCT _ _ 1 punct _ Gloss=? - -# sent_id = shopen-1.185c -# text = Mabuti sana ang ani. -# gloss = good WISH TOP harvest -# text_en = I hope the harvest is good -1 Mabuti mabuti ADJ _ Degree=Pos 0 root _ Gloss=good -2 sana sana PART _ PartType=Des 1 advmod _ Gloss=WISH -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 ani ani NOUN _ _ 1 nsubj _ Gloss=harvest|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-1.186a -# text = Huwag kayong umalis. -# gloss = NEG you-LINK leave -# text_en = Don't leave -1 Huwag huwag AUX _ Mood=Imp|Polarity=Neg 3 aux _ Gloss=do-not -2 kayong kayo PRON _ Case=Nom|Link=Yes|Number=Plur|Person=2|PronType=Prs 3 nsubj _ Gloss=you|MSeg=kayo-ng|MGloss=you-LINK -3 umalis alis VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=leave|SpaceAfter=No -4 . . PUNCT _ _ 3 punct _ Gloss=. - -# sent_id = shopen-1.186b -# text = Huwag siyang pumarito. -# gloss = NEG he-LINK come.here -# text_en = He shouldn't come here -1 Huwag huwag AUX _ Mood=Imp|Polarity=Neg 3 aux _ Gloss=do-not -2 siyang siya PRON _ Case=Nom|Link=Yes|Number=Sing|Person=3|PronType=Prs 3 nsubj _ Gloss=he|MSeg=siya-ng|MGloss=he-LINK -3 pumarito parito VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=come-here|SpaceAfter=No -4 . . PUNCT _ _ 3 punct _ Gloss=. - -# sent_id = shopen-3.111a -# text = Magaalis ang babae ng bigas sa sako para sa bata. -# gloss = AP-FUT-take.out PIV woman OBJ rice DIR sack for BEN child -# text_en = The woman will take some rice out of a/the sack for a/the child -# AP = actor pivot; PIV = pivot marker -# http://www.seasite.niu.edu/Tagalog/tagalog_verbs.htm -1 Magaalis alis VERB _ Aspect=Prog|Mood=Ind|Voice=Act 0 root _ Gloss=will-take-out|MSeg=mag-a-alis|MGloss=AP-FUT-take.out -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 babae babae NOUN _ _ 1 nsubj _ Gloss=woman -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -5 bigas bigas NOUN _ _ 1 obj _ Gloss=rice -6 sa sa ADP _ Case=Dat 7 case _ Gloss=to -7 sako sako NOUN _ _ 1 obl _ Gloss=sack -8 para para ADP _ _ 10 case _ Gloss=for -9 sa sa ADP _ Case=Dat 10 case _ Gloss=to -10 bata bata NOUN _ _ 1 obl _ Gloss=child|SpaceAfter=No -11 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.111b -# text = Aalisin ng babae ang bigas sa sako para sa bata. -# gloss = FUT-take.out-OP ACT woman PIV rice DIR sack for BEN child -# text_en = A/the woman will take the rice out of a/the sack for a/the child -# OP = object pivot; PIV = pivot marker -1 Aalisin alis VERB _ Aspect=Prog|Mood=Ind|Voice=Pass 0 root _ Gloss=will-take-out|MSeg=a-alis-in|MGloss=FUT-take.out-OP -2 ng ng ADP _ Case=Gen 3 case _ Gloss=DET -3 babae babae NOUN _ _ 1 obj:agent _ Gloss=woman -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 bigas bigas NOUN _ _ 1 nsubj:pass _ Gloss=rice -6 sa sa ADP _ Case=Dat 7 case _ Gloss=to -7 sako sako NOUN _ _ 1 obl _ Gloss=sack -8 para para ADP _ _ 10 case _ Gloss=for -9 sa sa ADP _ Case=Dat 10 case _ Gloss=to -10 bata bata NOUN _ _ 1 obl _ Gloss=child|SpaceAfter=No -11 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.111c -# text = Aalisan ng babae ng bigas ang sako para sa bata. -# gloss = FUT-take.out-DP ACT woman OBJ rice PIV sack BEN child -# text_en = A/the woman will take some rice out of the sack for a/the child -# DP = directional pivot; PIV = pivot marker -1 Aalisan alis VERB _ Aspect=Prog|Mood=Ind|Voice=Lfoc 0 root _ Gloss=will-take-out|MSeg=a-alis-an|MGloss=FUT-take.out-DP -2 ng ng ADP _ Case=Gen 3 case _ Gloss=DET -3 babae babae NOUN _ _ 1 obj:agent _ Gloss=woman -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -5 bigas bigas NOUN _ _ 1 iobj:patient _ Gloss=rice -6 ang ang ADP _ Case=Nom 7 case _ Gloss=the|MGloss=PIV -7 sako sako NOUN _ _ 1 nsubj:lfoc _ Gloss=sack -8 para para ADP _ _ 10 case _ Gloss=for -9 sa sa ADP _ Case=Dat 10 case _ Gloss=to -10 bata bata NOUN _ _ 1 obl _ Gloss=child|SpaceAfter=No -11 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.111d -# text = Ipagaalis ng babae ng bigas sa sako ang bata. -# gloss = BP-FUT-take.out ACT woman OBJ rice DIR sack PIV child -# text_en = A/the woman will take some rice out of a/the sack for the child -# BP = benefactive pivot; PIV = pivot marker -1 Ipagaalis alis VERB _ Aspect=Prog|Mood=Ind|Voice=Bfoc 0 root _ Gloss=will-take-out|MSeg=Ipag-a-alis|MGloss=BP-FUT-take.out -2 ng ng ADP _ Case=Gen 3 case _ Gloss=DET -3 babae babae NOUN _ _ 1 obj:agent _ Gloss=woman -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -5 bigas bigas NOUN _ _ 1 iobj:patient _ Gloss=rice -6 sa sa ADP _ Case=Dat 7 case _ Gloss=to -7 sako sako NOUN _ _ 1 obl _ Gloss=sack -8 ang ang ADP _ Case=Nom 9 case _ Gloss=the|MGloss=PIV -9 bata bata NOUN _ _ 1 nsubj:bfoc _ Gloss=child|SpaceAfter=No -10 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.112a -# text = Matalino ang lalaking bumasa ng diyaryo. -# gloss = intelligent PIV man-LINK [AP]-read OBJ newspaper -# text_en = The man who read a newspaper is intelligent -1 Matalino matalino ADJ _ Degree=Pos 0 root _ Gloss=intelligent -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 lalaking lalaki NOUN _ Link=Yes 1 nsubj _ Gloss=man|MSeg=lalaki-ng|man-LINK -4 bumasa basa VERB _ Aspect=Perf|Mood=Ind|Voice=Act 3 acl:relcl _ Gloss=read|MSeg=b[um]asa|MGloss=[AP]-read -5 ng ng ADP _ Case=Gen 6 case _ Gloss=DET -6 diyaryo diyaryo NOUN _ _ 4 obj _ Gloss=newspaper|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.112b -# text = Interesante ang diyaryong binasa ng lalaki. -# gloss = interesting PIV newspaper-LINK [PERF]-read-OP ACT man -# text_en = The newspaper that the man read is interesting -1 Interesante interesante ADJ _ Degree=Pos 0 root _ Gloss=interesting -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 diyaryong diyaryo NOUN _ Link=Yes 1 nsubj _ Gloss=newspaper|MSeg=diyaryo-ng|MGloss=newspaper-LINK -4 binasa basa VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 3 acl:relcl _ Gloss=be-read|MSeg=b[in]asa-0|MGloss=[PERF]-read-OP -5 ng ng ADP _ Case=Gen 6 case _ Gloss=DET -6 lalaki lalaki NOUN _ _ 4 obj _ Gloss=man|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.114a -# text = Susulat lahat ang mga bata ng mga liham. -# source = Schachter and Otanes, 1972:147-8 -# gloss = AP-FUT-write all PIV PL child OBJ PL letter -# text_en = All the children will write letters -1 Susulat sulat VERB _ Aspect=Prog|Mood=Ind|Voice=Act 0 root _ Gloss=will-write|MSeg=0-su-sulat|MGloss=AP-FUT-write -2 lahat lahat DET _ PronType=Tot 5 det _ Gloss=all -3 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -4 mga mga DET _ Number=Plur|PronType=Ind 5 det _ Gloss=PLUR -5 bata bata NOUN _ _ 1 nsubj _ Gloss=child -6 ng ng ADP _ Case=Gen 8 case _ Gloss=DET -7 mga mga DET _ Number=Plur|PronType=Ind 8 det _ Gloss=PLUR -8 liham liham NOUN _ _ 1 obj _ Gloss=letter|SpaceAfter=No -9 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.114b -# text = Susulatin lahat ng mga bata ang mga liham. -# source = Schachter and Otanes, 1972:147-8 -# gloss = FUT-write-OP all ACT PL child PIV PL letter -# text_en = The/some children will write all the letters -1 Susulatin sulat VERB _ Aspect=Prog|Mood=Ind|Voice=Pass 0 root _ Gloss=will-be-written|MSeg=su-sulat-in|MGloss=FUT-write-OP -2 lahat lahat DET _ PronType=Tot 8 det _ Gloss=all -3 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -4 mga mga DET _ Number=Plur|PronType=Ind 5 det _ Gloss=PLUR -5 bata bata NOUN _ _ 1 obj:agent _ Gloss=child -6 ang ang ADP _ Case=Nom 8 case _ Gloss=the|MGloss=PIV -7 mga mga DET _ Number=Plur|PronType=Ind 8 det _ Gloss=PLUR -8 liham liham NOUN _ _ 1 nsubj:pass _ Gloss=letter|SpaceAfter=No -9 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.115a -# text = Nagalala ang lolo sa kaniyang sarili. -# source = Schachter, 1977:292 -# gloss = AP-worry PIV grandfather DIR his-LINK self -# text_en = Grandfather worried about himself -1 Nagalala alala VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=worried|MSeg=nag-alala|MGloss=AP-worry -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 lolo lolo NOUN _ _ 1 nsubj _ Gloss=grandfather -4 sa sa ADP _ Case=Dat 6 case _ Gloss=to -5 kaniyang siya PRON _ Case=Dat|Link=Yes|Number=Sing|Person=3|PronType=Prs 6 nmod _ Gloss=him|MSeg=kaniya-ng|MGloss=his-LINK -6 sarili sarili PRON _ PronType=Prs|Reflex=Yes 1 obl _ Gloss=self|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.115b -# text = Inalala ng lolo ang kaniyang sarili. -# source = Schachter, 1977:292 -# gloss = PERF-worry-OP ACT grandfather PIV his-LINK self -# text_en = Grandfather worried about himself -1 Inalala alala VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=worried|MSeg=in-alala-0|MGloss=PERF-worry-OP -2 ng ng ADP _ Case=Gen 3 case _ Gloss=DET -3 lolo lolo NOUN _ _ 1 obj:agent _ Gloss=grandfather -4 ang ang ADP _ Case=Nom 6 case _ Gloss=the|MGloss=PIV -5 kaniyang siya PRON _ Case=Dat|Link=Yes|Number=Sing|Person=3|PronType=Prs 6 nmod _ Gloss=him|MSeg=kaniya-ng|MGloss=his-LINK -6 sarili sarili PRON _ PronType=Prs|Reflex=Yes 1 nsubj _ Gloss=self|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.117a -# text = Iniabot niya sa bata ang kaniyang sariling larawan. -# gloss = PERF-OP-hand he(ACT) DIR child PIV his-LINK self-LINK picture -# text_en = He[i] handed the child[j] a picture of himself[i,j] -1 Iniabot abot VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=handed|MSeg=in-i-abot|MGloss=PERF-OP-hand -2 niya siya PRON _ Case=Gen|Number=Sing|Person=3|PronType=Prs 1 obj:agent _ Gloss=him|MGloss=he(ACT) -3 sa sa ADP _ Case=Dat 4 case _ Gloss=to -4 bata bata NOUN _ _ 1 obl _ Gloss=child -5 ang ang ADP _ Case=Nom 8 case _ Gloss=the|MGloss=PIV -6 kaniyang siya PRON _ Case=Dat|Link=Yes|Number=Sing|Person=3|PronType=Prs 7 nmod _ Gloss=him|MSeg=kaniya-ng|MGloss=his-LINK -7 sariling sarili PRON _ Link=Yes|PronType=Prs|Reflex=Yes 8 nmod _ Gloss=self|MSeg=sarili-ng|MGloss=self-LINK -8 larawan larawan NOUN _ _ 1 nsubj:pass _ Gloss=picture|SpaceAfter=No -9 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.117b -# text = Tumanggap ang Rosa ng sulat para sa bata sa kaniyang sarili. -# gloss = [AP]-receive PIV Rosa OBJ letter BEN BEN child DIR her-LINK self -# text_en = Rosa[i] received a letter for the child[j] from herself[i]/him-herself[j] -1 Tumanggap tanggap VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=received|MSeg=t[um]anggap|MGloss=[AP]-receive -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 Rosa Rosa PROPN _ Gender=Fem 1 nsubj _ Gloss=Rosa -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -5 sulat sulat NOUN _ _ 1 obj _ Gloss=letter -6 para para ADP _ _ 8 case _ Gloss=for|MGloss=BEN -7 sa sa ADP _ Case=Dat 8 case _ Gloss=to -8 bata bata NOUN _ _ 1 obl _ Gloss=child -9 sa sa ADP _ Case=Dat 11 case _ Gloss=from -10 kaniyang siya PRON _ Case=Dat|Link=Yes|Number=Sing|Person=3|PronType=Prs 11 nmod _ Gloss=him|MSeg=kaniya-ng|MGloss=his-LINK -11 sarili sarili PRON _ PronType=Prs|Reflex=Yes 1 obl _ Gloss=self|SpaceAfter=No -12 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.118a -# text = Magbigay ka sa kaniya ng kape. -# source = Schachter, 1977 -# gloss = AP-give you(PIV) DIR him OBJ coffee -# text_en = Give him some coffee! -1 Magbigay bigay VERB _ Mood=Imp|Voice=Act 0 root _ Gloss=give|MSeg=mag-bigay|MGloss=AP-give -2 ka ikaw PRON _ Case=Nom|Number=Sing|Person=2|PronType=Prs 1 nsubj _ Gloss=you|MGloss=you(PIV) -3 sa sa ADP _ Case=Dat 4 case _ Gloss=to|MGloss=DIR -4 kaniya siya PRON _ Case=Dat|Number=Sing|Person=3|PronType=Prs 1 obl _ Gloss=him -5 ng ng ADP _ Case=Gen 6 case _ Gloss=DET -6 kape kape NOUN _ _ 1 obj _ Gloss=coffee|SpaceAfter=No -7 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.118b -# text = Bigyan mo siya ng kape. -# source = Schachter, 1977 -# gloss = give-DP you(ACT) him(PIV) OBJ coffee -# text_en = Give him some coffee! -1 Bigyan bigay VERB _ Mood=Imp|Voice=Lfoc 0 root _ Gloss=give|MSeg=bigy-an|MGloss=give-DP -2 mo ikaw PRON _ Case=Gen|Number=Sing|Person=2|PronType=Prs 1 obj:agent _ Gloss=you|MGloss=you(ACT) -3 siya siya PRON _ Case=Nom|Number=Sing|Person=3|PronType=Prs 1 nsubj:lfoc _ Gloss=he|MGloss=him(PIV) -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET -5 kape kape NOUN _ _ 1 iobj:patient _ Gloss=coffee|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.119a -# text = Walisan natin ang sahig. -# source = Schachter and Otanes, 1972:407-9 -# gloss = Sweep-OP us(DU.ACT) PIV floor -# text_en = Let's sweep the floor -1 Walisan walis VERB _ Mood=Imp|Voice=Pass 0 root _ Gloss=sweep|MSeg=walis-an|MGloss=sweep-OP -2 natin ako PRON _ Case=Gen|Clusivity=In|Number=Plur|Person=1|PronType=Prs 1 obj:agent _ Gloss=us|MGloss=us(DU.ACT) -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 sahig sahig NOUN _ _ 1 nsubj:pass _ Gloss=floor|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.119b -# text = Walisan nila ang sahig. -# source = Schachter and Otanes, 1972:407-9 -# gloss = Sweep-OP they(ACT) PIV floor -# text_en = I want them to sweep the floor -1 Walisan walis VERB _ Mood=Imp|Voice=Pass 0 root _ Gloss=sweep|MSeg=walis-an|MGloss=sweep-OP -2 nila sila PRON _ Case=Gen|Number=Plur|Person=3|PronType=Prs 1 obj:agent _ Gloss=them|MGloss=they(ACT) -3 ang ang ADP _ Case=Nom 4 case _ Gloss=the|MGloss=PIV -4 sahig sahig NOUN _ _ 1 nsubj:pass _ Gloss=floor|SpaceAfter=No -5 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.120a -# text = Nagatubili siyang humiram ng pera sa banko. -# source = Schachter, 1972 -# gloss = AP-hesitate he(PIV)-LINK [AP]-borrow OBJ money DIR bank -# text_en = He hesitated to borrow money from a/the bank -1 Nagatubili atubili VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=hesitated|MSeg=nag-atubili|MGloss=AP-hesitate -2 siyang siya PRON _ Case=Nom|Link=Yes|Number=Sing|Person=3|PronType=Prs 1 nsubj _ Gloss=he|MSeg=siya-ng|MGloss=he(PIV)-LINK -3 humiram hiram VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 xcomp _ Gloss=borrow|MSeg=h[um]iram|MGloss=[AP]-borrow -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET|MGloss=OBJ -5 pera pera NOUN _ _ 3 obj _ Gloss=money -6 sa sa ADP _ Case=Dat 7 case _ Gloss=to|MGloss=DIR -7 banko banko NOUN _ _ 3 obl _ Gloss=bank|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.120b -# text = Nagatubili siyang hiramin ang pera sa banko. -# source = Schachter, 1972 -# gloss = AP-hesitate he(PIV)-LINK borrow-OP PIV money DIR bank -# text_en = He hesitated to borrow the money from the bank -1 Nagatubili atubili VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=hesitated|MSeg=nag-atubili|MGloss=AP-hesitate -2 siyang siya PRON _ Case=Nom|Link=Yes|Number=Sing|Person=3|PronType=Prs 1 nsubj _ Gloss=he|MSeg=siya-ng|MGloss=he(PIV)-LINK -3 hiramin hiram VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 1 xcomp _ Gloss=borrow|MSeg=hiram-in|MGloss=borrow-OP -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 pera pera NOUN _ _ 3 nsubj:pass _ Gloss=money -6 sa sa ADP _ Case=Dat 7 case _ Gloss=to|MGloss=DIR -7 banko banko NOUN _ _ 3 obl _ Gloss=bank|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.121a -# text = Gusto ni Juan suriin siya ng doktor. -# source = Schachter, 1977:295 -# gloss = want ACT John(LINK) examine-OP he(PIV) ACT doctor -# text_en = John wants the doctor to examine him -# The verb gusto seems to have only the patient focus voice. But here it appears in its base form, without aspectual inflection. -# https://www.tagalog.com/words/gusto.php suggests that "gusto" does not inflect. -# The patient focus means that the agent (which is the only argument) must appear in the genitive case. -1 Gusto gusto VERB _ Aspect=Hab|Voice=Pass 0 root _ Gloss=want -2 ni ni ADP _ Case=Gen 3 case _ Gloss=DET -3 Juan Juan PROPN _ Gender=Masc|Link=Yes 1 obj:agent _ Gloss=Juan|MGloss=John(LINK) -4 suriin suri VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 1 ccomp _ Gloss=examine|MSeg=suri-in|MGloss=examine-OP -5 siya siya PRON _ Case=Nom|Number=Sing|Person=3|PronType=Prs 4 nsubj:pass _ Gloss=he|MGloss=he(PIV) -6 ng ng ADP _ Case=Gen 7 case _ Gloss=DET|MGloss=ACT -7 doktor doktor NOUN _ _ 4 obj:agent _ Gloss=doctor|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.122a -# text = Masagwa ang tumatanda. -# gloss = disagreeable PIV [AP]-IMPERF-become-old -# text_en = It is disagreeable to become old -1 Masagwa masagwa ADJ _ Degree=Pos 0 root _ Gloss=disagreeable -2 ang ang ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 tumatanda tatanda VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 csubj _ Gloss=to-become-old|MGloss=[AP]-IMPERF-become-old|MSeg=t[um]a-tanda|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.122b -# text = Gusto niyang gumanda. -# gloss = want he/she(ACT)-LINK [AP]-beautiful -# text_en = She wants to become beautiful -1 Gusto gusto VERB _ Aspect=Hab|Voice=Pass 0 root _ Gloss=want -2 niyang siya PRON _ Case=Gen|Link=Yes|Number=Sing|Person=3|PronType=Prs 1 obj:agent _ Gloss=him|MSeg=niya-ng|MGloss=he/she(ACT)-LINK -3 gumanda ganda VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 xcomp _ Gloss=become-beautiful|MGloss=[AP]-beautiful|MSeg=g[um]anda|SpaceAfter=No -4 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.122c -# text = Gusto kong tumanggap ng gantimpala. -# gloss = want I(ACT)-LINK [AP]-receive OBJ prize -# text_en = I want to be the recipient of the prize -1 Gusto gusto VERB _ Aspect=Hab|Voice=Pass 0 root _ Gloss=want -2 kong ako PRON _ Case=Gen|Link=Yes|Number=Sing|Person=1|PronType=Prs 1 obj:agent _ Gloss=me|MSeg=ko-ng|MGloss=I(ACT)-LINK -3 tumanggap tanggap VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 xcomp _ Gloss=received|MSeg=t[um]anggap|MGloss=[AP]-receive -4 ng ng ADP _ Case=Gen 5 case _ Gloss=DET|MGloss=OBJ -5 gantimpala gantimpala NOUN _ _ 3 obj _ Gloss=prize|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.122d -# text = Gusto kong matanggap ang gantimpala. -# gloss = want I(ACT)-LINK OP-receive PIV prize -# text_en = I want to receive the prize -1 Gusto gusto VERB _ Aspect=Hab|Voice=Pass 0 root _ Gloss=want -2 kong ako PRON _ Case=Gen|Link=Yes|Number=Sing|Person=1|PronType=Prs 1 obj:agent _ Gloss=me|MSeg=ko-ng|MGloss=I(ACT)-LINK -3 matanggap tanggap VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 1 xcomp _ Gloss=received|MSeg=ma-tanggap|MGloss=OP-receive -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 gantimpala gantimpala NOUN _ _ 3 nsubj:pass _ Gloss=prize|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.123 -# text = Ayaw kong mamatay sa Maynila. -# gloss = not.want I(ACT)-LINK AP-die DIR Manila -# text_en = I don't want to die in Manila -1 Ayaw ayaw VERB _ Aspect=Hab|Voice=Pass 0 root _ Gloss=not-want|MGloss=not.want -2 kong ako PRON _ Case=Gen|Link=Yes|Number=Sing|Person=1|PronType=Prs 1 obj:agent _ Gloss=me|MSeg=ko-ng|MGloss=I(ACT)-LINK -3 mamatay patay VERB _ Aspect=Perf|Mood=Ind|Voice=Act 1 xcomp _ Gloss=die|MSeg=ma-matay|MGloss=AP-die -4 sa sa ADP _ Case=Dat 5 case _ Gloss=to|MGloss=DIR -5 Maynila Maynila PROPN _ _ 3 obl _ Gloss=Manila|SpaceAfter=No -6 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.124a -# text = Binisita ni Juan ang hari nang nagiisa. -# gloss = [PERF]-visit(OP) ACT Juan PIV king ADV AP.IMPERF-one -# text_en = Juan visited the king alone [either Juan or the king is alone] -1 Binisita bisita VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=visited|MSeg=b[in]isita|MGloss=[PERF]-visit(OP) -2 ni ni ADP _ Case=Gen 3 case _ Gloss=DET|MGloss=ACT -3 Juan Juan PROPN _ Gender=Masc 1 obj:agent _ Gloss=Juan|MGloss=John(LINK) -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 hari hari NOUN _ _ 1 nsubj:pass _ Gloss=king -6 nang nang SCONJ _ _ 7 mark _ Gloss=while|MGloss=ADV -7 nagiisa iisa VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 advcl _ Gloss=being-alone|MGloss=AP.IMPERF-one|MSeg=nag-iisa|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.124b -# text = Bumisita si Juan sa hari nang nagiisa. -# gloss = [AP.PERF]-visit PIV Juan DAT king ADV AP.IMPERF-one -# text_en = Juan visited the king alone [only Juan is alone] -1 Bumisita bisita VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=visited|MSeg=b[um]isita|MGloss=[AP.PERF]-visit -2 si si ADP _ Case=Nom 3 case _ Gloss=the|MGloss=PIV -3 Juan Juan PROPN _ Gender=Masc 1 nsubj _ Gloss=Juan|MGloss=John(LINK) -4 sa sa ADP _ Case=Dat 5 case _ Gloss=to|MGloss=DAT -5 hari hari NOUN _ _ 1 obl _ Gloss=king -6 nang nang SCONJ _ _ 7 mark _ Gloss=while|MGloss=ADV -7 nagiisa iisa VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 advcl _ Gloss=being-alone|MGloss=AP.IMPERF-one|MSeg=nag-iisa|SpaceAfter=No -8 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.124c -# text = Hinuli ng polis ang mgananakaw nang pumapasok sa banko. -# gloss = PERF-catch(OP) ACT police PIV thief ADV AP.IMPERF:enter DAT bank -# text_en = The police caught the thief entering the bank [either thief or police are entering] -1 Hinuli huli VERB _ Aspect=Perf|Mood=Ind|Voice=Pass 0 root _ Gloss=caught|MSeg=h[in]uli|MGloss=PERF-catch(OP) -2 ng ng ADP _ Case=Gen 3 case _ Gloss=DET|MGloss=ACT -3 polis polis NOUN _ _ 1 obj:agent _ Gloss=police -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 mgananakaw mgananakaw NOUN _ _ 1 nsubj:pass _ Gloss=thief -6 nang nang SCONJ _ _ 7 mark _ Gloss=while|MGloss=ADV -7 pumapasok pasok VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 advcl _ Gloss=entering|MSeg=p[um]a-pasok|MGloss=[AP]-IMPERF-enter -8 sa sa ADP _ Case=Dat 9 case _ Gloss=to|MGloss=DAT -9 banko banko NOUN _ _ 7 obl _ Gloss=bank|SpaceAfter=No -10 . . PUNCT _ _ 1 punct _ Gloss=. - -# sent_id = shopen-3.124d -# text = Nanghuli ng mgananakaw ang polis nang pumapasok sa banko. -# gloss = AP.PERF-catch OBJ thief PIV police ADV AV.IMPERF:enter DAT bank -# text_en = The police caught the thief entering the bank [either thief or police are entering] -1 Nanghuli huli VERB _ Aspect=Perf|Mood=Ind|Voice=Act 0 root _ Gloss=caught|MSeg=nang-huli|MGloss=AP.PERF-catch -2 ng ng ADP _ Case=Gen 3 case _ Gloss=DET|MGloss=OBJ -3 mgananakaw mgananakaw NOUN _ _ 1 obj _ Gloss=thief -4 ang ang ADP _ Case=Nom 5 case _ Gloss=the|MGloss=PIV -5 polis polis NOUN _ _ 1 nsubj _ Gloss=police -6 nang nang SCONJ _ _ 7 mark _ Gloss=while|MGloss=ADV -7 pumapasok pasok VERB _ Aspect=Imp|Mood=Ind|Voice=Act 1 advcl _ Gloss=entering|MSeg=p[um]a-pasok|MGloss=[AP]-IMPERF-enter -8 sa sa ADP _ Case=Dat 9 case _ Gloss=to|MGloss=DAT -9 banko banko NOUN _ _ 7 obl _ Gloss=bank|SpaceAfter=No -10 . . PUNCT _ _ 1 punct _ Gloss=. - -- GitLab