#! /usr/bin/python
# -*- coding: utf-8 -*-

from importer.PhraseAttributes import get_attributes, get_lex_attributes, empty_attributes
from syntax.models_phrase import PhraseTypeModel, PhraseType, LemmaOperator, LemmaCooccur, Lemma, Modification as DBModification, ModificationType


class Case:
    """Case symbol of a phrase type, restricted to the entries of ``values``."""

    values = ['nom', 'gen', 'dat', 'acc', 'inst', 'loc', 'voc', 'str', 'part', 'agr', 'pred', 'postp']

    def __init__(self, value):
        if value not in Case.values:
            # Echo the rejected symbol on stdout before failing.
            print(value)
            # NOTE(review): UnknownError is not imported at the top of this
            # file; confirm it is defined further down in the module.
            raise UnknownError()
        self._value = value

    def __str__(self):
        return self._value


class Preposition:
    """A preposition together with the case it is paired with."""

    def __init__(self, value, case):
        self._value = value
        self._case = case

    def __str__(self):
        # Rendered as "<preposition>,<case>".
        return ','.join((self._value, str(self._case)))


class ComplexPreposition:
    """A complex preposition kept as a single opaque string."""

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return self._value


class ClauseType:
    """Clause type symbol with an optional list of realisations."""

    values = ['int', 'rel', u'aż', 'bo', 'czy', u'dopóki', 'gdy', 'jak', 'jakby', 'jakoby', u'jeśli', 'kiedy', 'zanim', u'że', u'żeby', u'żeby2']

    def __init__(self, value, realisations):
        if value not in ClauseType.values:
            print(value)
            raise UnknownError()
        self._value = value
        self._realisations = realisations

    @classmethod
    def fromTree(cls, tree):
        """Build a ClauseType from a parsed tree node.

        The value is read from the first feature (asserted to be named
        'conjunction'); when a second feature exists, the ``value`` of each
        of its 'symbol' nodes becomes a realisation.
        """
        features = tree._children[0]._children
        conjunction = features[0]
        value = conjunction._children[0]._attrs['value']
        assert conjunction._attrs['name'] == 'conjunction'
        realisations = None
        if len(features) > 1:
            realisations = []
            for node in features[1]._children[0]._children:
                assert node._name == 'symbol'
                realisations.append(node._attrs['value'])
        return cls(value, realisations)

    def __str__(self):
        if self._realisations is None:
            return self._value
        # Realisations are appended as "[a;b;...]".
        return self._value + '[' + ';'.join(self._realisations) + ']'


class Aspect:
    """Aspect symbol, restricted to the entries of ``values``."""

    values = ['_', 'imperf', 'perf']

    def __init__(self, value):
        if value not in Aspect.values:
            print(value)
            raise UnknownError()
        self._value = value

    def __str__(self):
        return self._value


class AdverbialCategory:
    """Adverbial category symbol with optional limiting phrases."""

    values = ['abl', 'adl', 'caus', 'dest', 'dur', 'instr', 'locat', 'misc', 'mod', 'perl', 'pron', 'temp']

    def __init__(self, value, limitations, limitation_tree):
        if value not in AdverbialCategory.values:
            print(value)
            raise UnknownError()
        self._value = value
        self._limitations = limitations
        self._limitation_tree = limitation_tree

    @classmethod
    def fromTree(cls, tree):
        """Build an AdverbialCategory from a parsed tree node.

        When the first child is named 'expansions' the category is read from
        the second child, otherwise from the first. A 'symbol' node carries
        the value directly; any other node carries it two levels deeper.
        """
        limitations = None
        limitation_tree = None
        first = tree._children[0]
        if first._attrs['name'] == 'expansions':
            node = tree._children[1]._children[0]
            if node._name == 'symbol':
                value = node._attrs['value']
            else:
                value = node._children[0]._children[0]._attrs['value']
        else:
            node = first._children[0]
            if node._name == 'symbol':
                value = node._attrs['value']
            else:
                value = node._children[0]._children[0]._attrs['value']
                # NOTE(review): the limitations check is kept inside this
                # branch (structured node read from the first child) —
                # confirm against the original indentation.
                if len(node._children) > 1:
                    restricted = node._children[1]._children[0]._children
                    limitations = [phrase_from_tree(subtree) for subtree in restricted]
                    # The raw first limitation node is kept as well; LexXP
                    # reads it back through ``_limitation_tree``.
                    limitation_tree = restricted[0]
        return cls(value, limitations, limitation_tree)

    def __str__(self):
        if self._limitations is None:
            return self._value
        rendered = [str(limitation) for limitation in self._limitations]
        return self._value + '[' + ','.join(rendered) + ']'


class ComparCategory:
    """Category symbol of a comparative phrase; not validated."""

    def __init__(self, value):
        self._value = value

    def __str__(self):
        return self._value


class Words:
    """Lemmas of a lexicalised phrase with their selection and co-occurrence modes."""

    def __init__(self, cooccur, selection, lemmas):
        self._cooccur = cooccur
        self._selection = selection
        self._lemmas = lemmas

    @classmethod
    def fromTree(cls, tree):
        """Read selection (lower-cased), co-occurrence and lemma strings from ``tree``."""
        features = tree._children[0]._children
        cooccur = features[1]._children[0]._attrs['value']
        selection = features[0]._children[0]._attrs['value'].lower()
        lemmas = [string._content for string in features[2]._children[0]._children]
        return cls(cooccur, selection, lemmas)

    def __str__(self):
        # Fewer than two lemmas need no operator wrapper.
        if len(self._lemmas) < 2:
            return ','.join(self._lemmas)
        if self._selection == 'xor':
            operator, separator = 'XOR', ','
        elif self._cooccur == 'concat':
            operator, separator = 'OR', ','
        else:
            operator, separator = 'OR', ';'
        return operator + '(' + separator.join(self._lemmas) + ')'


class Modification:
    """Modification of a lexicalised phrase: a type symbol plus dependent positions."""

    def __init__(self, atr, dependents):
        self._atr = atr
        self._dependents = dependents
        # for overriding order rules when building text representation
        self._order = None

    @classmethod
    def fromTree(cls, tree):
        """Build a Modification; dependents are parsed with ``Position.fromTree``."""
        features = tree._children[0]._children
        atr = features[0]._children[0]._attrs['value']
        # Imported inside the method, as in the original code.
        from importer.Position import Position
        if len(features) > 1:
            dependents = [Position.fromTree(subtree) for subtree in features[1]._children[0]._children]
        else:
            dependents = []
        return cls(atr, dependents)

    def __str__(self):
        if not self._dependents:
            return self._atr
        rendered = []
        for position in self._dependents:
            # A position without a function contributes an empty prefix.
            function = position._function._value if position._function else ''
            phrases = ';'.join(map(str, position._phrases))
            rendered.append(function + '{' + phrases + '}')
        return self._atr + '(' + '+'.join(rendered) + ')'


class Phrase(object):
    """Common base of all phrase types: a type name, an id and an attribute flag."""

    def __init__(self, name, id, no_attributes=False):
        self._name = name
        self._no_attributes = no_attributes
        self._id = id

    def store(self, position, stored_positions):
        """Persist the phrase; implemented by subclasses."""
        raise NotImplementedError

    def getId(self):
        return self._id

    def getCase(self):
        # Subclasses that carry a case override this.
        return None


class NonLexPhrase(Phrase):
    """Phrase type that is not lexicalised."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def store(self, position, stored_positions):
        """Get or create the PhraseType row for this phrase and return it.

        The row is looked up by text representation only; the remaining
        fields are passed as ``defaults``. When ``position`` is given, the
        phrase type is also added to ``position.phrase_types``.
        """
        main_type, _ = PhraseTypeModel.objects.get_or_create(
            name=self._name,
            phraseologic=False,
            defaults={'priority': 2},
        )
        if self._no_attributes:
            attributes = empty_attributes()
        else:
            attributes = get_attributes(self, stored_positions)
        text_rep = str(self)
        print('text_rep: ', text_rep, 'main_type: ', main_type, 'attributes: ', attributes)
        phrase, _ = PhraseType.objects.get_or_create(
            text_rep=text_rep,
            defaults={
                'main_type': main_type,
                'attributes': attributes,
                'lexicalized_phrase': None,
            },
        )
        # position is None for nested lex phrases
        if position is not None:
            position.phrase_types.add(phrase)
        return phrase
# Module-level cache of DBModification objects already created by
# LexPhrase.store, keyed by the text representation of the modification.
stored_modifications = dict()


def _symbol_value(tree, index):
    """Return the 'value' attribute of the first child of ``tree._children[index]``."""
    return tree._children[index]._children[0]._attrs['value']


def _nested_symbol_value(tree, index):
    """Like ``_symbol_value`` but one level deeper (used for the gender feature)."""
    return tree._children[index]._children[0]._children[0]._attrs['value']


def _preposition_from_tree(tree):
    """Build a Preposition from children 0 (preposition) and 1 (case) of ``tree``."""
    case = Case(_symbol_value(tree, 1))
    return Preposition(_symbol_value(tree, 0), case)


def _inherent_sie(tree, index):
    """Return 'się' when the feature at ``index`` has value 'true', else ''."""
    return 'się' if _symbol_value(tree, index) == 'true' else ''


def _lex_text(*fields):
    """Join already-stringified fields into the 'lex(a,b,...)' representation."""
    return 'lex(' + ','.join(fields) + ')'


class LexPhrase(Phrase):
    """Base class of lexicalised phrase types.

    Subclasses return the underlying non-lexicalised phrase from
    ``_lex_phrase()``. Unless their name is 'compar' or 'xp', ``store``
    also reads ``self._words`` and ``self._modification``.
    """

    def _lex_phrase(self):
        raise NotImplementedError

    def _store_modification(self, stored_positions):
        """Return the DBModification for ``self._modification``, creating it on first use."""
        mod_key = str(self._modification)
        if mod_key in stored_modifications:
            return stored_modifications[mod_key]
        mod_type = ModificationType.objects.get(name=self._modification._atr)
        modification = DBModification.objects.create(mod_type=mod_type)
        # TODO? this is hacky: done like in Schema, taking advantage of the fact
        # that both Schema and Modifications have a m2m field ‘positions’
        # TODO ask TB what exactly the 3rd argument to Position.store does
        mod_positions = set()
        for mod_position in self._modification._dependents:
            mod_position.store(modification, stored_positions, mod_positions, None)
        stored_modifications[mod_key] = modification
        return modification

    def store(self, position, stored_positions):
        """Get or create the PhraseType row for this lexicalised phrase and return it.

        The underlying phrase is stored first (with no position). The row is
        looked up by text representation only; the other fields are passed as
        ``defaults``. When ``position`` is given, the phrase type is added to
        it and every schema of that position is flagged as phraseologic.
        """
        lex = self._lex_phrase().store(None, None)
        main_type, _ = PhraseTypeModel.objects.get_or_create(
            name='lex',
            phraseologic=True,
            defaults={'priority': 0},
        )
        if self._name not in ('compar', 'xp'):
            assert len(self._words._lemmas) > 0
            lemma_operator = LemmaOperator.objects.get(name=self._words._selection)
            lemma_cooccur = LemmaCooccur.objects.get(name=self._words._cooccur)
            lemmata = [Lemma.objects.get_or_create(name=lemma)[0] for lemma in self._words._lemmas]
            modification = self._store_modification(stored_positions)
        else:
            # 'compar' and 'xp' phrases carry no words/modification of their own.
            lemma_operator, lemma_cooccur, lemmata, modification = None, None, None, None
        attributes = empty_attributes() if self._no_attributes else get_lex_attributes(self, stored_positions)
        phrase, _ = PhraseType.objects.get_or_create(
            text_rep=str(self),
            defaults={
                'main_type': main_type,
                'attributes': attributes,
                'lexicalized_phrase': lex,
                'lemma_operator': lemma_operator,
                'lemma_cooccur': lemma_cooccur,
                'modification': modification,
            },
        )
        if lemmata:
            phrase.lemmata.set(lemmata)
        if position is not None:
            position.phrase_types.add(phrase)
            # @TODO: should be done only for the current one, though lex
            # phrases basically do not repeat, so the overhead is small
            # NOTE(review): this loop is kept under the None guard, since
            # ``position.schemata`` cannot be read when position is None.
            for schema in position.schemata.all():
                schema.phraseologic = True
                schema.save()
        return phrase


# appears in realisations
class Adverb(NonLexPhrase):
    """Adverb phrase rendered as a fixed 'lex(advp, ...)' text."""

    def __init__(self, base, id):
        super().__init__('adverb', id)
        self._base = base

    @classmethod
    def fromTree(cls, tree, id):
        return cls(_symbol_value(tree, 0), id)

    def __str__(self):
        return 'lex(advp, [_,' + str(self._base) + '], atr)'


class NP(NonLexPhrase):
    """np(case)"""

    def __init__(self, case, id):
        super().__init__('np', id)
        self._case = case

    @classmethod
    def fromTree(cls, tree, id):
        return cls(Case(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'np(' + str(self._case) + ')'

    def getCase(self):
        return self._case._value


class LexNP(LexPhrase):
    """lex(np, number, words, modification)"""

    def __init__(self, np, number, words, modification, id):
        super().__init__('np', id)
        self._np = np
        self._number = number
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        np = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        words = Words.fromTree(tree._children[2])
        modifications = Modification.fromTree(tree._children[3])
        return cls(np, number, words, modifications, id)

    def _lex_phrase(self):
        return self._np

    def __str__(self):
        return self.retyped(self._np)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, str(self._words), str(self._modification))


class PrepNP(NonLexPhrase):
    """prepnp(preposition,case)"""

    def __init__(self, prep, id):
        super().__init__('prepnp', id)
        self._prep = prep

    @classmethod
    def fromTree(cls, tree, id):
        return cls(_preposition_from_tree(tree), id)

    def __str__(self):
        return 'prepnp(' + str(self._prep) + ')'

    def getCase(self):
        return self._prep._case._value


class LexPrepNP(LexPhrase):
    """lex(prepnp, number, words, modification)"""

    def __init__(self, prepnp, number, words, modification, id):
        super().__init__('prepnp', id)
        self._prepnp = prepnp
        self._number = number
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        prepnp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        words = Words.fromTree(tree._children[2])
        modifications = Modification.fromTree(tree._children[3])
        return cls(prepnp, number, words, modifications, id)

    def _lex_phrase(self):
        return self._prepnp

    def __str__(self):
        return self.retyped(self._prepnp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, str(self._words), str(self._modification))


class PrepNumP(NonLexPhrase):
    """prepnump(preposition,case)"""

    def __init__(self, prep, id):
        super().__init__('prepnump', id)
        self._prep = prep

    @classmethod
    def fromTree(cls, tree, id):
        return cls(_preposition_from_tree(tree), id)

    def __str__(self):
        return 'prepnump(' + str(self._prep) + ')'


class LexPrepNumP(LexPhrase):
    """lex(prepnump, nums, words, modification); stored without attributes."""

    def __init__(self, prepnump, nums, words, modification, id):
        super().__init__('prepnump', id, no_attributes=True)
        self._prepnump = prepnump
        self._nums = nums
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        prepnump = phrase_from_tree(tree._children[0]._children[0])
        nums = Words.fromTree(tree._children[1])
        words = Words.fromTree(tree._children[2])
        modifications = Modification.fromTree(tree._children[3])
        return cls(prepnump, nums, words, modifications, id)

    def _lex_phrase(self):
        return self._prepnump

    def __str__(self):
        return self.retyped(self._prepnump)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), str(self._nums), str(self._words), str(self._modification))


class NumP(NonLexPhrase):
    """nump(case)"""

    def __init__(self, case, id):
        super().__init__('nump', id)
        self._case = case

    @classmethod
    def fromTree(cls, tree, id):
        return cls(Case(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'nump(' + str(self._case) + ')'


class LexNumP(LexPhrase):
    """lex(nump, nums, words, modification); stored without attributes."""

    def __init__(self, nump, nums, words, modification, id):
        super().__init__('nump', id, no_attributes=True)
        self._nump = nump
        self._nums = nums
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        nump = phrase_from_tree(tree._children[0]._children[0])
        nums = Words.fromTree(tree._children[1])
        words = Words.fromTree(tree._children[2])
        modifications = Modification.fromTree(tree._children[3])
        return cls(nump, nums, words, modifications, id)

    def _lex_phrase(self):
        return self._nump

    def __str__(self):
        return self.retyped(self._nump)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), str(self._nums), str(self._words), str(self._modification))


class PrepAdjP(NonLexPhrase):
    """prepadjp(preposition,case)"""

    def __init__(self, prep, id):
        super().__init__('prepadjp', id)
        self._prep = prep

    @classmethod
    def fromTree(cls, tree, id):
        return cls(_preposition_from_tree(tree), id)

    def __str__(self):
        return 'prepadjp(' + str(self._prep) + ')'


class LexPrepAdjP(LexPhrase):
    """lex(prepadjp, number, gender, degree, words, modification)"""

    def __init__(self, prepadjp, number, gender, degree, words, modification, id):
        super().__init__('prepadjp', id)
        self._prepadjp = prepadjp
        self._number = number
        self._gender = gender
        self._degree = degree
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        prepadjp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        gender = _nested_symbol_value(tree, 2)
        degree = _symbol_value(tree, 3)
        words = Words.fromTree(tree._children[4])
        modifications = Modification.fromTree(tree._children[5])
        return cls(prepadjp, number, gender, degree, words, modifications, id)

    def _lex_phrase(self):
        return self._prepadjp

    def __str__(self):
        return self.retyped(self._prepadjp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, self._gender, self._degree,
                         str(self._words), str(self._modification))


class ComPrepNP(NonLexPhrase):
    """comprepnp(complex preposition)"""

    def __init__(self, prep, id):
        super().__init__('comprepnp', id)
        self._prep = prep

    @classmethod
    def fromTree(cls, tree, id):
        # The preposition text is the content of the LAST child's first node.
        prep = ComplexPreposition(tree._children[-1]._children[0]._content)
        return cls(prep, id)

    def __str__(self):
        return 'comprepnp(' + str(self._prep) + ')'


class CP(NonLexPhrase):
    """cp(clause type)"""

    def __init__(self, type, id):
        super().__init__('cp', id)
        self._type = type

    @classmethod
    def fromTree(cls, tree, id):
        return cls(ClauseType.fromTree(tree._children[0]), id)

    def __str__(self):
        return 'cp(' + str(self._type) + ')'


class LexCP(LexPhrase):
    """lex(cp, negativity, words, inherent się, modification)"""

    def __init__(self, cp, negativity, words, inherent_sie, modification, id):
        super().__init__('cp', id)
        self._cp = cp
        self._negativity = negativity
        self._words = words
        self._inherent_sie = inherent_sie
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        cp = phrase_from_tree(tree._children[0]._children[0])
        negativity = _symbol_value(tree, 1)
        words = Words.fromTree(tree._children[2])
        inherent_sie = _inherent_sie(tree, 3)
        modifications = Modification.fromTree(tree._children[4])
        return cls(cp, negativity, words, inherent_sie, modifications, id)

    def _lex_phrase(self):
        return self._cp

    def __str__(self):
        return self.retyped(self._cp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._negativity, str(self._words),
                         str(self._inherent_sie), str(self._modification))


class NCP(NonLexPhrase):
    """ncp(case,clause type)"""

    def __init__(self, case, type, id):
        super().__init__('ncp', id)
        self._case = case
        self._type = type

    @classmethod
    def fromTree(cls, tree, id):
        case = Case(_symbol_value(tree, 0))
        type = ClauseType.fromTree(tree._children[1])
        return cls(case, type, id)

    def __str__(self):
        return 'ncp(' + str(self._case) + ',' + str(self._type) + ')'


class LexNCP(LexPhrase):
    """lex(ncp, negativity, words, inherent się, modification)"""

    def __init__(self, ncp, negativity, words, inherent_sie, modification, id):
        super().__init__('ncp', id)
        self._ncp = ncp
        self._negativity = negativity
        self._words = words
        self._inherent_sie = inherent_sie
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        ncp = phrase_from_tree(tree._children[0]._children[0])
        negativity = _symbol_value(tree, 1)
        words = Words.fromTree(tree._children[2])
        inherent_sie = _inherent_sie(tree, 3)
        modifications = Modification.fromTree(tree._children[4])
        return cls(ncp, negativity, words, inherent_sie, modifications, id)

    def _lex_phrase(self):
        return self._ncp

    def __str__(self):
        return self.retyped(self._ncp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._negativity, str(self._words),
                         str(self._inherent_sie), str(self._modification))


class PrepNCP(NonLexPhrase):
    """prepncp(preposition,case,clause type)"""

    def __init__(self, prep, type, id):
        super().__init__('prepncp', id)
        self._prep = prep
        self._type = type

    @classmethod
    def fromTree(cls, tree, id):
        prep = _preposition_from_tree(tree)
        type = ClauseType.fromTree(tree._children[2])
        return cls(prep, type, id)

    def __str__(self):
        return 'prepncp(' + str(self._prep) + ',' + str(self._type) + ')'


class Nonch(NonLexPhrase):
    """nonch; stored without attributes."""

    def __init__(self, id):
        super().__init__('nonch', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'nonch'


class InfP(NonLexPhrase):
    """infp(aspect)"""

    def __init__(self, aspect, id):
        super().__init__('infp', id)
        self._aspect = aspect

    @classmethod
    def fromTree(cls, tree, id):
        return cls(Aspect(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'infp(' + str(self._aspect) + ')'


class LexInfP(LexPhrase):
    """lex(infp, negativity, words, inherent się, modification)"""

    def __init__(self, infp, negativity, words, inherent_sie, modification, id):
        super().__init__('infp', id)
        self._infp = infp
        self._negativity = negativity
        self._words = words
        self._inherent_sie = inherent_sie
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        infp = phrase_from_tree(tree._children[0]._children[0])
        negativity = _symbol_value(tree, 1)
        words = Words.fromTree(tree._children[2])
        inherent_sie = _inherent_sie(tree, 3)
        modifications = Modification.fromTree(tree._children[4])
        return cls(infp, negativity, words, inherent_sie, modifications, id)

    def _lex_phrase(self):
        return self._infp

    def __str__(self):
        return self.retyped(self._infp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._negativity, str(self._words),
                         str(self._inherent_sie), str(self._modification))


class PrepGerP(NonLexPhrase):
    """prepgerp(preposition,case)"""

    def __init__(self, prep, id):
        super().__init__('prepgerp', id)
        self._prep = prep

    @classmethod
    def fromTree(cls, tree, id):
        return cls(_preposition_from_tree(tree), id)

    def __str__(self):
        return 'prepgerp(' + str(self._prep) + ')'


class LexPrepGerP(LexPhrase):
    """lex(prepgerp, number, negativity, words, modification)"""

    def __init__(self, prepgerp, number, negativity, words, inherent_sie, modification, id):
        super().__init__('prepgerp', id)
        self._prepgerp = prepgerp
        self._number = number
        self._negativity = negativity
        self._words = words
        self._inherent_sie = inherent_sie
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        prepgerp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        negativity = _symbol_value(tree, 2)
        words = Words.fromTree(tree._children[3])
        inherent_sie = _inherent_sie(tree, 4)
        modifications = Modification.fromTree(tree._children[5])
        return cls(prepgerp, number, negativity, words, inherent_sie, modifications, id)

    def _lex_phrase(self):
        return self._prepgerp

    def __str__(self):
        return self.retyped(self._prepgerp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        # NOTE(review): ``_inherent_sie`` is parsed and stored but not
        # rendered here, unlike LexCP/LexNCP/LexInfP/LexPActP. Kept as is
        # because the text is used as the lookup key in store(); confirm
        # whether the omission is intended.
        return _lex_text(str(new_type), self._number, self._negativity,
                         str(self._words), str(self._modification))


class PPasP(NonLexPhrase):
    """ppasp(case)"""

    def __init__(self, case, id):
        super().__init__('ppasp', id)
        self._case = case

    @classmethod
    def fromTree(cls, tree, id):
        return cls(Case(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'ppasp(' + str(self._case) + ')'


class LexPPasP(LexPhrase):
    """lex(ppasp, number, gender, negativity, words, modification)"""

    def __init__(self, ppasp, number, gender, negativity, words, modification, id):
        super().__init__('ppasp', id)
        self._ppasp = ppasp
        self._number = number
        self._gender = gender
        self._negativity = negativity
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        ppasp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        gender = _nested_symbol_value(tree, 2)
        negativity = _symbol_value(tree, 3)
        words = Words.fromTree(tree._children[4])
        modifications = Modification.fromTree(tree._children[5])
        return cls(ppasp, number, gender, negativity, words, modifications, id)

    def _lex_phrase(self):
        return self._ppasp

    def __str__(self):
        return self.retyped(self._ppasp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, self._gender, self._negativity,
                         str(self._words), str(self._modification))


class PrepPPasP(NonLexPhrase):
    """prepppasp(preposition,case)"""

    def __init__(self, prep, id):
        super().__init__('prepppasp', id)
        self._prep = prep

    @classmethod
    def fromTree(cls, tree, id):
        return cls(_preposition_from_tree(tree), id)

    def __str__(self):
        return 'prepppasp(' + str(self._prep) + ')'


class LexPrepPPasP(LexPhrase):
    """lex(prepppasp, number, gender, negativity, words, modification)"""

    def __init__(self, prepppasp, number, gender, negativity, words, modification, id):
        super().__init__('prepppasp', id)
        self._prepppasp = prepppasp
        self._number = number
        self._gender = gender
        self._negativity = negativity
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        prepppasp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        gender = _nested_symbol_value(tree, 2)
        negativity = _symbol_value(tree, 3)
        words = Words.fromTree(tree._children[4])
        modifications = Modification.fromTree(tree._children[5])
        return cls(prepppasp, number, gender, negativity, words, modifications, id)

    def _lex_phrase(self):
        return self._prepppasp

    def __str__(self):
        return self.retyped(self._prepppasp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, self._gender, self._negativity,
                         str(self._words), str(self._modification))


class PActP(NonLexPhrase):
    """pactp(case)"""

    def __init__(self, case, id):
        super().__init__('pactp', id)
        self._case = case

    @classmethod
    def fromTree(cls, tree, id):
        return cls(Case(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'pactp(' + str(self._case) + ')'


class LexPActP(LexPhrase):
    """lex(pactp, number, gender, negativity, words, inherent się, modification)"""

    def __init__(self, pactp, number, gender, negativity, words, inherent_sie, modification, id):
        super().__init__('pactp', id)
        self._pactp = pactp
        self._number = number
        self._gender = gender
        self._negativity = negativity
        self._words = words
        self._inherent_sie = inherent_sie
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        pactp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        gender = _nested_symbol_value(tree, 2)
        negativity = _symbol_value(tree, 3)
        words = Words.fromTree(tree._children[4])
        inherent_sie = _inherent_sie(tree, 5)
        modifications = Modification.fromTree(tree._children[6])
        return cls(pactp, number, gender, negativity, words, inherent_sie, modifications, id)

    def _lex_phrase(self):
        return self._pactp

    def __str__(self):
        return self.retyped(self._pactp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, self._gender, self._negativity,
                         str(self._words), self._inherent_sie, str(self._modification))


class XP(NonLexPhrase):
    """xp(adverbial category)"""

    def __init__(self, category, id):
        super().__init__('xp', id)
        self._category = category

    @classmethod
    def fromTree(cls, tree, id):
        return cls(AdverbialCategory.fromTree(tree), id)

    def __str__(self):
        return 'xp(' + str(self._category) + ')'


class Dummy:
    """Minimal stand-in for a tree node, offering only ``_attrs`` and ``_children``."""

    def __init__(self):
        self._attrs = {}
        self._children = []

    def __str__(self):
        return str(self._attrs) + str(self._children)


class LexXP(LexPhrase):
    """Lexicalised xp: wraps another lexicalised phrase and renders it retyped as the xp."""

    def __init__(self, xp, lex, id):
        super().__init__('xp', id)
        self._xp = xp
        self._lex = lex

    @classmethod
    def fromTree(cls, tree, id):
        xp = phrase_from_tree(tree._children[0]._children[0])
        # Assemble a synthetic 'lex' node: its first child wraps the xp's
        # limitation tree, the remaining children are taken from ``tree``.
        wrapper = Dummy()
        wrapper._children = [xp._category._limitation_tree]
        fake_node = Dummy()
        fake_node._attrs['type'] = 'lex'
        fake_node._children = [wrapper] + tree._children[1:]
        lex = phrase_from_tree(fake_node)
        return cls(xp, lex, id)

    def _lex_phrase(self):
        return self._xp

    def __str__(self):
        return self._lex.retyped(self._xp)


class AdvP(NonLexPhrase):
    """advp(adverbial category)"""

    def __init__(self, category, id):
        super().__init__('advp', id)
        self._category = category

    @classmethod
    def fromTree(cls, tree, id):
        return cls(AdverbialCategory.fromTree(tree), id)

    def __str__(self):
        return 'advp(' + str(self._category) + ')'


class LexAdvP(LexPhrase):
    """lex(advp, degree, words, modification)"""

    def __init__(self, advp, degree, words, modification, id):
        super().__init__('advp', id)
        self._advp = advp
        self._degree = degree
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        advp = phrase_from_tree(tree._children[0]._children[0])
        degree = _symbol_value(tree, 1)
        words = Words.fromTree(tree._children[2])
        modifications = Modification.fromTree(tree._children[3])
        return cls(advp, degree, words, modifications, id)

    def _lex_phrase(self):
        return self._advp

    def __str__(self):
        return self.retyped(self._advp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._degree, str(self._words), str(self._modification))


class AdjP(NonLexPhrase):
    """adjp(case)"""

    def __init__(self, case, id):
        super().__init__('adjp', id)
        self._case = case

    @classmethod
    def fromTree(cls, tree, id):
        return cls(Case(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'adjp(' + str(self._case) + ')'


class LexAdjP(LexPhrase):
    """lex(adjp, number, gender, degree, words, modification)"""

    def __init__(self, adjp, number, gender, degree, words, modification, id):
        super().__init__('adjp', id)
        self._adjp = adjp
        self._number = number
        self._gender = gender
        self._degree = degree
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        adjp = phrase_from_tree(tree._children[0]._children[0])
        number = _symbol_value(tree, 1)
        gender = _nested_symbol_value(tree, 2)
        degree = _symbol_value(tree, 3)
        words = Words.fromTree(tree._children[4])
        modifications = Modification.fromTree(tree._children[5])
        return cls(adjp, number, gender, degree, words, modifications, id)

    def _lex_phrase(self):
        return self._adjp

    def __str__(self):
        return self.retyped(self._adjp)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), self._number, self._gender, self._degree,
                         str(self._words), str(self._modification))


class Compar(NonLexPhrase):
    """compar(category)"""

    def __init__(self, category, id):
        super().__init__('compar', id)
        self._category = category

    @classmethod
    def fromTree(cls, tree, id):
        return cls(ComparCategory(_symbol_value(tree, 0)), id)

    def __str__(self):
        return 'compar(' + str(self._category) + ')'


class LexCompar(LexPhrase):
    """lex(compar, nested phrases..., natr)"""

    def __init__(self, compar, lexes, id):
        super().__init__('compar', id)
        self._compar = compar
        self._lexes = lexes

    @classmethod
    def fromTree(cls, tree, id):
        compar = phrase_from_tree(tree._children[0]._children[0])
        lexes = [phrase_from_tree(subtree) for subtree in tree._children[1]._children[0]._children]
        return cls(compar, lexes, id)

    def _lex_phrase(self):
        return self._compar

    def __str__(self):
        return self.retyped(self._compar)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        nested = ','.join([str(lex) for lex in self._lexes])
        return 'lex(' + str(new_type) + ',' + nested + ',natr)'


class Qub(NonLexPhrase):
    """qub; stored without attributes."""

    def __init__(self, id):
        super().__init__('qub', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'qub'


class LexQub(LexPhrase):
    """lex(qub, words, modification); stored without attributes."""

    def __init__(self, qub, words, modification, id):
        super().__init__('qub', id, no_attributes=True)
        self._qub = qub
        self._words = words
        self._modification = modification

    @classmethod
    def fromTree(cls, tree, id):
        qub = phrase_from_tree(tree._children[0]._children[0])
        words = Words.fromTree(tree._children[1])
        modifications = Modification.fromTree(tree._children[2])
        return cls(qub, words, modifications, id)

    def _lex_phrase(self):
        return self._qub

    def __str__(self):
        return self.retyped(self._qub)

    def retyped(self, new_type):
        """Return the text representation with ``new_type`` in place of the phrase type."""
        return _lex_text(str(new_type), str(self._words), str(self._modification))


class Refl(NonLexPhrase):
    """refl; stored without attributes."""

    def __init__(self, id):
        super().__init__('refl', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'refl'


class Recip(NonLexPhrase):
    """recip; stored without attributes."""

    def __init__(self, id):
        super().__init__('recip', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'recip'


class OR(NonLexPhrase):
    """or; stored without attributes."""

    def __init__(self, id):
        super().__init__('or', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'or'


class E(NonLexPhrase):
    """E; stored without attributes."""

    def __init__(self, id):
        super().__init__('E', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'E'


class PossP(NonLexPhrase):
    """possp; stored without attributes, case is always 'gen'."""

    def __init__(self, id):
        super().__init__('possp', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'possp'

    def getCase(self):
        return 'gen'


class DistrP(NonLexPhrase):
    """distrp; stored without attributes."""

    def __init__(self, id):
        super().__init__('distrp', id, no_attributes=True)

    @classmethod
    def fromTree(cls, tree, id):
        return cls(id)

    def __str__(self):
        return 'distrp'


# TODO subclass?
class Fixed(Phrase):
    """Phrase of type 'fixed': wraps another phrase together with a literal text.

    The text form is ``fixed(<phrase>,<text>)``.
    """

    def __init__(self, phrase, text, id):
        super().__init__('fixed', id)
        self._phrase = phrase
        self._text = text
        self._id = id

    @classmethod
    def fromTree(cls, tree, id):
        """Build a ``Fixed`` from ``tree``.

        The wrapped phrase comes from the first grandchild of ``tree``; the
        text is the ``_content`` of the first child of ``tree``'s second child.
        Text that contains a space or starts with an upper-case character is
        wrapped in single quotes.
        """
        phrase = phrase_from_tree(tree._children[0]._children[0])
        text = tree._children[1]._children[0]._content
        # text[:1] rather than text[0]: empty content must not raise
        # IndexError (''.isupper() is False, so it is simply left unquoted).
        if ' ' in text or text[:1].isupper():
            text = '\'' + text + '\''
        return cls(phrase, text, id)

    def store(self, position, stored_positions):
        """Persist this phrase and attach it to ``position``.

        Gets or creates the phraseologic ``PhraseTypeModel`` named
        ``self._name``, stores the wrapped phrase, gets or creates the
        ``PhraseType`` row for this phrase, adds it to
        ``position.phrase_types`` and flags every schema of ``position`` as
        phraseologic.
        """
        main_type, _ = PhraseTypeModel.objects.get_or_create(
            name=self._name,
            phraseologic=True,
            defaults={'priority': 1})
        # @TODO: this should not be phrase
        # Earlier implementation, kept for reference:
        # lex_type, _ = PhraseTypeModel.objects.get_or_create(
        #     name=str(self._phrase).split('(')[0],
        #     phraseologic=False,
        #     defaults={'priority': 0})
        # lex, _ = PhraseType.objects.get_or_create(
        #     main_type=lex_type,
        #     attributes=get_attributes(lex_type.name, self._phrase),
        #     lexicalized_phrase=None,
        #     text_rep=str(self._phrase))
        lex = self._phrase.store(None, None)
        phrase_type, _ = PhraseType.objects.get_or_create(
            main_type=main_type,
            attributes=get_attributes(self, stored_positions),
            lexicalized_phrase=lex,
            text_rep=str(self))
        position.phrase_types.add(phrase_type)
        # @TODO: should be done only for the current schema, although lexes
        # basically do not repeat, so the overhead is small
        for schema in position.schemata.all():
            schema.phraseologic = True
            schema.save()

    def __str__(self):
        return 'fixed(' + str(self._phrase) + ',' + self._text + ')'
PhraseTypeModel.objects.get_or_create(name='lex', phraseologic=True, defaults={'priority': 0}) lex_type, _ = PhraseTypeModel.objects.get_or_create(name=name, phraseologic=False, defaults={'priority':0}) attributes = empty_attributes() if no_attributes else get_attributes(lex_type.name, lex_phrase) lex, _ = PhraseType.objects.get_or_create(main_type=lex_type, attributes=attributes, lexicalized_phrase=None, text_rep=str(lex_phrase)) if name not in ('compar', 'xp'): assert(len(phrase._words._lemmas) > 0) lemma_operator = LemmaOperator.objects.get(name=phrase._words._selection) lemma_cooccur = LemmaCooccur.objects.get(name=phrase._words._cooccur) lemmata = [Lemma.objects.get_or_create(name=lemma)[0] for lemma in phrase._words._lemmas] mod_key = str(phrase._modification) #print('\n*****************************', mod_key) if mod_key in stored_modifications: modification = stored_modifications[mod_key] #print('*************** already stored:', modification) else: mod_type = ModificationType.objects.get(name=phrase._modification._atr) modification = DBModification.objects.create(mod_type=mod_type) # TODO? 
def _plain_phrase_classes():
    """Return the mapping from a non-lexicalised ``type`` attribute to its class.

    Built at call time rather than at import time so the table does not
    depend on where in the module the phrase classes are defined.
    """
    return {
        'adverb': Adverb,
        'np': NP,
        'adjp': AdjP,
        'ppasp': PPasP,
        'pactp': PActP,
        'prepppasp': PrepPPasP,
        'prepnp': PrepNP,
        'nump': NumP,
        'prepnump': PrepNumP,
        'prepadjp': PrepAdjP,
        'comprepnp': ComPrepNP,
        'cp': CP,
        'ncp': NCP,
        'prepncp': PrepNCP,
        'nonch': Nonch,
        'infp': InfP,
        'prepgerp': PrepGerP,
        'xp': XP,
        'advp': AdvP,
        'compar': Compar,
        'distrp': DistrP,
        'refl': Refl,
        'recip': Recip,
        'or': OR,
        'E': E,
        'qub': Qub,
        'possp': PossP,
        'fixed': Fixed,
    }


def _lex_phrase_classes():
    """Return the mapping from the inner ``type`` of a 'lex' node to its class.

    Built at call time for the same reason as ``_plain_phrase_classes``.
    """
    return {
        'np': LexNP,
        'prepnp': LexPrepNP,
        'infp': LexInfP,
        'compar': LexCompar,
        'xp': LexXP,
        'adjp': LexAdjP,
        'cp': LexCP,
        'ncp': LexNCP,
        'prepnump': LexPrepNumP,
        'prepadjp': LexPrepAdjP,
        'ppasp': LexPPasP,
        'prepgerp': LexPrepGerP,
        'nump': LexNumP,
        'advp': LexAdvP,
        'prepppasp': LexPrepPPasP,
        'qub': LexQub,
        'pactp': LexPActP,
    }


def phrase_from_tree(tree):
    """Build the phrase object described by ``tree``.

    The class is chosen from ``tree._attrs['type']``. For ``'lex'`` nodes it is
    chosen from the ``type`` attribute of the node's first grandchild instead.
    The selected class's ``fromTree(tree, id)`` is then called, where ``id`` is
    the node's ``xml:id`` attribute or ``None`` when that attribute is absent.

    Raises:
        UnexpectedError: when the type (or the inner type of a 'lex' node) is
            not recognised. The offending type is printed and also passed to
            the exception; for 'lex' nodes it is prefixed with ``'lex: '``.
    """
    attrs = tree._attrs
    phrase_id = attrs['xml:id'] if 'xml:id' in attrs else None
    phrase_type = attrs['type']

    if phrase_type == 'lex':
        lexicalised_type = tree._children[0]._children[0]._attrs['type']
        phrase_class = _lex_phrase_classes().get(lexicalised_type)
        if phrase_class is None:
            print('lex: ' + lexicalised_type)
            # Carry the type in the exception too, as the non-lex branch does.
            raise UnexpectedError('lex: ' + lexicalised_type)
    else:
        phrase_class = _plain_phrase_classes().get(phrase_type)
        if phrase_class is None:
            print('type: ' + phrase_type)
            raise UnexpectedError(phrase_type)

    return phrase_class.fromTree(tree, phrase_id)