"""Human-readable descriptions and example phraseologisms for phrase objects.

The phrase classes (NP, LexNP, CP, ...) come from ``importer.Phrase``; the
string templates and morphological helpers come from ``.polish_strings`` and
``.utils``.
"""

import logging

from itertools import chain, combinations, product

from django.utils import translation
from django.utils.translation import gettext as _

from importer.Phrase import *
from importer.Global import get_current_entry

from .polish_strings import *
from .utils import *


class PhraseDescriptionError(Exception):
    """Raised by phrase_description2 when a description cannot be generated."""
    pass


def powerset(iterable):
    """Return an iterator over all subsets (as tuples) of ``iterable``,
    including the empty one, ordered by increasing size."""
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))


def powerset_nonempty(iterable):
    """Return an iterator over all non-empty subsets (as tuples) of
    ``iterable``, ordered by increasing size."""
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1))


# keeps the element order
def uniq_list(iterable):
    """Return a list of the distinct elements of ``iterable`` in order of
    first occurrence. Uses ``in`` on a list, so elements need not be hashable."""
    u = []
    for element in iterable:
        if element not in u:
            u.append(element)
    return u


def position_prop_description(prop):
    """Look up the description of a position property in POSITION_PROP()."""
    return POSITION_PROP()[prop]


def phrase_description2(phrase, position, negativity, lang, controller=None):
    """Generate the description of ``phrase`` in language ``lang``.

    The grammatical function is read from ``position._function`` (None if
    absent) and the negativity from ``negativity.name`` ('_' if falsy).
    ``lang`` is activated only for the duration of the call.

    Raises:
        PhraseDescriptionError: if generating the description fails.
    """
    function = position._function._value if position._function else None
    negativity = negativity.name if negativity else '_'
    curr_lang = translation.get_language()
    translation.activate(lang)
    try:
        return phrase_description(phrase, function, negativity, controller=controller)
    except Exception as err:
        # chain the cause so the original failure stays visible in tracebacks
        raise PhraseDescriptionError('couldn’t generate description: {}'.format(phrase)) from err
    finally:
        # restore the caller's language on failure as well as on success
        translation.activate(curr_lang)


def phrase_description(phrase, function, negativity, desc_case='nom', inside_lex=False, controller=None):
    """Return the description string of ``phrase``.

    Phrases whose string form starts with 'lex' are described by
    lex_phrase_description, all others by make_phrase_description. Phrases
    listed in the exclusion tuple below are described as '???'.
    """
    if str(phrase) in (
        # malowany -> ppas in Morfeusz
        #'lex(adjp(agr),agr,agr,pos,malować,natr)',
        #'lex(compar(jak),lex(np(nom),sg,wół,natr),lex(prepnp(na,acc),pl,wrota,ratr1({lex(adjp(agr),agr,agr,pos,malować,natr)})),natr)',
        # oszalały -> adj in Morfeusz
        #'lex(compar(jak),lex(ppasp(agr),agr,agr,aff,oszaleć,natr),natr)',
        # OR
        #'lex(np(str),_,uwaga,ratr({adjp(agr)}+{or}))',
        #'lex(xp(mod[prepnp(z,gen)]),sg,góry,natr)',
        'lex(np(str),_,uwaga,atr({adjp(agr)}+{or}))',
        # ‘jak’ is not among the modifications
        #'lex(cp(int[jak]),aff,dziękować,,atr)',
        # ‘na jakim świecie żyje’ – interrogative element nested inside a prepnp
        #'lex(cp(int[jaki]),aff,żyć,,ratr1({lex(prepnp(na,loc),sg,świat,ratr1({lex(adjp(agr),agr,agr,pos,jaki,natr)}))}))',
        # ‘co’ is not among the modifications
        #'lex(xp(mod[cp(rel[co])]),aff,wyskoczyć,,ratr1(subj{lex(np(str),sg,koń,natr)}))',
        #'lex(cp(rel[co]),aff,XOR(przynieść,przynosić),,ratr(subj{lex(np(str),sg,ślina,natr)}+{lex(np(dat),_,XOR(ja,my,on,ty,wy),natr)}+{lex(prepnp(na,acc),_,język,natr)}))',
        #'lex(cp(rel[co]),_,XOR(przychodzić,przyjść),,ratr({lex(np(dat),_,XOR(ja,my,on,ty,wy),natr)}+{lex(prepnp(na,acc),sg,myśl,natr)}))',
        #'lex(np(str),sg,wszystko,ratr(subj{lex(np(str),sg,co,natr)}+{lex(cp(rel[co]),aff,być,,ratr1({lex(prepnp(w,loc),sg,moc,atr({lex(adjp(agr),agr,agr,pos,ludzki,natr)}))}))}))',
        #'lex(np(str),sg,wszystko,ratr(subj{lex(np(str),sg,co,natr)}+{lex(cp(rel[co]),aff,być,,ratr1({lex(prepnp(w,loc),sg,moc,atr({possp}))}))}))',
        #'lex(cp(rel[co]),aff,być,,ratr1({lex(prepnp(w,loc),sg,moc,atr({lex(adjp(agr),agr,agr,pos,ludzki,natr)}))}))',
        #'lex(cp(rel[co]),aff,być,,ratr1({lex(prepnp(w,loc),sg,moc,atr({possp}))}))',
        # ‘jakby’ is not among the modifications
        #'lex(xp(mod[cp(rel[jakby])]),aff,strzelić,,ratr({prepnp(w,acc)}+{lex(np(str),sg,XOR(grom,piorun),natr)}))',
    ):
        return '???'
    if str(phrase).startswith('lex'):
        return lex_phrase_description(phrase, function, negativity, desc_case=desc_case, controller=controller)
    return make_phrase_description(phrase, function, negativity, desc_case, inside_lex=inside_lex)


def get_phrase_type(lex_phrase):
    """Return the non-lexicalised phrase object wrapped by a Lex* phrase.

    For an unsupported type the type is printed and the function crashes
    with ZeroDivisionError (deliberate ``1/0`` marker).
    """
    ptype = type(lex_phrase)
    if ptype == LexNP:
        return lex_phrase._np
    if ptype == LexNumP:
        return lex_phrase._nump
    if ptype == LexAdjP:
        return lex_phrase._adjp
    if ptype == LexPPasP:
        return lex_phrase._ppasp
    if ptype == LexPActP:
        return lex_phrase._pactp
    if ptype == LexPrepNP:
        return lex_phrase._prepnp
    if ptype == LexPrepGerP:
        return lex_phrase._prepgerp
    if ptype == LexPrepNumP:
        return lex_phrase._prepnump
    if ptype == LexPrepAdjP:
        return lex_phrase._prepadjp
    if ptype == LexPrepPPasP:
        return lex_phrase._prepppasp
    if ptype == LexInfP:
        return lex_phrase._infp
    if ptype == LexCP:
        return lex_phrase._cp
    if ptype == LexNCP:
        return lex_phrase._ncp
    if ptype == LexXP:
        return lex_phrase._xp
    if ptype == LexAdvP:
        return lex_phrase._advp
    if ptype == LexCompar:
        return lex_phrase._compar
    if ptype == LexQub:
        return lex_phrase._qub
    print(ptype)
    1/0


def postprocess_phraseologism(p):
    """Remove the space before each comma in a phraseologism string."""
    return p.replace(' ,', ',')


def lex_phrase_description(phrase, function, negativity, desc_case='nom', controller=None, controller_grammar=None):
    """Describe a lexicalised phrase: the description of its underlying
    phrase type followed by an HTML list of its distinct phraseologisms.

    ``desc_case`` is accepted but the underlying phrase is always described
    in the 'nom' case.
    """
    phrase2 = get_phrase_type(phrase)
    desc = make_phrase_description(phrase2, function, negativity, 'nom', inside_lex=True)
    phraseo = uniq_list(map(postprocess_phraseologism, make_phraseologisms(
        phrase, function, negativity, controller=controller, controller_grammar=controller_grammar)))
    return _('zleksykalizowana') + ' ' + desc + ' ' + _('postaci') + make_ul(map('<i>{}</i>'.format, phraseo))


def make_phrase_description(phrase, function, negativity, desc_case, inside_lex=False):
    """Return the description of a non-lexicalised phrase.

    The template for the phrase type is inflected to ``desc_case`` with
    make_inflected_string and filled with the phrase's parameters.
    ``inside_lex`` is True when the phrase is described as part of a
    lexicalised/fixed phrase; XP realisations are then not listed.

    For an unsupported type the type is printed and the function crashes
    with ZeroDivisionError (deliberate ``1/0`` marker).
    """
    ptype = type(phrase)
    if ptype == NP:
        case = phrase._case._value
        if function == 'subj' and case == 'str':
            case = 'str_subj'
        return make_inflected_string(NP_(), desc_case).format(case=CASE_FOR_NP()[case])
    if ptype == NumP:
        case = phrase._case._value
        return make_inflected_string(NUMP(), desc_case).format(case=CASE_FOR_NP()[case])
    if ptype == AdjP:
        return make_inflected_string(ADJP(), desc_case).format(case=CASE_FOR_ADJP()[phrase._case._value])
    if ptype == PPasP:
        return make_inflected_string(PPASP(), desc_case).format(case=CASE_FOR_ADJP()[phrase._case._value])
    if ptype == PActP:
        return make_inflected_string(PACTP(), desc_case).format(case=CASE_FOR_ADJP()[phrase._case._value])
    if ptype == PrepNP:
        prep, case = phrase._prep._value, phrase._prep._case._value
        return make_inflected_string(PREPNP(), desc_case).format(prep=prep, case=CASE_FOR_PREPNP()[case])
    if ptype == PrepGerP:
        prep, case = phrase._prep._value, phrase._prep._case._value
        return make_inflected_string(PREPGERP(), desc_case).format(prep=prep, case=CASE_FOR_PREPNP()[case])
    if ptype == PrepNumP:
        prep, case = phrase._prep._value, phrase._prep._case._value
        return make_inflected_string(PREPNUMP(), desc_case).format(prep=prep, case=CASE_FOR_PREPNP()[case])
    if ptype == PrepAdjP:
        prep, case = phrase._prep._value, phrase._prep._case._value
        return make_inflected_string(PREPADJP(), desc_case).format(prep=prep, case=CASE_FOR_PREPNP()[case])
    if ptype == PrepPPasP:
        prep, case = phrase._prep._value, phrase._prep._case._value
        return make_inflected_string(PREPPPASP(), desc_case).format(prep=prep, case=CASE_FOR_PREPNP()[case])
    if ptype == ComPrepNP:
        return make_inflected_string(COMPREPNP(), desc_case).format(prep=phrase._prep)
    if ptype == InfP:
        aspect = phrase._aspect._value
        return make_inflected_string(INFP(), desc_case).format(
            aspect=make_inflected_string(ASPECT()[aspect], desc_case))
    if ptype == CP:
        typ = phrase._type._value
        if phrase._type._realisations:
            # '<typ>_r' templates describe types with explicit realisations
            typ_str = make_inflected_string(CP_TYPE().get(typ + '_r', CP_CONJ()), desc_case).format(
                conj='/'.join(phrase._type._realisations))
        else:
            typ_str = make_inflected_string(CP_TYPE().get(typ, CP_CONJ()), desc_case).format(conj=typ)
        return make_inflected_string(CP_(), desc_case).format(typ=typ_str)
    if ptype == NCP:
        case, typ = phrase._case._value, phrase._type._value
        if case == 'part':
            case = 'gen'
        to = TO[case]
        if phrase._type._realisations:
            typ_str = make_inflected_string(NCP_TYPE().get(typ + '_r', NCP_CONJ()), desc_case).format(
                to=to, conj='/'.join(phrase._type._realisations))
        else:
            typ_str = make_inflected_string(NCP_TYPE().get(typ, NCP_CONJ()), desc_case).format(to=to, conj=typ)
        return make_inflected_string(NCP_(), desc_case).format(typ=typ_str)
    if ptype == PrepNCP:
        prep, case, typ = phrase._prep._value, phrase._prep._case._value, phrase._type._value
        to = TO[case]
        if phrase._type._realisations:
            typ_str = make_inflected_string(PREPNCP_TYPE().get(typ + '_r', PREPNCP_CONJ()), desc_case).format(
                prep=prep, to=to, conj='/'.join(phrase._type._realisations))
        else:
            typ_str = make_inflected_string(PREPNCP_TYPE().get(typ, PREPNCP_CONJ()), desc_case).format(
                prep=prep, to=to, conj=typ)
        # we use NCP here as it’s the same (clausal phrase introduced by...)
        return make_inflected_string(NCP_(), desc_case).format(typ=typ_str)
    if ptype in (XP, AdvP):
        sem, realisations = phrase._category._value, phrase._category._limitations
        if ptype == AdvP and sem == 'misc':
            return make_inflected_string(ADVP_MISC(), desc_case)
        if ptype == AdvP and sem == 'pron':
            return make_inflected_string(ADVP_PRON(), desc_case)
        b = (bool(realisations) or inside_lex)
        #desc = make_inflected_string(XP_()[b] if ptype == XP else ADVP(), desc_case)
        #ret = '{desc} {sem}'.format(desc=desc, sem=make_inflected_string(XP_SEM()[sem], desc_case))
        ret = make_inflected_string(XP_(b)[sem] if ptype == XP else ADVP()[sem], desc_case)
        # don’t describe realisations for fixed/lexicalised phrases
        if realisations and not inside_lex:
            rs = make_ul(map(
                lambda r: phrase_description(r, function, negativity, 'nom', inside_lex=inside_lex),
                realisations))
            if len(realisations) == 1:
                ret += _(' z dopuszczalną realizacją: ') + rs
            else:
                ret += _(' z dopuszczalnymi realizacjami: ') + rs
        return ret
    # TODO description?
    if ptype == Compar:
        prep = phrase._category._value
        return make_inflected_string(COMPAR(), desc_case).format(prep=prep)
    if ptype == Nonch:
        return make_inflected_string(NONCH(), desc_case)
    if ptype == OR:
        return make_inflected_string(OR_(), desc_case)
    if ptype == Refl:
        return make_inflected_string(REFL(), desc_case)
    if ptype == Recip:
        return make_inflected_string(RECIP(), desc_case)
    if ptype == E:
        return make_inflected_string(E_(), desc_case)
    if ptype == PossP:
        return make_inflected_string(POSSP(), desc_case)
    if ptype == DistrP:
        return make_inflected_string(DISTRP(), desc_case)
    if ptype == Fixed:
        assert (desc_case == 'nom')
        phrase, phraseo = phrase._phrase, phrase._text.strip('\'')
        return _('zleksykalizowana {phrase} zamrożona w postaci <i>{phraseo}</i>').format(
            phrase=make_phrase_description(phrase, function, negativity, 'nom', inside_lex=True),
            phraseo=phraseo)
    # TODO was not in the documentation
    if ptype == Qub:
        return make_inflected_string(QUB(), desc_case)
    print(ptype)
    1/0
    return 'fraza TODO'  # unreachable: the line above always raises


def combine(phrase, texts):
    """Merge per-lemma lists of alternatives into one list of alternatives.

    ``texts`` holds one list of strings per lemma of ``phrase``.
    * one lemma: its list is returned as is;
    * 'xor' selection or more than 5 lemmas: the lists are concatenated;
    * otherwise: for every choice of one text per lemma, every non-empty
      subset of the chosen texts is joined with ' ' ('concat' co-occurrence)
      or ' i/lub '.
    """
    assert (len(texts) == len(phrase._words._lemmas))
    if len(texts) == 1:
        return texts[0]
    if phrase._words._selection == 'xor' or len(phrase._words._lemmas) > 5:
        return list(chain.from_iterable(texts))
    else:
        joiner = ' ' if phrase._words._cooccur == 'concat' else ' i/lub '
        return list(chain.from_iterable(map(joiner.join, powerset_nonempty(x)) for x in product(*texts)))


# controller_grammar: (gender, number) of the controller if controller!=None
def make_phraseologisms(phrase, function, negativity, attrs=None, controller=None, controller_grammar=None):
    """Return a list of example surface strings (phraseologisms) for ``phrase``.

    Args:
        phrase: phrase object; its exact type selects the branch below.
        function: grammatical function value of the phrase's position
            (e.g. 'subj') or None.
        negativity: negativity value forwarded to case correction.
        attrs: optional dict with 'num', 'case' and 'gend' of the modified
            head, used for agreement; treated as empty when omitted.
        controller: controlling position, if any.
        controller_grammar: (gender, number) of the controller, if any.

    For an unsupported type the type is printed and the function crashes
    with ZeroDivisionError (deliberate ``1/0`` marker).
    """
    if attrs is None:
        # avoid a shared mutable default argument
        attrs = {}
    control = False
    ptype = type(phrase)
    if ptype in (NP, PrepNP, ComPrepNP):
        # “any ((com)prep)np”
        if ptype == NP:
            case = phrase._case._value
        else:
            case = phrase._prep._case._value if ptype == PrepNP else 'gen'
        CASE = correct_case(case, function, negativity)
        prep = (phrase._prep._value + ' ') if ptype != NP else ''
        feats = ['subst', CASE]
        forms = [get_form(lemma, feats) for lemma in ('ktoś', 'coś')]
        return ['{}‹{}›'.format(prep, '/'.join(o for o, t in forms))]
    # TODO merge LexNP with LexPrepNP/LexPrepGerP?
    if ptype == LexNP:
        case = phrase._np._case._value
        if case == 'agr':
            # nom for e.g. ‹ktoś żądny czegoś›
            case = attrs['case'] if 'case' in attrs else 'nom'
        POS = lambda lemma: correct_pos(lemma, 'subst')
        NUM = lambda lemma: correct_num(lemma, phrase._number)
        #if not function and controller and controller._function and controller._function._value:
        #    function = controller._function._value
        CASE = correct_case(case, function, negativity)
        if case == 'str' and not function:
            logging.warning('{}: couldn’t resolve str case in {}; assuming {}'.format(
                get_current_entry()._base, phrase, CASE))
        feats = [POS, NUM, CASE]
        nps = []
        for lemma in phrase._words._lemmas:
            lemma = correct_lemma(lemma)
            for orth, tag in get_forms(lemma, correct_feats(lemma, feats)):
                mod_attrs = get_subst_attrs(lemma, tag)
                nps += make_modified_phrases(phrase, orth, NP, function, negativity, mod_attrs)
        return nps
    if ptype in (LexPrepNP, LexPrepGerP):
        phrase2 = phrase._prepnp if ptype == LexPrepNP else phrase._prepgerp
        prep, case = phrase2._prep._value, phrase2._prep._case._value
        if prep == 'jako':
            # ktoś aplikuje *jako ktoś* (control by the subject)
            # ktoś rozpoznaje kogoś *jako kogoś* (control by the object)
            assert(controller)
            assert(controller_grammar)
            function = controller._function._value if controller._function else None
            control = True
        POS = lambda lemma: correct_pos(lemma, 'subst') if ptype == LexPrepNP else 'ger'
        NUM = controller_grammar[1] if control else lambda lemma: correct_num(lemma, phrase._number)
        CASE = correct_case(case, function)
        feats = [POS, NUM, CASE]
        if ptype == LexPrepGerP:
            feats += ['aff']
        nps = []
        for lemma in phrase._words._lemmas:
            lemma = correct_lemma(lemma)
            # eg. zaofiarować dobra:pl jako *coś*
            feats2 = [feats[0], feats[1] if lemma not in ('ktoś', 'coś') else 'sg'] + feats[2:]
            for orth, tag in get_forms(lemma, correct_feats(lemma, feats2, praep=True)):
                mod_attrs = get_subst_attrs(lemma, tag)
                nps += make_modified_phrases(phrase, orth, NP, function, negativity, mod_attrs)
        return [combine_with_prep(prep, np) for np in nps]
    if ptype in (LexNumP, LexPrepNumP):
        case = (phrase._nump if ptype == LexNumP else phrase._prepnump._prep)._case._value
        prep = (phrase._prepnump._prep._value + ' ') if ptype != LexNumP else ''
        CASE = correct_case(case, function)
        phrs = []
        words = []
        for word in phrase._words._lemmas:
            if word.startswith('E('):
                # expand 'E(g1.g2...)' into one 'E(g)' entry per listed gender;
                # 'E(_)' stands for all of f, m1, m2, m3, n
                if word == 'E(_)':
                    word = 'E(f.m1.m2.m3.n)'
                words += ['E({})'.format(gend) for gend in word.strip('E()').split('.')]
            else:
                words.append(word)
        for num in phrase._nums._lemmas:
            POS = correct_pos(num, 'num')
            for word in words:
                # ‘wiele wody’: ‘woda’ is kept in the singular
                NUM = correct_num(num, 'pl') if word != 'woda' else 'sg'
                gend = get_gender(word)
                # gender before congr/rec to avoid empty result due to filtering priority
                feats = [POS, NUM, CASE] + gend + [correct_congr(num)]
                num_form = get_form(correct_num_lemma(num), feats)
                congr = num_form[1].split(':')[4]
                word_case = CASE if congr == 'congr' else 'gen'
                word_form = get_form(word, ['subst', NUM, word_case])
                # back to digits if this is the case
                num_form = (num, num_form[1]) if num in NUM_LEMMA else num_form
                phr = '{} {}'.format(num_form[0], word_form[0]) if word_form[0] else num_form[0]
                mod_attrs = {'num': NUM, 'case': CASE, 'gend': gend}
                phrs += make_modified_phrases(phrase, phr, NumP, function, negativity, mod_attrs)
        return ['{}{}'.format(prep, phr) for phr in phrs]
    if ptype == AdjP:
        feats = ['adj', attrs['num'], attrs['case'], attrs['gend'], 'pos']
        return ['‹{}›'.format(get_form('jakiś', feats)[0])]
    if ptype in (LexAdjP, LexPPasP, LexPActP):
        # e.g. uczynić coś *jakimś* / kobietę *jakąś*
        # controller_grammar will not be available in syntax
        #TODO test? (***)
        #if not function and controller and controller_grammar:
        #    function = controller._function._value if controller._function else None
        #    control = True
        if ptype == LexAdjP:
            phrase2 = phrase._adjp
            POS = lambda lemma: correct_pos(lemma, 'adj')
        elif ptype == LexPPasP:
            phrase2 = phrase._ppasp
            POS = 'ppas'
        else:
            phrase2 = phrase._pactp
            POS = 'pact'
        if phrase2._case._value == 'agr' and 'case' in attrs:
            case = attrs['case']
            assert (case != 'agr')
            CASE = correct_case(case, function)
        else:
            if phrase2._case._value == 'pred':
                assert(controller)
                #assert(controller_grammar)
            CASE = correct_case(phrase2._case._value, function, negativity)
        # e.g. uczynić coś *jakimś* / kobietę *jakąś*
        # controller_grammar will not be available in syntax
        #TODO test? (***)
        # NOTE(review): the case above is resolved with the original
        # `function`; only afterwards is it replaced by the controller's.
        if controller and controller_grammar:
            function = controller._function._value if controller._function else None
            control = True
        if phrase._number == 'agr' and 'num' in attrs:
            num = attrs['num']
            assert (num != 'agr')
            NUM = lambda lemma: correct_num(lemma, num)
        elif control:
            NUM = controller_grammar[1]
        else:
            NUM = lambda lemma: correct_num(lemma, phrase._number)
        if phrase._gender == 'agr' and 'gend' in attrs:
            gend = attrs['gend']
            assert (gend != 'agr')
            GEND = gend
        elif control:
            GEND = controller_grammar[0]
        else:
            GEND = correct_gend(phrase._gender)
        DEG = correct_deg(phrase._degree) if ptype == LexAdjP else ''
        feats = [POS, NUM, CASE, GEND, DEG]
        if ptype != LexAdjP:
            feats.append('aff')
        adjps = []
        for lemma in phrase._words._lemmas:
            lemma_adjps = []
            for orth, tag in get_forms(lemma, correct_feats(lemma, feats)):
                mod_attrs = get_subst_attrs(lemma, tag)
                lemma_adjps += make_modified_phrases(phrase, orth, AdjP, function, negativity, mod_attrs)
            adjps.append(lemma_adjps)
        return combine(phrase, adjps)
    if ptype in (LexPrepAdjP, LexPrepPPasP):
        phrase2 = phrase._prepadjp if ptype == LexPrepAdjP else phrase._prepppasp
        prep, case = phrase2._prep._value, phrase2._prep._case._value
        if prep == 'jako':
            # ktoś jawi się *jako jakiś* (control by the subject)
            # ktoś rozpoznaje kogoś *jako jakiegoś* (control by the object)
            assert(controller)
            assert(controller_grammar)
        # e.g. uznawać kogoś *za jakiegoś* / coś *za jakieś* / facetów *za jakichś*
        # controller_grammar will not be available in syntax
        if controller and controller_grammar:
            function = controller._function._value if controller._function else None
            control = True
        if control:
            GEND, NUM = controller_grammar
        else:
            NUM = lambda lemma: correct_num(lemma, phrase._number)
            GEND = correct_gend(phrase._gender)
        CASE = correct_case(case, function)
        DEG = correct_deg(phrase._degree) if ptype == LexPrepAdjP else ''
        feats_adjp = ['adjp', CASE]
        if ptype == LexPrepAdjP:
            feats_adj = ['adj', NUM, CASE, GEND, DEG]
        else:
            feats_adj = ['ppas', NUM, CASE, GEND, DEG, correct_aff(phrase._negativity)]
        adjps = []
        for lemma in phrase._words._lemmas:
            if case == 'postp':
                assert (ptype == LexPrepAdjP)
                try:
                    orth, tag = get_form(lemma, feats_adjp)
                    mod_attrs = {}
                except Exception:
                    # no 'adjp' form found: fall back to the regular adjective form
                    orth, tag = get_form(lemma, feats_adj)
                    mod_attrs = get_subst_attrs(lemma, tag)
                # accumulate (not overwrite) so that earlier lemmas are kept
                adjps += make_modified_phrases(phrase, orth, AdjP, function, negativity, mod_attrs)
            else:
                for orth, tag in get_forms(lemma, feats_adj):
                    mod_attrs = get_subst_attrs(lemma, tag)
                    adjps += make_modified_phrases(phrase, orth, AdjP, function, negativity, mod_attrs)
        return [combine_with_prep(prep, adjp) for adjp in adjps]
    if ptype == LexInfP:
        aspect = phrase._infp._aspect._value
        neg = correct_neg(phrase._negativity)
        sie = correct_sie(phrase._inherent_sie)
        POS = 'inf'
        feats = [POS]
        infps = []
        for lemma in phrase._words._lemmas:
            for orth, tag in get_forms(lemma, feats):
                head = '{}{}{}'.format(neg, orth, sie)
                infps += make_modified_phrases(phrase, head, InfP, function, negativity, {})
        return infps
    if ptype in (CP, NCP, PrepNCP):
        typ = phrase._type._value
        to = ''
        if ptype in (NCP, PrepNCP):
            case = phrase._case._value if ptype == NCP else phrase._prep._case._value
            to = TO[case]
        prep = '{} '.format(phrase._prep._value) if ptype == PrepNCP else ''
        conj = None
        if typ == 'żeby2':
            conj = 'że' if negativity != 'neg' else 'że/żeby'
        elif typ in ('int', 'rel') and phrase._type._realisations:
            conj = '/'.join(phrase._type._realisations)
        elif typ == 'int':
            conj = 'co/czy/ile/kto…'
        elif typ == 'rel':
            #ktory = get_form('który', ['adj', attrs['num'], 'nom', attrs['gend'], 'pos'])[0]
            #conj = '{}/co'.format(ktory)
            conj = 'co/gdzie/kto…'
        elif typ in ('gdy', 'jak', 'kiedy', 'że', 'żeby',):
            conj = typ
        if conj is not None:
            return ['{}{}, {} …'.format(prep, to, conj)]
        print('===========', typ)
        1/0
    # TODO order (się)
    if ptype in (LexCP, LexNCP):
        print(phrase)
        typ = (phrase._cp if ptype == LexCP else phrase._ncp)._type._value
        to = '' if ptype == LexCP else '{}, '.format(TO[phrase._ncp._case._value])
        comp = ''
        if typ == 'żeby2':
            comp = 'żeby '
        elif typ == 'gdy':
            comp = 'gdy '
        elif typ not in ('int', 'rel',):
            comp = typ + ' '
        neg = correct_neg(phrase._negativity)
        sie = correct_sie(phrase._inherent_sie)
        subj = None
        # dependent like „co”, „na kogo”, „który” – should go first
        first = []
        # then pronouns: mi, ci etc., generic NP: ktoś/coś, LexQub: tylko etc.
        pron = []
        rest = []
        #print()
        realisations = (phrase._cp if ptype == LexCP else phrase._ncp)._type._realisations
        realisations = set(realisations) if realisations else set()
        for position in phrase._modification._dependents:
            #print('---')
            assert(len(position._phrases) == 1)
            dep_phrase = position._phrases[0]
            func = position._function._value if position._function else None
            dep_phr = (dep_phrase, make_phraseologisms(dep_phrase, func, phrase._negativity, {}))
            if func == 'subj':
                subj = dep_phr
            words = None
            if type(dep_phrase) == LexNumP:
                words = dep_phrase._nums._lemmas
            elif type(dep_phrase) == LexXP:
                words = dep_phrase._lex._words._lemmas
            elif hasattr(dep_phrase, '_words'):
                words = dep_phrase._words._lemmas
            #print(words)
            if words:
                realisations.difference_update(words)
            if words and {'co', 'gdzie', 'ile', 'jak', 'skąd', 'dokąd', 'który',}.intersection(words):
                first.append(dep_phr)
            elif func != 'subj':
                if (words and {'ja', 'ty', 'on', 'my', 'wy'}.intersection(words)) or type(dep_phrase) in (NP, LexQub):
                    pron.append(dep_phr)
                else:
                    rest.append(dep_phr)
        # all realisations should have been matched by modifications
        # TODO: assertion fails (but shouldn’t!) for lex(cp(int[jaki]),aff,żyć,,ratr1({lex(prepnp(na,loc),sg,świat,ratr1({lex(adjp(agr),agr,agr,pos,jaki,natr)}))})) – nested ‘jaki’
        # TODO: assertion fails for some phrases where the realisation is not among modifications, see (*****) in notes — correct those schemata
        #assert (not realisations)
        # TODO workaround:
        if realisations:
            # TODO workaround produces mess for ‘na jakim świecie żyje’
            assert (len(realisations) == 1 and not first)
            first.append((None, list(realisations)))
        assert (len(first) == 1 or typ not in ('int',))
        #print()
        deps1 = [d[1] for d in first] + [d[1] for d in pron]
        if subj and subj not in first + rest:
            deps1.append(subj[1])
        deps2 = [d[1] for d in rest]
        # TODO: always ter? sg/m1 if no subj?
        # TODO separate numbers/genders for subject realisations?
        subj_num = 'sg'
        if subj and hasattr(subj[0], '_number'):
            subj_num = correct_num('', subj[0]._number)
        if typ != 'jakby':
            feats = ['fin', subj_num, 'ter']
        else:
            subj_gend = 'm1'
            if subj:
                subj_gends = set(get_gender(w)[0] for w in subj[0]._words._lemmas)
                assert (len(subj_gends) == 1)
                subj_gend = subj_gends.pop()
            feats = ['praet', subj_num, subj_gend]
        phrs = []
        for lemma in phrase._words._lemmas:
            for dps1 in product(*deps1):
                for dps2 in product(*deps2):
                    verb_form = get_form(lemma, feats)[0]
                    phrs.append('{}{}{}{}{}{}{}{}{}'.format(
                        to, comp, ' '.join(dps1), ' ' if dps1 else '', sie, neg, verb_form,
                        ' ' if dps2 else '', ' '.join(dps2)))
        for phr in phrs:
            print(' ===>', phr)
        return phrs
    if ptype in (XP, AdvP):
        if phrase._category._limitations:
            # materialise: callers (e.g. the LexCP branch) iterate the result
            # more than once, which a one-shot iterator would not survive
            return list(chain.from_iterable(
                make_phraseologisms(phr, function, negativity, {})
                for phr in phrase._category._limitations))
        else:
            return [XP_SEM_PHRASEO[phrase._category._value]]
    if ptype == LexXP:
        return make_phraseologisms(phrase._lex, function, negativity, {},
                                   controller=controller, controller_grammar=controller_grammar)
    if ptype == LexAdvP:
        POS = 'adv'
        DEG = correct_deg(phrase._degree)
        feats = [POS, DEG]
        advps = []
        for lemma in phrase._words._lemmas:
            for orth, tag in get_forms(lemma, feats):
                advps += make_modified_phrases(phrase, orth, AdvP, function, negativity, {})
        return advps
    if ptype == Compar:
        return ['{} …'.format(phrase._category._value)]
    if ptype == LexCompar:
        comp = phrase._compar._category._value
        lex_phrs = []
        for lex in phrase._lexes:
            # TODO case (determined by function?) will depend on the control
            # TODO ‹padać jak rażony piorunem› – no control, compar has str case, should be nom, gets resolved to acc...
            phrs = make_phraseologisms(lex, function, negativity, {},
                                       controller=controller, controller_grammar=controller_grammar)
            lex_phrs.append(phrs)
        return ['{} {}'.format(comp, ' '.join(phrs)) for phrs in product(*lex_phrs)]
    if ptype == PossP:
        feats = ['adj', attrs['num'], attrs['case'], attrs['gend'], 'pos']
        forms = [get_form(lemma, feats) for lemma in ('mój', 'pański')]
        return ['/'.join(o for o, t in forms) + '/Anny/…']
    if ptype == OR:
        # a one-element list like every other branch; a bare string would be
        # iterated character by character by the callers
        return ['„…”']
    if ptype == Fixed:
        return [phrase._text.strip('\'')]
    if ptype == LexQub:
        qubs = []
        for orth in phrase._words._lemmas:
            qubs += make_modified_phrases(phrase, orth, Qub, function, negativity, {})
        return qubs
    print(ptype)
    1/0
    return ['TODO']  # unreachable: the line above always raises


def make_modified_phrases(phrase, head, head_type, function, negativity, mod_attrs):
    """Return the distinct texts obtained by attaching modifications to ``head``.

    Depending on ``phrase._modification._atr``:
    * no modification or 'natr': only ``head`` itself;
    * 'ratr1' / 'atr1': each modification is attached to ``head`` separately;
    * 'ratr' / 'atr': all modifications of a combination are attached in turn;
    * 'atr1' / 'atr': the modification text is wrapped in parentheses.
    Any other value is printed and crashes with ZeroDivisionError.
    """
    texts = []
    if phrase._modification is not None and phrase._modification._atr != 'natr':
        for mod_list in make_modifications(phrase._modification, function, negativity, mod_attrs):
            if phrase._modification._atr == 'ratr1':
                for mod_ptype, mod in mod_list:
                    texts.append(build_phrase(head, mod, head_type, mod_ptype,
                                              order_override=phrase._modification._order))
            elif phrase._modification._atr == 'atr1':
                for mod_ptype, mod in mod_list:
                    texts.append(build_phrase(head, '({})'.format(mod), head_type, mod_ptype,
                                              order_override=phrase._modification._order))
            elif phrase._modification._atr == 'ratr':
                #for mod_list2 in powerset_nonempty(mod_list):
                #    p = head
                #    for mod_ptype, mod in mod_list2:
                #        p = build_phrase(p, '{}'.format(mod), head_type, mod_ptype)
                #    texts.append(p)
                p = head
                for mod_ptype, mod in mod_list:
                    p = build_phrase(p, '{}'.format(mod), head_type, mod_ptype,
                                     order_override=phrase._modification._order)
                texts.append(p)
            elif phrase._modification._atr == 'atr':
                p = head
                for mod_ptype, mod in mod_list:
                    p = build_phrase(p, '({})'.format(mod), head_type, mod_ptype,
                                     order_override=phrase._modification._order)
                texts.append(p)
            else:
                print(phrase._modification._atr)
                1/0
    else:
        texts.append(head)
    return uniq_list(texts)


# TODO is the ‘first’ heuristic for choosing phrase type enough?
def make_coordinations(mods):
    """Return (phrase type, text) pairs for every non-empty subset of ``mods``,
    the texts joined with ' i/lub '. The type is taken from the first element
    of ``mods``."""
    ptype = mods[0][0]
    ret = [(ptype, ' i/lub '.join(text for _ptype, text in x)) for x in powerset_nonempty(mods)]
    return ret


def make_modifications(modification, function, negativity, attrs):
    """Return all combinations of modification texts, one per dependent position.

    Each combination is a tuple of (phrase type, text) pairs. A position with
    several phrases contributes their coordinations (see make_coordinations).
    """
    mods = []
    for position in modification._dependents:
        position_mods = []
        for p in position._phrases:
            mod = make_phraseologisms(p, function, negativity, attrs=attrs)
            # pass the mod phrase’s type for determining text order
            position_mods.append([(type(p), m) for m in mod])
        if len(position_mods) > 1:
            mods.append(chain.from_iterable(make_coordinations(mds) for mds in product(*position_mods)))
        else:
            mods.append(position_mods[0])
    return list(product(*mods))


def make_ul(items):
    """Render ``items`` as an HTML unordered list (items are not escaped)."""
    return '<ul>{}</ul>'.format(''.join(map('<li>{}</li>'.format, items)))