Skip to content
Snippets Groups Projects
Commit 9394b990 authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

config for KPWr phrases

parent 1433a409
Branches
No related merge requests found
# CRF++ feature template.
# %x[row,col] expands to the value found in column `col` of the token that
# is `row` positions away from the current one (row 0 = current token).
# NOTE: only lines that START with '#' are skipped by CRF++. Text placed
# after a macro on the same line is copied verbatim into the generated
# feature string, so every comment here lives on a line of its own.
# Unigram
# orth (column 0): window -2..+2 plus the two bigrams around the current token
U00:%x[-2,0]
U01:%x[-1,0]
U02:%x[0,0]
U03:%x[1,0]
U04:%x[2,0]
U05:%x[-1,0]/%x[0,0]
U06:%x[0,0]/%x[1,0]
# class (column 1): window -2..+2 plus all adjacent bigrams in that window
U10:%x[-2,1]
U11:%x[-1,1]
U12:%x[0,1]
U13:%x[1,1]
U14:%x[2,1]
U15:%x[-2,1]/%x[-1,1]
U16:%x[-1,1]/%x[0,1]
U17:%x[0,1]/%x[1,1]
U18:%x[1,1]/%x[2,1]
# cas (column 2): window -2..+2
U20:%x[-2,2]
U21:%x[-1,2]
U22:%x[0,2]
U23:%x[1,2]
U24:%x[2,2]
# gnd (column 3): window -2..+2
U30:%x[-2,3]
U31:%x[-1,3]
U32:%x[0,3]
U33:%x[1,3]
U34:%x[2,3]
# nmb (column 4): window -2..+2
U40:%x[-2,4]
U41:%x[-1,4]
U42:%x[0,4]
U43:%x[1,4]
U44:%x[2,4]
# agr
# Column 5 holds agr(0,1) computed at each token:
#   U50 reads it one token back, i.e. agr(-1,0)
#   U51 reads it at the current token, i.e. agr(0,1)
U50:%x[-1,5]
U51:%x[0,5]
# Column 6 holds the agreement over (-1,1) computed at each token:
#   U52 reads it one token back, i.e. (-2,0)
#   U53 reads it at the current token, i.e. (-1,1)
#   U54 reads it one token ahead, i.e. (0,2)
U52:%x[-1,6]
U53:%x[0,6]
U54:%x[1,6]
# regex feats (columns 7 and 8 combined); only the current-token variant is
# active, the previous/next-token variants are disabled
#U60:%x[-1,7]/%x[-1,8]
U61:%x[0,7]/%x[0,8]
#U62:%x[1,7]/%x[1,8]
# wordclass trigrams (column 1)
U80:%x[-2,1]/%x[-1,1]/%x[0,1]
U81:%x[-1,1]/%x[0,1]/%x[1,1]
U82:%x[0,1]/%x[1,1]/%x[2,1]
# Bigram
# A bare B makes CRF++ generate previous-label/current-label features.
B
# CRF++ feature template.
# %x[row,col] expands to the value found in column `col` of the token that
# is `row` positions away from the current one (row 0 = current token).
# NOTE: only lines that START with '#' are skipped by CRF++. Text placed
# after a macro on the same line is copied verbatim into the generated
# feature string, so every comment here lives on a line of its own.
# Unigram
# orth (column 0): window -2..+2 plus the two bigrams around the current token
U00:%x[-2,0]
U01:%x[-1,0]
U02:%x[0,0]
U03:%x[1,0]
U04:%x[2,0]
U05:%x[-1,0]/%x[0,0]
U06:%x[0,0]/%x[1,0]
# class (column 1): window -2..+2 plus all adjacent bigrams in that window
U10:%x[-2,1]
U11:%x[-1,1]
U12:%x[0,1]
U13:%x[1,1]
U14:%x[2,1]
U15:%x[-2,1]/%x[-1,1]
U16:%x[-1,1]/%x[0,1]
U17:%x[0,1]/%x[1,1]
U18:%x[1,1]/%x[2,1]
# cas (column 2): window -2..+2
U20:%x[-2,2]
U21:%x[-1,2]
U22:%x[0,2]
U23:%x[1,2]
U24:%x[2,2]
# gnd (column 3): window -2..+2
U30:%x[-2,3]
U31:%x[-1,3]
U32:%x[0,3]
U33:%x[1,3]
U34:%x[2,3]
# nmb (column 4): window -2..+2
U40:%x[-2,4]
U41:%x[-1,4]
U42:%x[0,4]
U43:%x[1,4]
U44:%x[2,4]
# agr
# Column 5 holds agr(0,1) computed at each token:
#   U50 reads it one token back, i.e. agr(-1,0)
#   U51 reads it at the current token, i.e. agr(0,1)
U50:%x[-1,5]
U51:%x[0,5]
# Column 6 holds the agreement over (-1,1) computed at each token:
#   U52 reads it one token back, i.e. (-2,0)
#   U53 reads it at the current token, i.e. (-1,1)
#   U54 reads it one token ahead, i.e. (0,2)
U52:%x[-1,6]
U53:%x[0,6]
U54:%x[1,6]
# regex feats (columns 7 and 8 combined); only the current-token variant is
# active, the previous/next-token variants are disabled
#U60:%x[-1,7]/%x[-1,8]
U61:%x[0,7]/%x[0,8]
#U62:%x[1,7]/%x[1,8]
# wordclass trigrams (column 1)
U80:%x[-2,1]/%x[-1,1]/%x[0,1]
U81:%x[-1,1]/%x[0,1]/%x[1,1]
U82:%x[0,1]/%x[1,1]/%x[2,1]
# Bigram
# A bare B makes CRF++ generate previous-label/current-label features.
B
// Feature operators evaluated for every token. The number in each trailing
// comment is the index of the output column the operator fills; presumably
// these are the column indices addressed by the CRF feature templates
// (TODO confirm against the template files).
@ "default" (
    // Plain token attributes of the current token.
    orth[0];                                                 // 0
    class[0];                                                // 1
    cas[0];                                                  // 2
    gnd[0];                                                  // 3
    nmb[0];                                                  // 4
    // Agreement on number, gender and case between the current token and
    // the next one.
    agrpp(0,1,{nmb,gnd,cas});                                // 5
    // Agreement on number, gender and case over the span -1..1, required
    // only when both neighbouring positions are inside the sentence.
    and(inside(-1), inside(1), wagr(-1,1,{nmb,gnd,cas}));    // 6
    // Orthographic tests on the first character of the current token:
    // column 7 - first character is not a lowercase letter,
    // column 8 - first character is not an uppercase letter.
    regex(orth[0], "\\P{Ll}.*");                             // 7
    regex(orth[0], "\\P{Lu}.*")                              // 8
)
/*
@ "chunk_np" (
iob(0, "chunk_agp"); // 9
iob(0, "chunk_vp") // 10
)
@ "chunk_adjp" (
iob(0, chunk_agp), // 9
iob(0, chunk_vp), // 10
iob(0, chunk_np) // 11
)
*/
; Chunker configuration for the phrase types defined in KPWr:
;   chunk_np   - noun phrases
;   chunk_adjp - top-level adjective phrases
;   chunk_vp   - verb phrases without nominal arguments
;   chunk_agp  - simple agreement-based noun or adjective phrases
;                (forms a level on its own)
; The NKJP tagset is assumed throughout.

[general]
tagset = nkjp
tagged = yes

[layers]
; NOTE: layer ordering is inferred from the alphabetical order of the layer
; names, so name them accordingly.
layer1 = chunk_agp
layer2 = chunk_vp,chunk_np,chunk_adjp

[crf]
; Presumably passed through to the CRF trainer as command-line options
; (TODO confirm against the consuming code).
params = -a CRF-L2
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment