Skip to content
Snippets Groups Projects
Commit 3ea63ada authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

add nkjp-coarse config

parent ecbf6f27
Branches
No related merge requests found
# CRF++ feature template.
# %x[row,col] reads column `col` of the token at relative offset `row`.
# Columns follow the order of the "default" feature definitions:
#   0 orth, 1 class, 2 cas, 3 gnd, 4 nmb,
#   5 agrpp(0,1,...), 6 guarded wagr(-1,1,...), 7 and 8 regex tests on orth.
# Comments must occupy their own line: CRF++ only skips lines that START
# with '#', so a trailing "# ..." would end up as literal text in the feature.
# Unigram
# orth
U00:%x[-2,0]
U01:%x[-1,0]
U02:%x[0,0]
U03:%x[1,0]
U04:%x[2,0]
U05:%x[-1,0]/%x[0,0]
U06:%x[0,0]/%x[1,0]
# class
U10:%x[-2,1]
U11:%x[-1,1]
U12:%x[0,1]
U13:%x[1,1]
U14:%x[2,1]
U15:%x[-2,1]/%x[-1,1]
U16:%x[-1,1]/%x[0,1]
U17:%x[0,1]/%x[1,1]
U18:%x[1,1]/%x[2,1]
# cas
U20:%x[-2,2]
U21:%x[-1,2]
U22:%x[0,2]
U23:%x[1,2]
U24:%x[2,2]
# gnd
U30:%x[-2,3]
U31:%x[-1,3]
U32:%x[0,3]
U33:%x[1,3]
U34:%x[2,3]
# nmb
U40:%x[-2,4]
U41:%x[-1,4]
U42:%x[0,4]
U43:%x[1,4]
U44:%x[2,4]
# agr
# Column 5 holds the agreement feature defined over positions (0,1).
# Read at offset -1 it therefore describes positions (-1,0).
U50:%x[-1,5]
# Read at offset 0 it describes positions (0,1).
U51:%x[0,5]
# Column 6 holds the agreement feature defined over positions (-1,1).
# Read at offset -1 it describes positions (-2,0).
U52:%x[-1,6]
# Read at offset 0 it describes positions (-1,1).
U53:%x[0,6]
# Read at offset 1 it describes positions (0,2).
U54:%x[1,6]
# regex feats (only the current token is used; neighbours are disabled)
#U60:%x[-1,7]/%x[-1,8]
U61:%x[0,7]/%x[0,8]
#U62:%x[1,7]/%x[1,8]
# wordclass trigrams
U80:%x[-2,1]/%x[-1,1]/%x[0,1]
U81:%x[-1,1]/%x[0,1]/%x[1,1]
U82:%x[0,1]/%x[1,1]/%x[2,1]
# Bigram
B
@ "default" (
// Feature definitions for the "default" feature set.
// Each expression yields one feature column; the number in the trailing
// comment is the column index used as `col` in the CRF template's %x[row,col].
orth[0]; // 0
class[0]; // 1
cas[0]; // 2
gnd[0]; // 3
nmb[0]; // 4
// Agreement on number, gender and case between positions 0 and 1.
agrpp(0,1,{nmb,gnd,cas}); // 5
// Agreement over positions -1..1, required to hold together with
// inside(-1) and inside(1) (presumably sentence-boundary guards; confirm
// against the WCCL documentation).
and(inside(-1), inside(1), wagr(-1,1,{nmb,gnd,cas})); // 6
// Two tests on the current orth: \P{Ll} is "not a lowercase letter" and
// \P{Lu} is "not an uppercase letter" (negated Unicode properties), each
// followed by ".*" for the rest of the form.
// NOTE(review): negated \P (capital P) is used here; confirm that this,
// rather than \p, is the intended test.
regex(orth[0], "\\P{Ll}.*"); regex(orth[0], "\\P{Lu}.*") // 7, 8
)
; Configuration for chunking of phrases taken from NKJP but subjected to merging.
; NKJP tagset.
;
; NP is merged from the following groups:
; * actual nominal groups (NG, NGadres, NGdata, NGgodz),
; * numeral groups (NumG*),
; * prepositional-nominal and prepositional-numeral groups (PrepNG, PrepNumG, PrepNGadres, PrepNGb, PrepNGdata, PrepNGgodz, PrepNGp).
;
; AdjP is taken from top-level adjective and prep-adj groups (TODO: enumerate).
;
; VP is taken from syntactic words having verbal classes.
;
; There is only one layer for all the groups.
[general]
tagset = nkjp
tagged = yes
[layers]
; the layer ordering is inferred from alphabetical order of their names!
; channel names should contain no hyphens
layer1 = chunk_vp,chunk_np,chunk_adjp
[crf]
params = -a CRF-L2
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment