From 9394b990216bef1d035f4f710d0dccddab8c0134 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Thu, 19 Apr 2012 13:05:14 +0200 Subject: [PATCH] config for KPWr phrases --- config/kpwr-layer1.txt | 61 ++++++++++++++++++++++++++++++++++++++++++ config/kpwr-layer2.txt | 61 ++++++++++++++++++++++++++++++++++++++++++ config/kpwr.ccl | 23 ++++++++++++++++ config/kpwr.ini | 19 +++++++++++++ 4 files changed, 164 insertions(+) create mode 100644 config/kpwr-layer1.txt create mode 100644 config/kpwr-layer2.txt create mode 100644 config/kpwr.ccl create mode 100644 config/kpwr.ini diff --git a/config/kpwr-layer1.txt b/config/kpwr-layer1.txt new file mode 100644 index 0000000..b9e5ddd --- /dev/null +++ b/config/kpwr-layer1.txt @@ -0,0 +1,61 @@ +# Unigram +# orth +U00:%x[-2,0] +U01:%x[-1,0] +U02:%x[0,0] +U03:%x[1,0] +U04:%x[2,0] +U05:%x[-1,0]/%x[0,0] +U06:%x[0,0]/%x[1,0] + +# class +U10:%x[-2,1] +U11:%x[-1,1] +U12:%x[0,1] +U13:%x[1,1] +U14:%x[2,1] +U15:%x[-2,1]/%x[-1,1] +U16:%x[-1,1]/%x[0,1] +U17:%x[0,1]/%x[1,1] +U18:%x[1,1]/%x[2,1] + +# cas +U20:%x[-2,2] +U21:%x[-1,2] +U22:%x[0,2] +U23:%x[1,2] +U24:%x[2,2] + +# gnd +U30:%x[-2,3] +U31:%x[-1,3] +U32:%x[0,3] +U33:%x[1,3] +U34:%x[2,3] + +# nmb +U40:%x[-2,4] +U41:%x[-1,4] +U42:%x[0,4] +U43:%x[1,4] +U44:%x[2,4] + +# agr +U50:%x[-1,5] # agr(0,1) -> agr(-1,0) +U51:%x[0,5] # agr(0,1) +U52:%x[-1,6] # agr..(-1,1) -> agr(-2,0) +U53:%x[0,6] # (-1,1) +U54:%x[1,6] # ... 
-> (0,2) + +# regex feats +#U60:%x[-1,7]/%x[-1,8] +U61:%x[0,7]/%x[0,8] +#U62:%x[1,7]/%x[1,8] + +# wordclass trigrams +U80:%x[-2,1]/%x[-1,1]/%x[0,1] +U81:%x[-1,1]/%x[0,1]/%x[1,1] +U82:%x[0,1]/%x[1,1]/%x[2,1] + +# Bigram +B diff --git a/config/kpwr-layer2.txt b/config/kpwr-layer2.txt new file mode 100644 index 0000000..b9e5ddd --- /dev/null +++ b/config/kpwr-layer2.txt @@ -0,0 +1,61 @@ +# Unigram +# orth +U00:%x[-2,0] +U01:%x[-1,0] +U02:%x[0,0] +U03:%x[1,0] +U04:%x[2,0] +U05:%x[-1,0]/%x[0,0] +U06:%x[0,0]/%x[1,0] + +# class +U10:%x[-2,1] +U11:%x[-1,1] +U12:%x[0,1] +U13:%x[1,1] +U14:%x[2,1] +U15:%x[-2,1]/%x[-1,1] +U16:%x[-1,1]/%x[0,1] +U17:%x[0,1]/%x[1,1] +U18:%x[1,1]/%x[2,1] + +# cas +U20:%x[-2,2] +U21:%x[-1,2] +U22:%x[0,2] +U23:%x[1,2] +U24:%x[2,2] + +# gnd +U30:%x[-2,3] +U31:%x[-1,3] +U32:%x[0,3] +U33:%x[1,3] +U34:%x[2,3] + +# nmb +U40:%x[-2,4] +U41:%x[-1,4] +U42:%x[0,4] +U43:%x[1,4] +U44:%x[2,4] + +# agr +U50:%x[-1,5] # agr(0,1) -> agr(-1,0) +U51:%x[0,5] # agr(0,1) +U52:%x[-1,6] # agr..(-1,1) -> agr(-2,0) +U53:%x[0,6] # (-1,1) +U54:%x[1,6] # ... 
-> (0,2) + +# regex feats +#U60:%x[-1,7]/%x[-1,8] +U61:%x[0,7]/%x[0,8] +#U62:%x[1,7]/%x[1,8] + +# wordclass trigrams +U80:%x[-2,1]/%x[-1,1]/%x[0,1] +U81:%x[-1,1]/%x[0,1]/%x[1,1] +U82:%x[0,1]/%x[1,1]/%x[2,1] + +# Bigram +B diff --git a/config/kpwr.ccl b/config/kpwr.ccl new file mode 100644 index 0000000..c97321a --- /dev/null +++ b/config/kpwr.ccl @@ -0,0 +1,23 @@ +@ "default" ( + orth[0]; // 0 + class[0]; // 1 + cas[0]; // 2 + gnd[0]; // 3 + nmb[0]; // 4 + agrpp(0,1,{nmb,gnd,cas}); // 5 + and(inside(-1), inside(1), wagr(-1,1,{nmb,gnd,cas})); // 6 + regex(orth[0], "\\P{Ll}.*"); regex(orth[0], "\\P{Lu}.*") // 7, 8 +) + +/* +@ "chunk_np" ( + iob(0, "chunk_agp"); // 9 + iob(0, "chunk_vp") // 10 +) + +@ "chunk_adjp" ( + iob(0, chunk_agp), // 9 + iob(0, chunk_vp), // 10 + iob(0, chunk_np) // 11 +) +*/ diff --git a/config/kpwr.ini b/config/kpwr.ini new file mode 100644 index 0000000..0b4f6ff --- /dev/null +++ b/config/kpwr.ini @@ -0,0 +1,19 @@ +; Configuration for chunking phrases defined in KPWr: +; * chunk_np (noun phrases), +; * chunk_adjp (top-level adjective phrases), +; * chunk_vp (verb phrases without nominal arguments), +; * chunk_agp (simple agreement-based noun or adjective phrases, a level of its own). +; The config assumes the NKJP tagset. + +[general] +tagset = nkjp +tagged = yes + +[layers] +; the layer ordering is inferred from alphabetical order of their names! +layer1 = chunk_agp +layer2 = chunk_vp,chunk_np,chunk_adjp + +[crf] +params = -a CRF-L2 + -- GitLab