From 3ea63adaecefaefc5a551eea009d8810a0ba047b Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Wed, 4 Jul 2012 10:31:21 +0200
Subject: [PATCH] add nkjp-coarse config

---
 iobber/data/nkjp-coarse-layer1.txt | 61 ++++++++++++++++++++++++++++++
 iobber/data/nkjp-coarse.ccl        | 11 ++++++
 iobber/data/nkjp-coarse.ini        | 26 +++++++++++++
 3 files changed, 98 insertions(+)
 create mode 100644 iobber/data/nkjp-coarse-layer1.txt
 create mode 100644 iobber/data/nkjp-coarse.ccl
 create mode 100644 iobber/data/nkjp-coarse.ini

diff --git a/iobber/data/nkjp-coarse-layer1.txt b/iobber/data/nkjp-coarse-layer1.txt
new file mode 100644
index 0000000..b9e5ddd
--- /dev/null
+++ b/iobber/data/nkjp-coarse-layer1.txt
@@ -0,0 +1,61 @@
+# Unigram templates; %x[r,c] reads feature column c of the token at offset r from the current one
+# orth (column 0): window -2..+2 plus adjacent pairs around the current token
+U00:%x[-2,0]
+U01:%x[-1,0]
+U02:%x[0,0]
+U03:%x[1,0]
+U04:%x[2,0]
+U05:%x[-1,0]/%x[0,0]
+U06:%x[0,0]/%x[1,0]
+
+# class (column 1): window -2..+2 plus all adjacent pairs inside the window
+U10:%x[-2,1]
+U11:%x[-1,1]
+U12:%x[0,1]
+U13:%x[1,1]
+U14:%x[2,1]
+U15:%x[-2,1]/%x[-1,1]
+U16:%x[-1,1]/%x[0,1]
+U17:%x[0,1]/%x[1,1]
+U18:%x[1,1]/%x[2,1]
+
+# cas (column 2): window -2..+2
+U20:%x[-2,2]
+U21:%x[-1,2]
+U22:%x[0,2]
+U23:%x[1,2]
+U24:%x[2,2]
+
+# gnd (column 3): window -2..+2
+U30:%x[-2,3]
+U31:%x[-1,3]
+U32:%x[0,3]
+U33:%x[1,3]
+U34:%x[2,3]
+
+# nmb (column 4): window -2..+2
+U40:%x[-2,4]
+U41:%x[-1,4]
+U42:%x[0,4]
+U43:%x[1,4]
+U44:%x[2,4]
+
+# agr (no inline notes: CRF++ would keep them in the feature text). Col 5 = agrpp(0,1), col 6 = wagr(-1,1): U50 agr(-1,0), U51 agr(0,1), U52 agr(-2,0), U53 agr(-1,1), U54 agr(0,2)
+U50:%x[-1,5]
+U51:%x[0,5]
+U52:%x[-1,6]
+U53:%x[0,6]
+U54:%x[1,6]
+
+# regex feats (columns 7 and 8 combined); only the current-token pair U61 is enabled, the neighbouring pairs are commented out
+#U60:%x[-1,7]/%x[-1,8]
+U61:%x[0,7]/%x[0,8]
+#U62:%x[1,7]/%x[1,8]
+
+# wordclass trigrams (column 1), sliding over the -2..+2 window
+U80:%x[-2,1]/%x[-1,1]/%x[0,1]
+U81:%x[-1,1]/%x[0,1]/%x[1,1]
+U82:%x[0,1]/%x[1,1]/%x[2,1]
+
+# Bigram: a bare B makes CRF++ generate features over the previous/current output label pair
+B
diff --git a/iobber/data/nkjp-coarse.ccl b/iobber/data/nkjp-coarse.ccl
new file mode 100644
index 0000000..b6c2d15
--- /dev/null
+++ b/iobber/data/nkjp-coarse.ccl
@@ -0,0 +1,11 @@
+@ "default" (
+   orth[0];  // col 0: orthographic form of the current token
+   class[0]; // col 1: grammatical class of the current token
+   cas[0];   // col 2: case
+   gnd[0];   // col 3: gender
+   nmb[0];   // col 4: number
+   agrpp(0,1,{nmb,gnd,cas}); // col 5: agreement between positions 0 and 1 on number, gender and case
+   and(inside(-1), inside(1), wagr(-1,1,{nmb,gnd,cas})); // col 6: positions -1 and 1 are inside the sentence and weak agreement (wagr) holds over -1..1
+   regex(orth[0], "\\P{Ll}.*"); regex(orth[0], "\\P{Lu}.*") // cols 7, 8: orth starts with a non-lowercase / non-uppercase character (assuming whole-string match -- TODO confirm)
+)
+
diff --git a/iobber/data/nkjp-coarse.ini b/iobber/data/nkjp-coarse.ini
new file mode 100644
index 0000000..0062c8d
--- /dev/null
+++ b/iobber/data/nkjp-coarse.ini
@@ -0,0 +1,26 @@
+; Configuration for chunking of phrases taken from NKJP but subjected to merging.
+; NKJP tagset.
+;
+; NP is merged from the following groups:
+; * actual nominal groups (NG, NGadres, NGdata, NGgodz),
+; * numeral groups (NumG*),
+; * prepositional-nominal and prepositional-numeral groups (PrepNG, PrepNumG, PrepNGadres, PrepNGb, PrepNGdata, PrepNGgodz, PrepNGp).
+;
+; AdjP is taken from top-level adjective and prep-adj groups (TODO: enumerate).
+;
+; VP is taken from syntactic words having verbal classes.
+;
+; There is only one layer for all the groups.
+
+[general]
+tagset = nkjp
+tagged = yes
+
+[layers]
+; Layer order is derived from the alphabetical order of the layer names.
+; Channel names must not contain hyphens.
+layer1 = chunk_vp,chunk_np,chunk_adjp
+
+[crf]
+params = -a CRF-L2
+
-- 
GitLab