Commit 72e211b3 authored by Adam Radziszewski

auto-split huge sentences in nkjp config

parent 1df6ebd6
@@ -125,6 +125,13 @@
process_types=t
type:ts=\p{L}*\P{L}.*
[layer:long_sent]
; Layer that caps sentence length.
; The longest sentence in NKJP has 104 tokens; the limit used below is
; 1.5 * 104 = 156 tokens, chosen as an upper bound to be on the safe side.
; NOTE(review): the exact behaviour of class=check is defined by the
; consuming tokenizer and is not visible here -- confirm.
class=check
; Maximum sentence size, in tokens (1.5 * 104, see above).
max_sentence_size=156
; Presumably enables automatic splitting of sentences longer than
; max_sentence_size -- TODO confirm against the tokenizer documentation.
huge_sentence_split=1
[layers]
layer=exc_0
layer=suff_safe
@@ -138,6 +145,7 @@
layer=th_classify
layer=hyphen
layer=ts_classify
layer=long_sent
[debug]
format=$orth/$type:$ws\n
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment