Skip to content
Snippets Groups Projects
Commit 72e211b3 authored by Adam Radziszewski
Browse files

auto-split huge sentences in nkjp config

parent 1df6ebd6
No related branches found
No related tags found
No related merge requests found
...@@ -125,6 +125,13 @@ ...@@ -125,6 +125,13 @@
process_types=t process_types=t
type:ts=\p{L}*\P{L}.* type:ts=\p{L}*\P{L}.*
[layer:long_sent]
; Layer limiting sentence length.
; The longest sentence in NKJP has 104 tokens; the limit below is
; 1.5 * 104 = 156 tokens, chosen to be on the safe side.
; NOTE(review): class=check presumably selects the layer implementation
; that enforces max_sentence_size, and huge_sentence_split=1 presumably
; makes it split sentences exceeding the limit -- confirm against the
; tokenizer's documentation.
class=check
max_sentence_size=156
huge_sentence_split=1
[layers] [layers]
layer=exc_0 layer=exc_0
layer=suff_safe layer=suff_safe
...@@ -138,6 +145,7 @@ ...@@ -138,6 +145,7 @@
layer=th_classify layer=th_classify
layer=hyphen layer=hyphen
layer=ts_classify layer=ts_classify
layer=long_sent
[debug] [debug]
format=$orth/$type:$ws\n format=$orth/$type:$ws\n
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment