From ba8acc86771fec803d2e39ee59373d12c01156ff Mon Sep 17 00:00:00 2001
From: Piotr <piotr.m.przybyla@gmail.com>
Date: Wed, 19 Oct 2022 09:44:19 +0200
Subject: [PATCH] LAMBO integration: default to the Polish PDB model and return token strings from segment().

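Switch the default LamboTokenizer model to the Polish
LAMBO_no_pretraining-UD_Polish-PDB model and make segment() return plain
token strings instead of Token objects, so callers receive each sentence
as a list of strings.

A minimal usage sketch, assuming the lambo package and its pretrained
model files are available locally; the sample text is illustrative only:

    from combo.utils.lambo import LamboTokenizer

    # Defaults to the Polish PDB model introduced by this patch;
    # pass another model name to segment text in other languages.
    tokenizer = LamboTokenizer()

    # segment() now returns List[List[str]]: one inner list of token
    # strings per sentence, across all turns of the document.
    sentences = tokenizer.segment("Ala ma kota. Kot ma Ale.")
    for sentence in sentences:
        print(sentence)
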
---
 combo/utils/lambo.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/combo/utils/lambo.py b/combo/utils/lambo.py
index 990b03f..75284ef 100644
--- a/combo/utils/lambo.py
+++ b/combo/utils/lambo.py
@@ -6,7 +6,7 @@ from lambo.segmenter.lambo import Lambo
 
 class LamboTokenizer(Tokenizer):
 
-    def __init__(self, model: str = "en",) -> None:
+    def __init__(self, model: str = "LAMBO_no_pretraining-UD_Polish-PDB",) -> None:
         self.lambo=Lambo.get(model)
 
     # Simple tokenisation: ignoring sentence split
@@ -20,13 +20,13 @@ class LamboTokenizer(Tokenizer):
         return result
     
     # Full segmentation: divide into sentences and tokens
-    def segment(self, text: str) -> List[List[Token]]:
+    def segment(self, text: str) -> List[List[str]]:
         result = []
         document = self.lambo.segment(text)
         for turn in document.turns:
             for sentence in turn.sentences:
                 resultS=[]
                 for token in sentence.tokens:
-                    resultS.append(Token(token.text))
+                    resultS.append(token.text)
                 result.append(resultS)
         return result
\ No newline at end of file
-- 
GitLab