diff --git a/combo/data/tokenizers/lambo_tokenizer.py b/combo/data/tokenizers/lambo_tokenizer.py
index c5f4451034ec614cacfc08429be9bdbd644b7229..8d4e4e6c8faca685f0554bab031c308288bdc97b 100644
--- a/combo/data/tokenizers/lambo_tokenizer.py
+++ b/combo/data/tokenizers/lambo_tokenizer.py
@@ -43,9 +43,16 @@ class LamboTokenizer(Tokenizer):
 
         document = self.__tokenizer.segment(text)
         sentences = []
 
         for turn in document.turns:
             for sentence in turn.sentences:
-                sentences.append([t.text for t in sentence.tokens])
+                sentence_tokens = []
+                for token in sentence.tokens:
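+                    # Use LAMBO's subword units when present; otherwise fall back to the token's surface text.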
+                    if len(token.subwords) > 0:
+                        sentence_tokens.extend(token.subwords)
+                    else:
+                        sentence_tokens.append(token.text)
+                sentences.append(sentence_tokens)
 
         return sentences
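
For reference, a minimal, self-contained sketch of the new behaviour using
hypothetical stand-in objects (not real LAMBO output, though the diff shows the
same turns/sentences/tokens structure): tokens that LAMBO splits into subwords
are expanded in place, while tokens without subwords keep their surface text.

    # Hypothetical stand-ins for LAMBO's document/turn/sentence/token objects.
    from types import SimpleNamespace as NS

    document = NS(turns=[NS(sentences=[NS(tokens=[
        NS(text="don't", subwords=["do", "n't"]),  # token split into subwords
        NS(text="stop", subwords=[]),              # plain token, no subwords
    ])])])

    sentences = []
    for turn in document.turns:
        for sentence in turn.sentences:
            sentence_tokens = []
            for token in sentence.tokens:
                if len(token.subwords) > 0:
                    sentence_tokens.extend(token.subwords)
                else:
                    sentence_tokens.append(token.text)
            sentences.append(sentence_tokens)

    print(sentences)  # [['do', "n't", 'stop']]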