diff --git a/combo/data/tokenizers/lambo_tokenizer.py b/combo/data/tokenizers/lambo_tokenizer.py
index e0beb838ec2ec31779a53a8917d3bb8be4d8d882..e88f098393058ec3fa7ffabbd05ff6d22e528fa6 100644
--- a/combo/data/tokenizers/lambo_tokenizer.py
+++ b/combo/data/tokenizers/lambo_tokenizer.py
@@ -28,7 +28,7 @@ def _sentence_tokens(token: Token,
                      split_subwords: Optional[bool] = None) -> List[Token]:
     if split_subwords and len(token.subwords) > 0:
         subword_idxs = [next(_token_idx()) for _ in range(len(token.subwords))]
-        multiword = (token.text, (subword_idxs[0], subword_idxs[1]))
+        multiword = (token.text, (subword_idxs[0], subword_idxs[-1]))
         tokens = [Token(idx=s_idx, text=subword, multiword=multiword) for (s_idx, subword)
                   in zip(subword_idxs, token.subwords)]
         return tokens
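
Why `[-1]` matters here: `multiword` records the CoNLL-U range `first-last` over the subword ids, and hard-coding index 1 silently truncated the range whenever a token split into three or more subwords. A minimal sketch of the before/after behavior (the `Token` dataclass and the sample token below are stand-ins for illustration, not COMBO's actual definitions):

```python
from dataclasses import dataclass
from typing import Optional, Tuple

@dataclass
class Token:
    # Stand-in for COMBO's Token; only the fields used here.
    idx: int
    text: str
    multiword: Optional[Tuple[str, Tuple[int, int]]] = None

# A token split into three subwords receives three consecutive ids.
subword_idxs = [1, 2, 3]
subwords = ["da", "me", "lo"]

# Before the fix: (subword_idxs[0], subword_idxs[1]) == (1, 2), so the
# CoNLL-U multiword span 1-2 excluded the third subword. With [-1] the
# span covers every subword regardless of how many there are.
multiword = ("damelo", (subword_idxs[0], subword_idxs[-1]))
assert multiword == ("damelo", (1, 3))

tokens = [Token(idx=i, text=s, multiword=multiword)
          for i, s in zip(subword_idxs, subwords)]
```
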
@@ -74,12 +74,14 @@ class LamboTokenizer(Tokenizer):
             for turn in document.turns:
                 sentence_tokens = []
                 for sentence in turn.sentences:
+                    _reset_idx()
                     for token in sentence.tokens:
                         sentence_tokens.extend(_sentence_tokens(token, split_subwords))
                 tokens.append(sentence_tokens)
         elif split_level.upper() == "SENTENCE":
             for turn in document.turns:
                 for sentence in turn.sentences:
+                    _reset_idx()
                     sentence_tokens = []
                     for token in sentence.tokens:
                         sentence_tokens.extend(_sentence_tokens(token, split_subwords))
@@ -87,6 +89,7 @@ class LamboTokenizer(Tokenizer):
         else:
             for turn in document.turns:
                 for sentence in turn.sentences:
+                    _reset_idx()
                     for token in sentence.tokens:
                         tokens.extend(_sentence_tokens(token, split_subwords))
             tokens = [tokens]
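
The `_reset_idx()` calls added at each split level restart the token counter per sentence, so ids are 1-based within every sentence as CoNLL-U requires, instead of running on across sentence boundaries. A hedged sketch of the counter pair (only the names `_reset_idx`/`_token_idx` come from the diff; the bodies below are assumptions about their contract):

```python
import itertools

# Assumed module-level counter shared by the two helpers.
_generator = itertools.count(1)

def _reset_idx() -> None:
    # Restart numbering so token ids are 1-based within a sentence.
    global _generator
    _generator = itertools.count(1)

def _token_idx():
    return _generator

# Without the per-sentence _reset_idx(), numbering runs on across
# sentences (1..3, then 4..5); with it, each sentence starts at 1.
_reset_idx()
first = [next(_token_idx()) for _ in range(3)]   # [1, 2, 3]
_reset_idx()
second = [next(_token_idx()) for _ in range(2)]  # [1, 2]
assert first == [1, 2, 3] and second == [1, 2]
```
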
@@ -116,13 +119,30 @@
             if turns:
                 sentence_tokens = []
             for sentence in turn.sentences:
+                _reset_idx()
                 if not turns:
                     sentence_tokens = []
                 for token in sentence.tokens:
                     if len(token.subwords) > 0 and split_subwords:
-                        sentence_tokens.extend([s for s in token.subwords])
-                    else:
-                        sentence_tokens.append(token.text)
+                        # @TODO Workaround for a Lambo model shortcoming: for longer
+                        # words split into multiwords, it tends to drop the final
+                        # letter of the last subword.
+
+                        # Rebuild the subwords from token.text if their concatenation does not match it.
+                        if "".join(token.subwords) != token.text:
+                            fixed_subwords = []
+                            text_it = 0
+                            for i, subword in enumerate(token.subwords):
+                                if token.text[text_it:text_it + len(subword)] == subword:
+                                    if i == len(token.subwords) - 1 and (text_it + len(subword) < len(token.text)):
+                                        subword = token.text[text_it:]
+                                    fixed_subwords.append(subword)
+                                    text_it += len(subword)
+                                else:
+                                    fixed_subwords.append(token.text[text_it:text_it + len(subword)])
+                                    text_it += len(subword)
+                            token.subwords = fixed_subwords
+                    sentence_tokens.extend(_sentence_tokens(token, split_subwords))
                 if not turns:
                     sentences.append(sentence_tokens)
             if turns:
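
For illustration, the repair loop extracted into a standalone helper (`fix_subwords` is a hypothetical name, not part of the module; the sample input mimics the truncation described in the TODO):

```python
from typing import List

def fix_subwords(text: str, subwords: List[str]) -> List[str]:
    # Reconcile Lambo's predicted subwords with the token's surface text.
    if "".join(subwords) == text:
        return subwords
    fixed: List[str] = []
    text_it = 0
    for i, subword in enumerate(subwords):
        chunk = text[text_it:text_it + len(subword)]
        if chunk == subword and i == len(subwords) - 1 \
                and text_it + len(subword) < len(text):
            # Last subword matches but stops short of the surface form:
            # extend it to the end so no characters are lost.
            subword = text[text_it:]
        elif chunk != subword:
            # Mismatch: trust the surface text over the model output.
            subword = chunk
        fixed.append(subword)
        text_it += len(subword)
    return fixed

# Lambo sometimes drops the final letter of the last subword:
assert fix_subwords("quitandoselo", ["quitando", "se", "l"]) == \
       ["quitando", "se", "lo"]
```

One caveat worth flagging in review: the length extension only triggers when the last subword matches the surface text, so a last subword that both mismatches and stops short still comes back truncated, and the rebuilt subwords may still not concatenate to `token.text`.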