diff --git a/Dockerfile b/Dockerfile
index 65b248deda192b5688ffe24476602661759be09c..60a83f1fc9cc028bfb036042b46b078ed75f07a8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -103,5 +103,4 @@ COPY stopwords.list stopwords.list
 COPY resp-cmd.ini resp-cmd.ini
 COPY p4.ccl p4.ccl
 COPY start.sh start.sh
-
-
+COPY text.ccl text.ccl
diff --git a/resp-cmd.ini b/resp-cmd.ini
index b37f215dd1e28d67e74bf6855eae30cd9400cad8..35c4b08e115a7c888189df043313c1106dc4fe6d 100644
--- a/resp-cmd.ini
+++ b/resp-cmd.ini
@@ -77,7 +77,7 @@ filename = resp.tsv
 
 [resp.documents]
 extractors = sin_ext
-documents = p4.ccl
+documents = text.ccl
 tagset = nkjp
 document_id_generator = docid
 
diff --git a/resp/resp/data_structures/table/processors/remove_substrings.py b/resp/resp/data_structures/table/processors/remove_substrings.py
index b203630b9296a9eb0fedf853b6d894b3eb93ab7c..0ecd176b8dd237ef557662c1ebec2d3bd8fb8215 100644
--- a/resp/resp/data_structures/table/processors/remove_substrings.py
+++ b/resp/resp/data_structures/table/processors/remove_substrings.py
@@ -19,6 +19,7 @@ from __future__ import absolute_import, unicode_literals, division
 
 
 import logging
+from collections import defaultdict
 
 from ...base import DataStructureProcessorBase
 from ....utils.config import fields as conff
@@ -65,6 +66,70 @@ class TableRemoveSubStrings(DataStructureProcessorBase):
         # storage is not used here
         removed_idxs = set()
         
+	index = defaultdict(list)
+	for idx, rec in enumerate(data_structure):			
+	    lemmas = rec.lemma.split()
+	    for lemma in lemmas:
+		index[lemma].append((idx, rec))
+
+	for lemma, recs in index.iteritems():
+	    if len(recs) > 10:
+		continue
+	    for idx1, rec1 in recs:
+		if idx1 in removed_idxs:
+		    continue
+
+		next_idx = idx1 + 1
+		gval1 = (rec1[self.group_by]
+			 if self.group_by is not None
+			 else None)
+
+		for idx2, rec2 in recs:
+		    if idx2 in removed_idxs:
+			continue
+
+		    gval2 = (rec2[self.group_by]
+			     if self.group_by is not None
+			     else None)
+
+		    # Do not compare substrings from different groups (if grouping
+		    # is active)
+		    if gval1 != gval2:
+			continue
+
+		    for kname in self.key_columns:
+			kval1 = rec1[kname]
+			kval2 = rec2[kname]
+
+			# Do not remove complete duplicates
+			if kval1 == kval2:
+			    continue
+
+			if kval1 in kval2:
+			    _log.debug(
+				'Removing “%s” ∈ “%s”',
+				UStr(kval1),
+				UStr(kval2),
+			    )
+			    removed_idxs.add(idx1)
+			    break
+			elif kval2 in kval1:
+			    _log.debug(
+				'Removing “%s” ∈ “%s”',
+				UStr(kval2),
+				UStr(kval1),
+			    )
+			    removed_idxs.add(idx2)
+			    break
+
+        data_structure.remove_rows(*removed_idxs)
+        _log.debug('Done removing sub-strings')
+
+    def process_old(self, data_structure, storage):
+        _log.debug('Removing sub-strings on key columns %r', self.key_columns)
+        # storage is not used here
+        removed_idxs = set()
+        
         for idx1, rec1 in enumerate(data_structure):
             if idx1 in removed_idxs:
                 continue