diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index cbaa9dab3608c4b012b473b40beac084647aadd6..3b937f6d286640452a10a69774a2443155591412 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,7 +1,7 @@
 PROJECT(corpus2)
 
 set(corpus2_ver_major "1")
-set(corpus2_ver_minor "8")
+set(corpus2_ver_minor "9")
 set(corpus2_ver_patch "0")
 set(CORPUS2_VERSION "${corpus2_ver_major}.${corpus2_ver_minor}.${corpus2_ver_patch}")
 
diff --git a/src/libcorpus2/lexeme.cpp b/src/libcorpus2/lexeme.cpp
index b6b6650956e74bdf439995cd6975f0982d4c41b3..2fcfea3334b75780faf6370ff9ee0660df7125be 100644
--- a/src/libcorpus2/lexeme.cpp
+++ b/src/libcorpus2/lexeme.cpp
@@ -64,4 +64,13 @@ bool Lexeme::DisamblessComparator::operator()(const Lexeme& l, const Lexeme& oth
 	return l.lemma_ == other.lemma_	&& l.tag_ == other.tag_;
 }
 
+size_t hash_value(const Lexeme &lexeme) // hash of a Lexeme built from its lemma, tag and disamb flag
+{
+	std::size_t seed = 0; // running hash state, updated in place by every hash_combine call
+	boost::hash_combine(seed, lexeme.lemma_utf8());
+	boost::hash_combine(seed, lexeme.tag());
+	boost::hash_combine(seed, lexeme.is_disamb()); // NOTE(review): presumably mirrors the fields compared by Lexeme::operator== - confirm
+	return seed; // hash_combine is order-sensitive: reordering the calls above changes the value
+}
+
 } /* end ns Corpus2 */
diff --git a/src/libcorpus2/lexeme.h b/src/libcorpus2/lexeme.h
index 8b9084c3aafbd97e38adbe5d2a8d63e97bfc327d..48189685606b02cdbc0627d73b3c9280b411385c 100644
--- a/src/libcorpus2/lexeme.h
+++ b/src/libcorpus2/lexeme.h
@@ -138,6 +138,8 @@ private:
 	bool disamb_;
 };
 
+size_t hash_value(const Lexeme &lexeme); // hash of a Lexeme; free function in namespace Corpus2, named like the boost::hash extension hook
+
 } /* end ns Corpus2 */
 
 #endif // LIBCORPUS2_LEXEME_H
diff --git a/src/libcorpus2/token.cpp b/src/libcorpus2/token.cpp
index 857980e9500aa66b0c8ff8bfb1077f74a87f482d..2afaf81a3c601825ab0120c1b16642132f62c72a 100644
--- a/src/libcorpus2/token.cpp
+++ b/src/libcorpus2/token.cpp
@@ -157,4 +157,13 @@ void Token::create_metadata()
 	metadata_ = boost::make_shared<TokenMetaData>();
 }
 
+size_t hash_value(const Token &token) // hash of a Token built from orth_utf8(), wa() and lexemes()
+{
+	std::size_t seed = 0; // running hash state, updated in place by every hash_combine call
+	boost::hash_combine(seed, token.orth_utf8());
+	boost::hash_combine(seed, token.wa()); // NOTE(review): wa() looks like the preceding-whitespace setting - confirm
+	boost::hash_combine(seed, token.lexemes()); // NOTE(review): presumably hashed element-wise via hash_value(const Lexeme&) - confirm
+	return seed; // hash_combine is order-sensitive: reordering the calls above changes the value
+}
+
 } /* end ns Corpus2 */
diff --git a/src/libcorpus2/token.h b/src/libcorpus2/token.h
index ea2ed5bf99c4e9e289ef3d52283c325ce3c31f94..6be8ec2f0902d735755c70ea58de4cc6afac4a5f 100644
--- a/src/libcorpus2/token.h
+++ b/src/libcorpus2/token.h
@@ -233,6 +233,8 @@ private:
 	boost::shared_ptr<TokenMetaData> metadata_;
 };
 
+size_t hash_value(const Token &token); // hash of a Token; free function in namespace Corpus2, named like the boost::hash extension hook
+
 } /* end ns Corpus2 */
 
 #endif // LIBCORPUS2_TOKEN_H
diff --git a/src/swig/token.i b/src/swig/token.i
index 2cde15e0e74ee7f7fac370b6b591409491da51f9..0aa11fd9d96652fff105175ab4952157630904eb 100644
--- a/src/swig/token.i
+++ b/src/swig/token.i
@@ -74,7 +74,13 @@ namespace Corpus2 {
     void create_metadata();
   };
 
+  size_t hash_value(const Token &token); // declares the C++ Token hash function to SWIG; called by Token.__hash__ below
+
   %extend Token {
+    long __hash__() { // Python hash hook: forwards to hash_value(const Token&)
+      return static_cast<long>(hash_value(*self));
+    }
+
     // otherwise x != y wont trigger operator==
     %pythoncode %{
       def __ne__(self, other):
diff --git a/src/tests/python/data/ccl01.xml b/src/tests/python/data/ccl01.xml
new file mode 100644
index 0000000000000000000000000000000000000000..fd05dc0c5ae395673fec2b3e707ce6ff89dbce4f
--- /dev/null
+++ b/src/tests/python/data/ccl01.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE chunkList SYSTEM "ccl.dtd">
+<chunkList>
+  <chunk id="ch1">
+    <sentence id="sent1">
+      <tok>
+        <orth>historii</orth>
+        <lex disamb="1">
+          <base>historia</base>
+          <ctag>subst:sg:gen:f</ctag>
+        </lex>
+      </tok>
+      <tok>
+        <orth>historii</orth>
+        <lex disamb="1">
+          <base>historia</base>
+          <ctag>subst:sg:gen:f</ctag>
+        </lex>
+      </tok>
+      <tok>
+        <orth>historiami</orth>
+        <lex disamb="1">
+          <base>historia</base>
+          <ctag>subst:sg:gen:f</ctag>
+        </lex>
+      </tok>
+      <tok>
+        <orth>historiami</orth>
+        <lex disamb="1">
+          <base>historia</base>
+          <ctag>subst:pl:inst:f</ctag>
+        </lex>
+      </tok>
+    </sentence>
+    <sentence id="sent2">
+      <tok>
+        <orth>historii</orth>
+        <lex disamb="1">
+          <base>historia</base>
+          <ctag>subst:sg:gen:f</ctag>
+        </lex>
+      </tok>
+      <tok>
+        <orth>historii</orth>
+        <lex disamb="1">
+          <base>Historia</base>
+          <ctag>subst:sg:gen:f</ctag>
+        </lex>
+      </tok>
+      <tok>
+        <orth>historiami</orth>
+        <lex disamb="1">
+          <base>historia</base>
+          <ctag>subst:sg:gen:f</ctag>
+        </lex>
+      </tok>
+      <ns/>
+    </sentence>
+  </chunk>
+</chunkList>
diff --git a/src/tests/python/requirements-test.txt b/src/tests/python/requirements-test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4e823d75e1e01f5a36cd4ffbc10bd71e80e71280
--- /dev/null
+++ b/src/tests/python/requirements-test.txt
@@ -0,0 +1 @@
+cclutils
diff --git a/src/tests/python/test_token_hash.py b/src/tests/python/test_token_hash.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ed8ad0eb8446058dceb2265cd4572e93217545b
--- /dev/null
+++ b/src/tests/python/test_token_hash.py
@@ -0,0 +1,49 @@
+from collections import defaultdict
+
+import cclutils as ccl
+
+
+test_doc = "data/ccl01.xml"
+
+doc = ccl.read(test_doc)
+
+toks = [
+    t for p in doc.paragraphs() for s in p.sentences() for t in s.tokens()
+]
+
+all_toks = set(toks)
+assert len(all_toks) == 5
+
+# check if results of hash function and `==` operator are consistent
+for t1 in toks:
+    for t2 in toks:
+        are_equal = t1 == t2
+        have_equal_hashes = hash(t1) == hash(t2)
+        assert (
+            not any((are_equal, have_equal_hashes))
+            or all((are_equal, have_equal_hashes))
+        )
+
+toks2 = [
+    t for p in doc.paragraphs() for s in p.sentences() for t in s.tokens()
+]
+for t1, t2 in zip(toks, toks2):
+    assert hash(t1) == hash(t2)
+
+assert hash(toks[1]) == hash(toks[4])  # same toks in defferent sents
+assert hash(toks[2]) == hash(toks[6])  # same toks in defferent sents
+assert hash(toks[0]) != hash(toks[1])  # same tokens except whitespaces
+assert hash(toks[1]) != hash(toks[2])  # different orth
+assert hash(toks[2]) != hash(toks[3])  # different ctag
+assert hash(toks[4]) != hash(toks[5])  # different base
+
+tok_pos_idx = defaultdict(list)
+for i, t in enumerate(toks):
+    tok_pos_idx[t].append(i)
+assert tok_pos_idx == {
+    toks[0]: [0],
+    toks[1]: [1, 4],
+    toks[2]: [2, 6],
+    toks[3]: [3],
+    toks[5]: [5],
+}