From 5db3ef6d6ab8166d058af52753dabece31a2ff94 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Fri, 1 Oct 2010 12:16:38 +0200 Subject: [PATCH] Fix disamb ordering in Lexeme op< add disamb-ignoring equality use both of these in Token's remove duplicate lexemes to make disamb take priority in case of a disamb/ndisamb dupe --- libcorpus2/lexeme.cpp | 7 ++++++- libcorpus2/lexeme.h | 8 ++++++++ libcorpus2/token.cpp | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/libcorpus2/lexeme.cpp b/libcorpus2/lexeme.cpp index 9714860..cd53613 100644 --- a/libcorpus2/lexeme.cpp +++ b/libcorpus2/lexeme.cpp @@ -28,7 +28,7 @@ bool Lexeme::operator<(const Lexeme& other) const || (lemma_ == other.lemma_ && (tag_ < other.tag_ || (tag_ == other.tag_ - && disamb_ < other.disamb_))); + && disamb_ && !other.disamb_))); } bool Lexeme::operator==(const Lexeme& other) const @@ -37,4 +37,9 @@ bool Lexeme::operator==(const Lexeme& other) const disamb_ == other.disamb_; } +bool Lexeme::DisamblessComparator::operator()(const Lexeme& l, const Lexeme& other) const +{ + return l.lemma_ == other.lemma_ && l.tag_ == other.tag_; +} + } /* end ns Corpus2 */ diff --git a/libcorpus2/lexeme.h b/libcorpus2/lexeme.h index 1e70d17..0a508b1 100644 --- a/libcorpus2/lexeme.h +++ b/libcorpus2/lexeme.h @@ -88,6 +88,14 @@ public: */ bool operator==(const Lexeme& other) const; + /** + * Disamb-ignoring lexeme comparison + */ + struct DisamblessComparator + { + bool operator()(const Lexeme& l1, const Lexeme& l2) const; + }; + private: /// The lemma -- basic form //boost::flyweight<UnicodeString> lemma_; diff --git a/libcorpus2/token.cpp b/libcorpus2/token.cpp index f4317c5..ac86d5c 100644 --- a/libcorpus2/token.cpp +++ b/libcorpus2/token.cpp @@ -84,7 +84,7 @@ bool Token::remove_duplicate_lexemes() { size_t old_size = lexemes_.size(); std::sort(lexemes_.begin(), lexemes_.end()); - lexemes_.erase(std::unique(lexemes_.begin(), 
lexemes_.end()), + lexemes_.erase(std::unique(lexemes_.begin(), lexemes_.end(), Lexeme::DisamblessComparator()), lexemes_.end()); return old_size != lexemes_.size(); } -- GitLab