Skip to content
Snippets Groups Projects
Commit 3061dc24 authored by ilor's avatar ilor
Browse files

token metadata member (increases Token struct size only slightly)

parent c7ca37b6
Branches
No related tags found
No related merge requests found
......@@ -54,6 +54,7 @@ SET(libcorpus2_STAT_SRC
tagsetmanager.cpp
tagsetparser.cpp
token.cpp
tokenmetadata.cpp
io/cclreader.cpp
io/orthwriter.cpp
io/plainwriter.cpp
......
......@@ -17,11 +17,12 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libcorpus2/token.h>
#include <sstream>
#include <libpwrutils/foreach.h>
#include <libcorpus2/tokenmetadata.h>
namespace Corpus2 {
/// Default constructor: value-initialises the orth, whitespace and lexeme
/// members.
// NOTE(review): two member-initializer lists follow. They look like the
// before/after lines of the diff this snippet came from (the second one adds
// metadata_(NULL), i.e. a token that starts without metadata) -- confirm that
// only the second list is meant to remain.
Token::Token()
: orth_(), wa_(), lexemes_()
: orth_(), wa_(), lexemes_(), metadata_(NULL)
{
}
......@@ -32,7 +33,13 @@ Token::Token(const UnicodeString &orth, PwrNlp::Whitespace::Enum wa)
/// Returns a newly allocated Token carrying this token's orth, whitespace
/// and lexemes. When metadata is attached, the new token receives its own
/// duplicate obtained through TokenMetaData::clone().
// NOTE(review): two declarations of `t` follow. They look like the
// before/after lines of the diff this snippet came from -- presumably only
// the default-constructed variant is meant to remain; confirm.
Token* Token::clone() const
{
Token* t = new Token(*this);
Token* t = new Token();
// Member-wise copy of the plain data members.
t->orth_ = orth_;
t->wa_ = wa_;
t->lexemes_ = lexemes_;
// Metadata is optional: duplicate it only when this token has some.
if (metadata_.get()) {
t->set_metadata(metadata_->clone());
}
return t;
}
......@@ -118,4 +125,9 @@ bool Token::orth_pos_match(mask_t pos, const UnicodeString &orth) const
return true;
}
void Token::create_metadata()
{
metadata_.reset(new TokenMetaData);
}
} /* end ns Corpus2 */
......@@ -24,11 +24,16 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libpwrutils/whitespace.h>
#include <unicode/unistr.h>
#include <memory>
#include <string>
#include <vector>
namespace Corpus2 {
/// Forward declaration of optional Token metadata class
class TokenMetaData;
/**
* A single token with morphological analyses.
*
......@@ -37,7 +42,7 @@ namespace Corpus2 {
* of possible interpretations stored as lexemes.
*/
class Token
: boost::equality_comparable<Token>
: boost::equality_comparable<Token>, boost::noncopyable
{
public:
/// Creates an empty Token
......@@ -135,6 +140,19 @@ public:
*/
bool orth_pos_match(mask_t pos, const UnicodeString& orth) const;
/// Metadata setter
void set_metadata(TokenMetaData* md) {
metadata_.reset(md);
}
/// Metadata getter
TokenMetaData* get_metadata() const {
return metadata_.get();
}
/// Creates an empty metadata object for this Token
void create_metadata();
private:
/// The orth (actual encountered form)
//boost::flyweight<UnicodeString> orth_;
......@@ -145,6 +163,9 @@ private:
/// The possible lexemes
std::vector<Lexeme> lexemes_;
/// Metadata
std::auto_ptr<TokenMetaData> metadata_;
};
} /* end ns Corpus2 */
......
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <libcorpus2/tokenmetadata.h>
#include <libpwrutils/foreach.h>
#include <boost/make_shared.hpp>
namespace Corpus2 {
/// Builds a metadata object with an empty attribute map.
TokenMetaData::TokenMetaData()
    : attributes_()
{
}
/// Allocates a copy of this object with `new` and returns the raw pointer;
/// nothing in this function retains or releases it.
TokenMetaData* TokenMetaData::clone() const
{
    TokenMetaData* duplicate = new TokenMetaData(*this);
    return duplicate;
}
/// Tells whether an attribute called @p name is currently stored.
bool TokenMetaData::has_attribute(const std::string &name) const
{
    // Map keys are unique, so count() is either 0 or 1.
    return attributes_.count(name) > 0;
}
/// Looks up the attribute called @p name and returns a copy of its value.
/// An empty string is returned when no such attribute is stored, so callers
/// that must tell "absent" from "empty" should use has_attribute().
std::string TokenMetaData::get_attribute(const std::string &name) const
{
    const std::map<std::string, std::string>::const_iterator hit =
        attributes_.find(name);
    if (hit == attributes_.end()) {
        return "";
    }
    return hit->second;
}
/// Stores @p value under @p name, creating the attribute when it does not
/// exist yet and overwriting the previous value when it does.
void TokenMetaData::set_attribute(const std::string &name,
        const std::string &value)
{
    // operator[] inserts an empty entry for a new key before the assignment.
    std::string& slot = attributes_[name];
    slot = value;
}
} /* end ns Corpus2 */
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#ifndef LIBCORPUS2_TOKENMETADATA_H
#define LIBCORPUS2_TOKENMETADATA_H
#include <string>
#include <map>
#include <boost/shared_ptr.hpp>
namespace Corpus2 {
/**
 * Token metadata: a collection of string attributes, each stored as a
 * name -> value pair.
 */
class TokenMetaData
{
public:
    /// Container type used for the attributes (attribute name -> value)
    typedef std::map<std::string, std::string> attr_map_t;

    /// Constructor
    TokenMetaData();

    /// Returns a pointer to a newly allocated copy of this object
    TokenMetaData* clone() const;

    /// Checks whether an attribute with the given name is stored
    bool has_attribute(const std::string& name) const;

    /// Returns the value stored under the given attribute name
    std::string get_attribute(const std::string& name) const;

    /// Stores the given value under the given attribute name
    void set_attribute(const std::string& name, const std::string& value);

    /// Read-only access to the whole attribute map
    const attr_map_t& attributes() const { return attributes_; }

private:
    /// The stored attributes
    attr_map_t attributes_;
};
} /* end ns Corpus2 */
#endif // LIBCORPUS2_TOKENMETADATA_H
......@@ -12,6 +12,7 @@ add_executable( tests
ioann.cpp
tag_split.cpp
tagset_parse.cpp
tokenmetadata.cpp
)
target_link_libraries ( tests corpus2 pwrutils ${Boost_LIBRARIES})
......
......@@ -47,14 +47,15 @@ BOOST_AUTO_TEST_CASE( token_dup_lexemes )
t.add_lexeme(l2);
BOOST_CHECK(!t.check_duplicate_lexemes());
BOOST_CHECK(!t.remove_duplicate_lexemes());
Corpus2::Token tt(t);
Corpus2::Token* tt = t.clone();
t.add_lexeme(l1);
BOOST_CHECK(t != tt);
BOOST_CHECK(t != *tt);
BOOST_CHECK(t.check_duplicate_lexemes());
BOOST_CHECK(t.remove_duplicate_lexemes());
BOOST_CHECK(!t.check_duplicate_lexemes());
BOOST_CHECK(!t.remove_duplicate_lexemes());
BOOST_CHECK(t == tt);
BOOST_CHECK(t == *tt);
delete tt;
}
BOOST_AUTO_TEST_CASE( is_icu_working )
......
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include <boost/test/unit_test.hpp>
#include <libcorpus2/token.h>
#include <libcorpus2/tokenmetadata.h>
BOOST_AUTO_TEST_SUITE(token_metadata)
/// Checks that a Token has no metadata until create_metadata() is called,
/// and that Token::clone() gives the copy its own, independent metadata.
BOOST_AUTO_TEST_CASE(meta1)
{
    Corpus2::Token t1(UnicodeString::fromUTF8("t1"), PwrNlp::Whitespace::None);
    BOOST_CHECK(!t1.get_metadata());
    t1.create_metadata();
    // REQUIRE rather than CHECK: the pointer is dereferenced below, and a
    // failed CHECK would let the test run on into a null dereference.
    // Nothing is heap-allocated by the test yet, so aborting here is safe.
    BOOST_REQUIRE(t1.get_metadata());
    BOOST_CHECK(t1.get_metadata()->attributes().empty());

    Corpus2::Token* t2 = t1.clone();
    Corpus2::TokenMetaData* md2 = t2->get_metadata();
    BOOST_CHECK(md2);
    // Guard with a plain `if` instead of REQUIRE so that t2 is still
    // deleted when the clone unexpectedly carries no metadata.
    if (md2) {
        // The clone must own a distinct metadata object.
        BOOST_CHECK(md2 != t1.get_metadata());
        BOOST_CHECK(md2->attributes().empty());
        md2->set_attribute("A", "B");
        BOOST_CHECK(!md2->attributes().empty());
        // Changing the clone's metadata must leave the original untouched.
        BOOST_CHECK(t1.get_metadata()->attributes().empty());
    }
    delete t2;
}
BOOST_AUTO_TEST_SUITE_END()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment