Commit 82f88e7b authored by Michał Moczulski

Introduced Guesser2Analyser.

parent abe25ea2
......@@ -76,6 +76,7 @@ SET(libmaca_STAT_SRC
morph/dispatchanalyser.cpp
morph/mapanalyser.cpp
morph/morphanalyser.cpp
morph/guesser2.cpp
util/confignode.cpp
util/debug.cpp
util/sentenceanalyser.cpp
......
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libmaca project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.MACA, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/
#include <libmaca/morph/guesser2.h>
#include <boost/algorithm/string.hpp>
#include <algorithm>
#include <morfeusz.h>
namespace Maca {
// Identifier string of this analyser class.
const char* Guesser2Analyser::identifier = "guesser2";
// Holds the return value of MorphAnalyser::register_analyser<Guesser2Analyser>();
// the registration call runs as part of this variable's initialisation.
// NOTE(review): presumably the registry keys on `identifier` -- confirm in
// morphanalyser.h.
bool Guesser2Analyser::registered =
MorphAnalyser::register_analyser<Guesser2Analyser>();
/**
 * Config-node constructor.
 *
 * The node is forwarded to the MorphAnalyser base. The guesser member is then
 * built from the "guesser_data" key (read as a boost::filesystem::path) and
 * from the value returned by tagset().
 */
Guesser2Analyser::Guesser2Analyser(const Config::Node &cfg)
	: MorphAnalyser(cfg)
	, guesser(cfg.get<boost::filesystem::path>("guesser_data"), tagset())
{
	// Nothing else to set up: all work happens in the initialiser list.
}
/**
 * Explicit constructor.
 *
 * @param tagset_ tagset pointer forwarded to the MorphAnalyser base
 * @param data    path handed to the guesser member, together with the value
 *                returned by tagset()
 */
Guesser2Analyser::Guesser2Analyser(const Corpus2::Tagset *tagset_,
		const boost::filesystem::path & data)
	: MorphAnalyser(tagset_)
	, guesser(data, tagset())
{
	// Nothing else to set up: all work happens in the initialiser list.
}
/// Destructor; performs no explicit work of its own.
Guesser2Analyser::~Guesser2Analyser() {}
/**
 * Cloning is not supported yet: every call throws.
 *
 * @return never returns normally
 * @throws const char* always, pointing at the text "Not implemented yet"
 */
Guesser2Analyser* Guesser2Analyser::clone() const
{
	// The copy-construct-and-return statements that used to follow this throw
	// could never execute, so they have been removed to avoid suggesting that
	// cloning works.
	throw "Not implemented yet";
}
/**
 * Analyse a single token with the guesser.
 *
 * Reads the orth and preceding whitespace of @p t, asks the guesser member
 * for lexemes for that orth, creates a new Corpus2::Token from the orth and
 * whitespace, replaces its lexemes with the guessed ones and passes the
 * token pointer to @p sink.
 *
 * @param t    input token
 * @param sink callback receiving the newly allocated token
 *             (NOTE(review): presumably the sink takes ownership -- confirm
 *             against callers)
 * @return always true
 */
bool Guesser2Analyser::process_functional(const Toki::Token &t,
		boost::function<void (Corpus2::Token *)> sink)
{
	const UnicodeString &surface = t.orth();
	const PwrNlp::Whitespace::Enum spacing = t.preceeding_whitespace();

	std::vector<Corpus2::Lexeme> guessed = guesser.guess(surface);

	Corpus2::Token *result = new Corpus2::Token(surface, spacing);
	result->replace_lexemes(guessed);
	sink(result);

	return true;
}
} /* end ns Maca */
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libmaca project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.MACA, LICENSE.GUESSER, COPYING.LESSER and COPYING files for more details.
*/
#ifndef LIBMACA_MORPH_GUESSER2_H
#define LIBMACA_MORPH_GUESSER2_H
#include <libmaca/morph/morphanalyser.h>
#include <libmaca/typedefs.h>
#include <boost/filesystem.hpp>
#include <libcorpus2/guesser/guesser.h>
namespace Maca {
/**
* Morphological analyser that delegates to Corpus2::Guesser.
*
* For each processed token, the token's orth is passed to the guesser and
* the returned lexemes are attached to a newly created Corpus2::Token.
*
* WARNING: There is no thread safety currently
*/
class Guesser2Analyser : public MorphAnalyser
{
public:
/**
* Constructor for a Guesser analyser with the given tagset and a path to
* its learned data.
*
* @param tagset tagset passed on to the MorphAnalyser base class
* @param data   path to the learned data used to build the Corpus2::Guesser
*/
Guesser2Analyser(const Corpus2::Tagset* tagset, const boost::filesystem::path & data);
/**
* Config node constructor. Recognized keys are:
* - guesser_data - path to learned data for Corpus2::Guesser
*
* The node itself is also passed on to the MorphAnalyser base class.
*/
Guesser2Analyser(const Config::Node& cfg);
/// Cloning. NOTE: the implementation in guesser2.cpp currently always
/// throws the string literal "Not implemented yet".
Guesser2Analyser* clone() const;
/// Destructor
~Guesser2Analyser();
/// MorphAnalyser override: guesses lexemes for the orth of t, puts them in
/// a newly allocated Corpus2::Token and passes that token to sink.
/// The implementation in guesser2.cpp always returns true.
bool process_functional(const Toki::Token& t,
boost::function<void (Corpus2::Token*)> sink);
/// Class identifier (defined as "guesser2" in guesser2.cpp)
static const char* identifier;
/// Registered flag: result of MorphAnalyser::register_analyser for this class
static bool registered;
private:
/// The underlying guesser, constructed from the data path and tagset()
Corpus2::Guesser guesser;
};
} /* end ns Maca */
#endif // LIBMACA_MORPH_GUESSER2_H
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment