Skip to content
Snippets Groups Projects
Commit cc8c48ad authored by Bartosz Broda's avatar Bartosz Broda
Browse files

add add_lexicalunit to index, small improvement in creation of mwe in sax parser

parent cee3b953
No related merge requests found
......@@ -4,27 +4,21 @@
namespace Corpus2{
// Builds a lexical unit from its base, its main condition, its head
// condition and its variable map. Every value (not key) of the variable
// map is additionally collected into potential_bases_. nowhere_ is
// initialised with a default-constructed Wccl::Position.
LexicalUnit::LexicalUnit(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables)
LexicalUnit::BoolOpPtr condition,
LexicalUnit::BoolOpPtr head_cond,
LexicalUnit::strmap variables)
: condition_(condition),
head_cond_(head_cond),
variables_(variables),
base_(base),
nowhere_(Wccl::Position())
{
// noop
// iterate over the stored copy (variables_) and remember each mapped
// value as a potential base
for(strmap::iterator iter = variables_.begin();
iter != variables_.end(); ++iter)
potential_bases_.insert(iter->second);
}
// Constructs a FixedLU by handing every argument, unchanged, to the
// LexicalUnit base-class constructor.
FixedLU::FixedLU(const std::string &base,
		LexicalUnit::BoolOpPtr condition,
		LexicalUnit::BoolOpPtr head_cond,
		LexicalUnit::strmap variables)
	: LexicalUnit(base, condition, head_cond, variables)
{
	// no state of its own to set up
}
bool FixedLU::IsHere(const Wccl::SentenceContext &sc,
bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
std::set<size_t> &out_position)
{
boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
......@@ -45,4 +39,40 @@ bool FixedLU::IsHere(const Wccl::SentenceContext &sc,
return true;
}
// Constructs a FixedLU; all arguments are forwarded as-is to the
// LexicalUnit constructor, which does the actual initialisation.
FixedLU::FixedLU(const std::string &base,
		LexicalUnit::BoolOpPtr condition,
		LexicalUnit::BoolOpPtr head_cond,
		LexicalUnit::strmap variables)
	: LexicalUnit(base, condition, head_cond, variables)
{
	// nothing beyond base-class initialisation
}
// Constructs a FlexLU; all arguments are forwarded as-is to the
// LexicalUnit constructor, which does the actual initialisation.
FlexLU::FlexLU(const std::string &base,
		LexicalUnit::BoolOpPtr condition,
		LexicalUnit::BoolOpPtr head_cond,
		LexicalUnit::strmap variables)
	: LexicalUnit(base, condition, head_cond, variables)
{
	// nothing beyond base-class initialisation
}
// Creates an index that holds no lexical units yet.
MWEIndex::MWEIndex()
	: index_()
{
}
/**
 * Registers a lexical unit in the index under each of its potential bases.
 *
 * \param lu lexical unit to add; it is appended to the vector stored for
 * every string returned by lu->get_potential_bases()
 */
void MWEIndex::add_lexicalunit(LexicalUnitPtr lu)
{
	foreach(const std::string& base, lu->get_potential_bases()){
		// operator[] default-constructs an empty vector for a base seen
		// for the first time, so one lookup covers both the "new base"
		// and the "existing base" case and no temporary vector has to
		// be built and copied into the map
		index_[base].push_back(lu);
	}
}
}//ns Corpus2
#ifndef LIBMWEREADER_MWE_H
#define LIBMWEREADER_MWE_H
#include <boost/unordered_map.hpp>
#include <libcorpus2/io/reader.h>
#include <libwccl/ops/operator.h>
......@@ -11,24 +13,48 @@ namespace Corpus2 {
/**
 * A lexical unit: a base string together with a main condition and a
 * head condition (both WCCL boolean operators) and a string-to-string
 * map of variables.
 */
class LexicalUnit
{
public:
LexicalUnit(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables
typedef std::map<std::string, std::string> strmap;
typedef std::set<std::string> strset;
typedef boost::shared_ptr<Wccl::Operator<Wccl::Bool> > BoolOpPtr;
LexicalUnit(const std::string &base, BoolOpPtr condition,
BoolOpPtr head_cond, strmap variables
);
/**
 * Checks whether this lexical unit occurs at the current position of
 * the given sentence context.
 * \param sc SentenceContext whose current position is the one to be
 * checked
 * \param out_positions receives absolute positions in the
 * SentenceContext (as obtained via sc->get_abs_position); filled only
 * if the main condition of this LexicalUnit holds in the current
 * sentence context
 * \returns true if this lexical unit was found here
 */
virtual bool IsHere(const Wccl::SentenceContext& sc,
std::set<size_t> &out_position) = 0;
std::set<size_t> &out_positions) ;
/// Base string passed to the constructor.
const std::string & get_base() const{ return base_;}
/// Variable map passed to the constructor.
const strmap & get_variables() const{ return variables_;}
/// Set of the values stored in the variable map.
const strset& get_potential_bases() const{ return potential_bases_;}
protected:
// main condition; applied to the sentence context in IsHere
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition_;
// head condition passed to the constructor
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond_;
std::map<std::string, std::string> variables_;
strmap variables_;
std::string base_;
// values of variables_, collected by the constructor
strset potential_bases_;
// default-constructed Wccl::Position
const Wccl::Position nowhere_;
};
typedef boost::shared_ptr<LexicalUnit> LexicalUnitPtr;
// TODO: will the fix/flex distinction be needed in the code?
class FixedLU : public LexicalUnit
{
public:
......@@ -37,18 +63,32 @@ public:
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables
);
virtual bool IsHere(const Wccl::SentenceContext& sc,
std::set<size_t> &out_position);
};
class FlexLU : public LexicalUnit
{
public:
FlexLU(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables
);
};
/**
 * Index of lexical units: maps a string key to the vector of lexical
 * units registered under that key.
 */
class MWEIndex // or: base -> vector<LexicalUnit>
{
public:
MWEIndex();
/// Adds lu to the index under each of its potential bases.
void add_lexicalunit(LexicalUnitPtr lu);
protected:
// all lexical units stored under a single key
typedef std::vector<LexicalUnitPtr> luvec;
// key string -> lexical units registered under it
typedef boost::unordered_map<std::string,luvec> value_type;
value_type index_;
};
}// ns Corpus2
......
......@@ -60,10 +60,17 @@ namespace Corpus2 {
MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
head_cond_);
//foreach(const std::string&varname, main->valid_variable_names())
//if(boost::algorithm::starts_with(varname, "Pos"))
//std::cout << "Pozycja: " << varname << std::endl;
LexicalUnitPtr lu;
if(group_type_ == "fix"){ // group_name_ -> lower case
lu = LexicalUnitPtr(new FixedLU(mwe_base_, main, head,
variables_));
} else if(group_type_ == "flex"){
lu = LexicalUnitPtr(new FlexLU(mwe_base_, main, head,
variables_));
} else {
throw Wccl::WcclError("Unknown type of lexical unit:"
+ group_type_);
}
}
std::string MWEParser::get_attribute(const AttributeList& attributes,
......@@ -85,6 +92,7 @@ namespace Corpus2 {
group_name_ = a.value;
} else if(a.name == "type"){
group_type_ = a.value;
boost::algorithm::to_lower(group_type_);
} else if(a.name == "class"){
group_class_ = a.value;
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment