Skip to content
Snippets Groups Projects
Commit cc8c48ad authored by Bartosz Broda
Browse files

add add_lexicalunit to index, small improvement in creation of mwe in sax parser

parent cee3b953
No related branches found
No related tags found
No related merge requests found
...@@ -4,27 +4,21 @@ ...@@ -4,27 +4,21 @@
namespace Corpus2{ namespace Corpus2{
LexicalUnit::LexicalUnit(const std::string &base, LexicalUnit::LexicalUnit(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition, LexicalUnit::BoolOpPtr condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond, LexicalUnit::BoolOpPtr head_cond,
std::map<std::string, std::string> variables) LexicalUnit::strmap variables)
: condition_(condition), : condition_(condition),
head_cond_(head_cond), head_cond_(head_cond),
variables_(variables), variables_(variables),
base_(base), base_(base),
nowhere_(Wccl::Position()) nowhere_(Wccl::Position())
{ {
// noop for(strmap::iterator iter = variables_.begin();
} iter != variables_.end(); ++iter)
potential_bases_.insert(iter->second);
FixedLU::FixedLU(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables)
: LexicalUnit(base, condition, head_cond, variables)
{
} }
bool FixedLU::IsHere(const Wccl::SentenceContext &sc, bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
std::set<size_t> &out_position) std::set<size_t> &out_position)
{ {
boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc); boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
...@@ -45,4 +39,40 @@ bool FixedLU::IsHere(const Wccl::SentenceContext &sc, ...@@ -45,4 +39,40 @@ bool FixedLU::IsHere(const Wccl::SentenceContext &sc,
return true; return true;
} }
FixedLU::FixedLU(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables)
: LexicalUnit(base, condition, head_cond, variables)
{
}
FlexLU::FlexLU(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables)
: LexicalUnit(base, condition, head_cond, variables)
{
}
MWEIndex::MWEIndex()
{
}
void MWEIndex::add_lexicalunit(LexicalUnitPtr lu)
{
foreach(const std::string& base, lu->get_potential_bases()){
value_type::iterator find = index_.find(base);
if(find == index_.end()){ // not found -> create new one
luvec v;
v.push_back(lu);
index_.insert( std::make_pair(base, v));
}else{// already exists -> add lu
(find->second).push_back(lu);
}
}
}
}//ns Corpus2 }//ns Corpus2
#ifndef LIBMWEREADER_MWE_H #ifndef LIBMWEREADER_MWE_H
#define LIBMWEREADER_MWE_H #define LIBMWEREADER_MWE_H
#include <boost/unordered_map.hpp>
#include <libcorpus2/io/reader.h> #include <libcorpus2/io/reader.h>
#include <libwccl/ops/operator.h> #include <libwccl/ops/operator.h>
...@@ -11,24 +13,48 @@ namespace Corpus2 { ...@@ -11,24 +13,48 @@ namespace Corpus2 {
class LexicalUnit class LexicalUnit
{ {
public: public:
LexicalUnit(const std::string &base, typedef std::map<std::string, std::string> strmap;
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition, typedef std::set<std::string> strset;
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond, typedef boost::shared_ptr<Wccl::Operator<Wccl::Bool> > BoolOpPtr;
std::map<std::string, std::string> variables
LexicalUnit(const std::string &base, BoolOpPtr condition,
BoolOpPtr head_cond, strmap variables
); );
/**
* \param sc SentenceContext with position set to value which
* will be checked
* \param out_positions will contain absolute position in
* SentenceContext (called with sc->get_abs_position) only if
* the main condition of this LexicalUnit will return true in current
* sentence context
* \returns true if this lexical unit was found here
*/
virtual bool IsHere(const Wccl::SentenceContext& sc, virtual bool IsHere(const Wccl::SentenceContext& sc,
std::set<size_t> &out_position) = 0; std::set<size_t> &out_positions) ;
const std::string & get_base() const{ return base_;}
const strmap & get_variables() const{ return variables_;}
const strset& get_potential_bases() const{ return potential_bases_;}
protected: protected:
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition_; boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition_;
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond_; boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond_;
std::map<std::string, std::string> variables_; strmap variables_;
std::string base_; std::string base_;
strset potential_bases_;
const Wccl::Position nowhere_; const Wccl::Position nowhere_;
}; };
typedef boost::shared_ptr<LexicalUnit> LexicalUnitPtr;
// TODO: czy bedzie potrzebny podzial na fix/flex w kodzie?
class FixedLU : public LexicalUnit class FixedLU : public LexicalUnit
{ {
public: public:
...@@ -37,18 +63,32 @@ public: ...@@ -37,18 +63,32 @@ public:
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond, boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables std::map<std::string, std::string> variables
); );
virtual bool IsHere(const Wccl::SentenceContext& sc,
std::set<size_t> &out_position);
}; };
class FlexLU : public LexicalUnit class FlexLU : public LexicalUnit
{ {
public:
FlexLU(const std::string &base,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > condition,
boost::shared_ptr<Wccl::Operator<Wccl::Bool> > head_cond,
std::map<std::string, std::string> variables
);
}; };
class MWEIndex // lub base -> vector<LexicalUnit> class MWEIndex // lub base -> vector<LexicalUnit>
{ {
public:
MWEIndex();
void add_lexicalunit(LexicalUnitPtr lu);
protected:
typedef std::vector<LexicalUnitPtr> luvec;
typedef boost::unordered_map<std::string,luvec> value_type;
value_type index_;
}; };
}// ns Corpus2 }// ns Corpus2
......
...@@ -60,10 +60,17 @@ namespace Corpus2 { ...@@ -60,10 +60,17 @@ namespace Corpus2 {
MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition( MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
head_cond_); head_cond_);
//foreach(const std::string&varname, main->valid_variable_names()) LexicalUnitPtr lu;
//if(boost::algorithm::starts_with(varname, "Pos")) if(group_type_ == "fix"){ // group_name_ -> lower case
//std::cout << "Pozycja: " << varname << std::endl; lu = LexicalUnitPtr(new FixedLU(mwe_base_, main, head,
variables_));
} else if(group_type_ == "flex"){
lu = LexicalUnitPtr(new FlexLU(mwe_base_, main, head,
variables_));
} else {
throw Wccl::WcclError("Unknown type of lexical unit:"
+ group_type_);
}
} }
std::string MWEParser::get_attribute(const AttributeList& attributes, std::string MWEParser::get_attribute(const AttributeList& attributes,
...@@ -85,6 +92,7 @@ namespace Corpus2 { ...@@ -85,6 +92,7 @@ namespace Corpus2 {
group_name_ = a.value; group_name_ = a.value;
} else if(a.name == "type"){ } else if(a.name == "type"){
group_type_ = a.value; group_type_ = a.value;
boost::algorithm::to_lower(group_type_);
} else if(a.name == "class"){ } else if(a.name == "class"){
group_class_ = a.value; group_class_ = a.value;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment