Commit 5630c8b4 authored by Grzegorz Kostkowski's avatar Grzegorz Kostkowski

Extend mwereader to allow setting custom annotation name

parent 011e9eac
......@@ -37,6 +37,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
: TokenReader(tagset), inner_filename_(filename)
{
mwes_counter=0;
chan_ann_name = "mwe";
}
MWEReader::MWEReader(const Tagset &tagset, const std::string &filename,
......@@ -45,6 +46,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
{
mwes_counter=0;
inner_reader_ = reader;
chan_ann_name = "mwe";
}
void MWEReader::setFile(const std::string &filename)
......@@ -58,6 +60,16 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
// TODO implementation
}
void set_annotation_channel(const std::string & chan_name)
{
chan_ann_name = chan_name;
}
std::string get_annotation_channel_base_name()
{
return chan_ann_name + "_base";
}
Token* MWEReader::get_next_token()
{
if(currentSentence->empty())
......@@ -104,11 +116,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
// create 'mwe' channel if not exists
ChanMapT chan_map = ann_sentence->all_channels();
if (chan_map.find("mwe") == chan_map.end()) {
ann_sentence->create_channel("mwe");
if (chan_map.find(chan_ann_name) == chan_map.end()) {
ann_sentence->create_channel(chan_ann_name);
}
AnnotationChannel& channel = ann_sentence->get_channel("mwe");
AnnotationChannel& channel = ann_sentence->get_channel(chan_ann_name);
// if channel exists, we leave annotation numbers
int head_ann_num = channel.get_segment_at(head);
......@@ -123,7 +135,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
tokens[head]->create_metadata();
}
TokenMetaDataPtr md = tokens[head]->get_metadata();
md->set_attribute("mwe_base", new_base);
md->set_attribute(get_annotation_channel_base_name(), new_base);
// annotate mwe elements with annotation_number of head
std::set<int>::iterator pos_it;
......
......@@ -39,6 +39,12 @@ public:
/// Allows reuse of the reader for multiple files. This is needed because the reader stores a huge index of MWEs
void setFile(const std::string & filename);
/// Setter for name of annotation to create when mwe is found
void set_annotation_channel(const std::string & chan_name);
/// Name of the token metadata attribute holding the base form of a found MWE (annotation channel name + "_base")
std::string get_annotation_channel_base_name();
/// retrieves whole sentence, finds MWEs, and return tokens
Token* get_next_token();
......@@ -118,6 +124,8 @@ private:
size_t mwes_counter;
/// use annotations instead of merging the tokens
bool annotate;
/// name of annotation to create when mwe is found
std::string chan_ann_name;
};
} // ns Corpus2
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment