diff --git a/src/libmwereader/mwereader.cpp b/src/libmwereader/mwereader.cpp index 1b8574a3fbcc9ea991fc5eb658825cce0a292ad9..933a2a04b3133c5b891ddee939cc6bf7367ce99c 100644 --- a/src/libmwereader/mwereader.cpp +++ b/src/libmwereader/mwereader.cpp @@ -37,6 +37,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( : TokenReader(tagset), inner_filename_(filename) { mwes_counter=0; + chan_ann_name = "mwe"; } MWEReader::MWEReader(const Tagset &tagset, const std::string &filename, @@ -45,6 +46,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( { mwes_counter=0; inner_reader_ = reader; + chan_ann_name = "mwe"; } void MWEReader::setFile(const std::string &filename) @@ -58,6 +60,16 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( // TODO implementataion } + void MWEReader::set_annotation_channel(const std::string & chan_name) + { + chan_ann_name = chan_name; + } + + std::string MWEReader::get_annotation_channel_base_name() + { + return chan_ann_name + "_base"; + } + Token* MWEReader::get_next_token() { if(currentSentence->empty()) @@ -104,11 +116,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( // create 'mwe' channel if not exists ChanMapT chan_map = ann_sentence->all_channels(); - if (chan_map.find("mwe") == chan_map.end()) { - ann_sentence->create_channel("mwe"); + if (chan_map.find(chan_ann_name) == chan_map.end()) { + ann_sentence->create_channel(chan_ann_name); } - AnnotationChannel& channel = ann_sentence->get_channel("mwe"); + AnnotationChannel& channel = ann_sentence->get_channel(chan_ann_name); // if channel exists, we leave annotation numbers int head_ann_num = channel.get_segment_at(head); @@ -123,7 +135,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( tokens[head]->create_metadata(); } TokenMetaDataPtr md = tokens[head]->get_metadata(); - md->set_attribute("mwe_base", new_base); + 
md->set_attribute(get_annotation_channel_base_name(), new_base); // annotate mwe elements with annotation_number of head std::set<int>::iterator pos_it; diff --git a/src/libmwereader/mwereader.h b/src/libmwereader/mwereader.h index 3fdda1b42841303ba1e30dc840683d61aa033ca1..77429636e8d516066436aeea4b2dfd92676ae414 100644 --- a/src/libmwereader/mwereader.h +++ b/src/libmwereader/mwereader.h @@ -39,6 +39,12 @@ public: /// Allows reusage of the reader for multiple files. It is needed for it stores huge index of MWEs void setFile(const std::string & filename); + /// Setter for name of annotation to create when mwe is found + void set_annotation_channel(const std::string & chan_name); + + /// name of the token metadata attribute that stores the base form of a found mwe (term): the annotation channel name followed by "_base" + std::string get_annotation_channel_base_name(); + /// retrieves whole sentence, finds MWEs, and return tokens Token* get_next_token(); @@ -118,6 +124,8 @@ private: size_t mwes_counter; /// use annotations instead of merging the tokens bool annotate; + /// name of annotation to create when mwe is found + std::string chan_ann_name; }; } // ns Corpus2