Skip to content
Snippets Groups Projects
Commit be2d2477 authored by Arkadiusz Janz's avatar Arkadiusz Janz
Browse files

Merge branch 'develop' into 'master'

develop into master

See merge request !7
parents 011e9eac ea58a93c
Branches
1 merge request!7develop into master
Pipeline #2537 passed with stage
in 15 minutes and 6 seconds
...@@ -37,6 +37,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -37,6 +37,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
: TokenReader(tagset), inner_filename_(filename) : TokenReader(tagset), inner_filename_(filename)
{ {
mwes_counter=0; mwes_counter=0;
chan_ann_name = "mwe";
} }
MWEReader::MWEReader(const Tagset &tagset, const std::string &filename, MWEReader::MWEReader(const Tagset &tagset, const std::string &filename,
...@@ -45,6 +46,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -45,6 +46,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
{ {
mwes_counter=0; mwes_counter=0;
inner_reader_ = reader; inner_reader_ = reader;
chan_ann_name = "mwe";
} }
void MWEReader::setFile(const std::string &filename) void MWEReader::setFile(const std::string &filename)
...@@ -58,6 +60,16 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -58,6 +60,16 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
// TODO implementation // TODO implementation
} }
void MWEReader::set_annotation_channel(const std::string & chan_name)
{
chan_ann_name = chan_name;
}
/// Builds the name of the metadata attribute that holds the MWE base form:
/// the current annotation channel name with the "_base" suffix appended.
std::string MWEReader::get_annotation_channel_base_name()
{
	std::string base_attr_name(chan_ann_name);
	base_attr_name += "_base";
	return base_attr_name;
}
Token* MWEReader::get_next_token() Token* MWEReader::get_next_token()
{ {
if(currentSentence->empty()) if(currentSentence->empty())
...@@ -104,11 +116,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -104,11 +116,11 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
// create 'mwe' channel if not exists // create 'mwe' channel if not exists
ChanMapT chan_map = ann_sentence->all_channels(); ChanMapT chan_map = ann_sentence->all_channels();
if (chan_map.find("mwe") == chan_map.end()) { if (chan_map.find(chan_ann_name) == chan_map.end()) {
ann_sentence->create_channel("mwe"); ann_sentence->create_channel(chan_ann_name);
} }
AnnotationChannel& channel = ann_sentence->get_channel("mwe"); AnnotationChannel& channel = ann_sentence->get_channel(chan_ann_name);
// if channel exists, we leave annotation numbers // if channel exists, we leave annotation numbers
int head_ann_num = channel.get_segment_at(head); int head_ann_num = channel.get_segment_at(head);
...@@ -123,7 +135,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -123,7 +135,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
tokens[head]->create_metadata(); tokens[head]->create_metadata();
} }
TokenMetaDataPtr md = tokens[head]->get_metadata(); TokenMetaDataPtr md = tokens[head]->get_metadata();
md->set_attribute("mwe_base", new_base); md->set_attribute(get_annotation_channel_base_name(), new_base);
// annotate mwe elements with annotation_number of head // annotate mwe elements with annotation_number of head
std::set<int>::iterator pos_it; std::set<int>::iterator pos_it;
......
...@@ -39,6 +39,12 @@ public: ...@@ -39,6 +39,12 @@ public:
/// Allows reuse of the reader for multiple files. This is needed because the reader stores a huge index of MWEs /// Allows reuse of the reader for multiple files. This is needed because the reader stores a huge index of MWEs
void setFile(const std::string & filename); void setFile(const std::string & filename);
/// Sets the name of the annotation channel that is looked up (and created
/// when missing) in the sentence when an MWE is found. The constructors
/// initialise this name to "mwe".
/// @param chan_name new annotation channel name
void set_annotation_channel(const std::string & chan_name);
/// Returns the name of the token metadata attribute under which the base
/// form of a found MWE (term) is stored on the head token: the annotation
/// channel name followed by "_base".
/// @return annotation channel name with the "_base" suffix
std::string get_annotation_channel_base_name();
/// retrieves whole sentence, finds MWEs, and return tokens /// retrieves whole sentence, finds MWEs, and return tokens
Token* get_next_token(); Token* get_next_token();
...@@ -118,6 +124,8 @@ private: ...@@ -118,6 +124,8 @@ private:
size_t mwes_counter; size_t mwes_counter;
/// use annotations instead of merging the tokens /// use annotations instead of merging the tokens
bool annotate; bool annotate;
/// name of annotation to create when mwe is found
std::string chan_ann_name;
}; };
} // ns Corpus2 } // ns Corpus2
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment