Skip to content
Snippets Groups Projects
Commit ed68caee authored by dominik
Browse files

conllwriter added

parent 89d0bbf8
No related merge requests found
......@@ -53,6 +53,7 @@ SET(libcorpus2_STAT_SRC
tokenmetadata.cpp
io/cclreader.cpp
io/cclwriter.cpp
io/conllwriter.cpp
io/helpers.cpp
io/fastxces.cpp
io/iob-chan.cpp
......
#include "conllwriter.h"
#include <libpwrutils/foreach.h>
#include <boost/algorithm/string.hpp>
namespace Corpus2 {
// Registers ConllWriter through TokenWriter::register_writer under the name
// "conll". The call is the initialiser of this static member, so it runs
// during dynamic initialisation of this translation unit; its bool result is
// kept in ConllWriter::registered.
bool ConllWriter::registered = TokenWriter::register_writer<ConllWriter>("conll");
/**
 * Constructs a writer that emits tokens to the given stream.
 *
 * @param os      output stream, forwarded to the TokenWriter base
 * @param tagset  tagset forwarded to the base; a private copy is also stored
 *                in myTagset and used when formatting tokens
 * @param params  writer parameters, forwarded unchanged to the base
 */
ConllWriter::ConllWriter(std::ostream& os, const Tagset& tagset,
        const string_range_vector& params)
    // Copy-construct the tagset here instead of default-constructing it and
    // assigning in the body.
    : TokenWriter(os, tagset, params), myTagset(tagset)
{
}
/**
 * Calls finish() before the writer goes away.
 * NOTE(review): finish() is not defined in this file; presumably it is the
 * TokenWriter routine that completes the output -- confirm in the base class.
 */
ConllWriter::~ConllWriter()
{
    this->finish();
}
void ConllWriter::write_token(const Token &t)
{
os()<<t.orth_utf8()<<"\t";
Lexeme lex = t.get_preferred_lexeme(myTagset);
os()<<lex.lemma_utf8()+"\t";
std::string tag = myTagset.tag_to_string(lex.tag());
std::vector<std::string> strs;
boost::split(strs, tag, boost::is_any_of(":"));
os()<<strs[0]<<"\t"<<strs[0]<<"\t";
if(strs.size()>1)
{
size_t i;
for(i=1;i<strs.size()-1;i++)
{
os()<<strs[i]<<"|";
}
os()<<strs[i]<<"\t_\t_\t_\t_";
}
else
os()<<"_\t_\t_\t_\t_";
}
void ConllWriter::write_sentence(const Sentence& s)
{
int i=1;
foreach (const Token* t, s.tokens()) {
os()<<i<<"\t";
write_token(*t);
os()<<"\n";
i++;
}
}
/**
 * Writes a chunk by passing each of its sentences, in order, to
 * write_sentence(). Nothing chunk-specific is emitted.
 *
 * @param c chunk whose sentences are written
 */
void ConllWriter::write_chunk(const Chunk &c)
{
    foreach (const Sentence::ConstPtr& sentence, c.sentences()) {
        const Sentence& current = *sentence;
        write_sentence(current);
    }
}
// Header hook: intentionally empty, this writer emits nothing before the
// first token.
void ConllWriter::do_header()
{
}
// Footer hook: intentionally empty, this writer emits nothing after the
// last token.
void ConllWriter::do_footer()
{
}
} /* end ns Corpus2 */
#ifndef CONLLWRITER_H
#define CONLLWRITER_H
#include <libcorpus2/io/writer.h>
namespace Corpus2 {
/**
 * TokenWriter implementation that serialises tokens as tab-separated lines,
 * one token per line. It is registered under the writer name "conll".
 */
class ConllWriter : public TokenWriter
{
public:
/// Forwards all arguments to TokenWriter and keeps a copy of the tagset.
ConllWriter(std::ostream& os, const Tagset& tagset,
const string_range_vector& params);
/// Calls finish() before destruction.
~ConllWriter();
/// Writes the fields of a single token (no index column, no newline).
void write_token(const Token &t);
/// Writes each token of s on its own line, prefixed with its 1-based index.
void write_sentence(const Sentence &s);
/// Writes all sentences of c in order.
void write_chunk(const Chunk &c);
/// Result of registering this class with TokenWriter::register_writer.
static bool registered;
protected:
/// Header hook; this writer emits no header.
void do_header();
/// Footer hook; this writer emits no footer.
void do_footer();
private:
/// Copy of the tagset passed to the constructor; used to pick the preferred
/// lexeme and to turn tags into strings.
Tagset myTagset;
};
} /* end ns Corpus2 */
#endif // CONLLWRITER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment