Skip to content
Snippets Groups Projects
Commit 903e88c7 authored by ilor's avatar ilor
Browse files

add two new writers: a dummy "none" writer that disables output and a "stats"...

Add two new writers: a dummy "none" writer that disables output, and a "stat" writer that outputs only general information about token counts and annotations.
parent 1b0a59da
Branches
No related merge requests found
...@@ -58,12 +58,14 @@ SET(libcorpus2_STAT_SRC ...@@ -58,12 +58,14 @@ SET(libcorpus2_STAT_SRC
io/cclreader.cpp io/cclreader.cpp
io/cclwriter.cpp io/cclwriter.cpp
io/fastxces.cpp io/fastxces.cpp
io/nonewriter.cpp
io/orthwriter.cpp io/orthwriter.cpp
io/plainwriter.cpp io/plainwriter.cpp
io/premorphwriter.cpp io/premorphwriter.cpp
io/reader.cpp io/reader.cpp
io/rft.cpp io/rft.cpp
io/sax.cpp io/sax.cpp
io/statwriter.cpp
io/writer.cpp io/writer.cpp
io/xces.cpp io/xces.cpp
io/xcescommon.cpp io/xcescommon.cpp
......
#include <libcorpus2/io/nonewriter.h>
namespace Corpus2 {

/* Makes this writer available under the name "none". */
bool NoneWriter::registered =
    TokenWriter::register_writer<NoneWriter>("none");

/**
 * Forwards all arguments to the TokenWriter base; the writer keeps no
 * state of its own.
 */
NoneWriter::NoneWriter(
        std::ostream& os,
        const Tagset& tagset,
        const string_range_vector& params)
    : TokenWriter(os, tagset, params)
{}

/* Intentionally empty: tokens are discarded. */
void NoneWriter::write_token(const Token& /*token*/) {}

/* Intentionally empty: sentences are discarded. */
void NoneWriter::write_sentence(const Sentence& /*sentence*/) {}

/* Intentionally empty: chunks are discarded. */
void NoneWriter::write_chunk(const Chunk& /*chunk*/) {}

} /* end ns Corpus2 */
// NOTE: the guard macro tested by #ifndef must be the same one that is
// #define'd below, otherwise a second inclusion redefines the class.
#ifndef LIBCORPUS2_IO_NONEWRITER_H
#define LIBCORPUS2_IO_NONEWRITER_H

#include <libcorpus2/io/writer.h>

namespace Corpus2 {

/**
 * A dummy token writer that disables output: every write_* member
 * function is a no-op. It is registered with TokenWriter under the
 * name "none".
 */
class NoneWriter : public TokenWriter
{
public:
    /**
     * Construct the writer; all arguments are passed on to the
     * TokenWriter base class.
     */
    NoneWriter(std::ostream& os, const Tagset& tagset,
            const string_range_vector& params);

    /// Does nothing.
    void write_token(const Token& t);

    /// Does nothing.
    void write_sentence(const Sentence& t);

    /// Does nothing.
    void write_chunk(const Chunk& c);

    /// Holds the result of registering this class with TokenWriter.
    static bool registered;
};

} /* end ns Corpus2 */

#endif // LIBCORPUS2_IO_NONEWRITER_H
...@@ -19,7 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE. ...@@ -19,7 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
namespace Corpus2 { namespace Corpus2 {
bool PlainWriter::registered = PlainWriter::register_writer<PlainWriter>( bool PlainWriter::registered = TokenWriter::register_writer<PlainWriter>(
"plain"); "plain");
PlainWriter::PlainWriter(std::ostream& os, const Tagset& tagset, PlainWriter::PlainWriter(std::ostream& os, const Tagset& tagset,
......
#include <libcorpus2/io/statwriter.h>
#include <libcorpus2/ann/annotatedsentence.h>
#include <iomanip>
#include <libpwrutils/foreach.h>
namespace Corpus2 {
/* Makes this writer available under the name "stat". */
bool StatWriter::registered =
    TokenWriter::register_writer<StatWriter>("stat");
/**
 * Forwards all arguments to the TokenWriter base; the writer keeps no
 * state of its own.
 */
StatWriter::StatWriter(
        std::ostream& os,
        const Tagset& tagset,
        const string_range_vector& params)
    : TokenWriter(os, tagset, params)
{}
void StatWriter::write_token(const Token& t)
{
if (t.orth().length() == 0) {
os() << "~";
} else {
UnicodeString o1(t.orth().charAt(0));
os() << PwrNlp::to_utf8(o1);
}
}
void StatWriter::write_sentence(const Sentence& s)
{
os() << std::setw(8) << "tokens" << " ";
foreach (const Token* t, s.tokens()) {
write_token(*t);
}
os() << "\n";
const AnnotatedSentence* as = dynamic_cast<const AnnotatedSentence*>(&s);
if (as) {
foreach (const AnnotatedSentence::chan_map_t::value_type& vt, as->all_channels()) {
os() << std::setw(8) << vt.first << " ";
os() << vt.second.dump_alpha();
os() << "\n";
}
}
os() << "\n";
}
/**
 * Write the summary of every sentence in the chunk, in order, followed
 * by one extra newline that closes the chunk.
 *
 * @param c the chunk whose sentences are summarised
 */
void StatWriter::write_chunk(const Chunk& c)
{
    // Bind by const reference so the pointer object is not copied on each
    // iteration (presumably a reference-counted smart pointer -- confirm
    // against the Sentence::Ptr declaration).
    foreach (const Sentence::Ptr& s, c.sentences()) {
        write_sentence(*s);
    }
    os() << "\n";
}
} /* end ns Corpus2 */
// NOTE: the guard macro tested by #ifndef must be the same one that is
// #define'd below, otherwise a second inclusion redefines the class.
#ifndef LIBCORPUS2_IO_STATWRITER_H
#define LIBCORPUS2_IO_STATWRITER_H

#include <libcorpus2/io/writer.h>

namespace Corpus2 {

/**
 * A token writer that prints a compact per-sentence overview instead of
 * the full corpus contents: one line with the first character of every
 * token and, for annotated sentences, one line per annotation channel.
 * It is registered with TokenWriter under the name "stat".
 */
class StatWriter : public TokenWriter
{
public:
    /**
     * Construct the writer; all arguments are passed on to the
     * TokenWriter base class.
     */
    StatWriter(std::ostream& os, const Tagset& tagset,
            const string_range_vector& params);

    /// Write the first character of the token's orth, or "~" if empty.
    void write_token(const Token& t);

    /// Write the token line and any annotation channel lines of a sentence.
    void write_sentence(const Sentence& t);

    /// Write every sentence of the chunk, then a trailing newline.
    void write_chunk(const Chunk& c);

    /// Holds the result of registering this class with TokenWriter.
    static bool registered;
};

} /* end ns Corpus2 */

#endif // LIBCORPUS2_IO_STATWRITER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment