Skip to content
Snippets Groups Projects
Commit b138dbb5 authored by ilor's avatar ilor
Browse files

revamp plaintext writer: plain-er format

parent 62e5839d
Branches
No related merge requests found
...@@ -20,38 +20,63 @@ or FITNESS FOR A PARTICULAR PURPOSE. ...@@ -20,38 +20,63 @@ or FITNESS FOR A PARTICULAR PURPOSE.
namespace Corpus2 { namespace Corpus2 {
bool PlainWriter::registered = TokenWriter::register_writer<PlainWriter>( bool PlainWriter::registered = TokenWriter::register_writer<PlainWriter>(
"plain"); "plain", "nows,no_disamb_info,disamb_only,ds");
PlainWriter::PlainWriter(std::ostream& os, const Tagset& tagset, PlainWriter::PlainWriter(std::ostream& os, const Tagset& tagset,
const string_range_vector& params) const string_range_vector& params)
: TokenWriter(os, tagset, params) : TokenWriter(os, tagset, params), ws_(true), disamb_(true)
, disamb_only_(false)
{ {
foreach (const string_range& param, params) {
std::string p = boost::copy_range<std::string>(param);
if (p == "nows") {
ws_ = false;
} else if (p == "no_disamb_info") {
disamb_ = false;
} else if (p == "disamb_only") {
disamb_only_ = true;
} else if (p == "ds") {
disamb_ = false;
disamb_only_ = true;
}
}
} }
void PlainWriter::write_token(const Token &t) void PlainWriter::write_token(const Token &t)
{ {
os() << t.orth_utf8() << "\n"; os() << t.orth_utf8();
if (ws_) {
os() << "\t" << PwrNlp::Whitespace::to_string(t.wa());
}
os() << "\n";
foreach (const Lexeme& lex, t.lexemes()) { foreach (const Lexeme& lex, t.lexemes()) {
os() << "\t" << lex.lemma_utf8() << "\t" if (!disamb_only_ || lex.is_disamb()) {
<< tagset().tag_to_string(lex.tag()) << "\n"; os() << "\t" << lex.lemma_utf8() << "\t"
<< tagset().tag_to_string(lex.tag());
if (disamb_) {
if (lex.is_disamb()) {
os() << "\t";
os() << "disamb";
}
}
os() << "\n";
}
} }
} }
void PlainWriter::write_sentence(const Sentence &s) void PlainWriter::write_sentence(const Sentence &s)
{ {
os() << "[[[\n";
foreach (const Token* t, s.tokens()) { foreach (const Token* t, s.tokens()) {
write_token(*t); write_token(*t);
} }
os() << "]]]\n"; os() << "\n";
} }
void PlainWriter::write_chunk(const Chunk& c) void PlainWriter::write_chunk(const Chunk& c)
{ {
os() << "[[[<<<\n\n";
foreach (const boost::shared_ptr<Sentence>& s, c.sentences()) { foreach (const boost::shared_ptr<Sentence>& s, c.sentences()) {
write_sentence(*s); write_sentence(*s);
} }
os() << ">>>]]]\n\n"; os() << "\n";
} }
} /* end ns Corpus2 */ } /* end ns Corpus2 */
...@@ -34,6 +34,13 @@ public: ...@@ -34,6 +34,13 @@ public:
void write_chunk(const Chunk& c); void write_chunk(const Chunk& c);
static bool registered; static bool registered;
private:
bool ws_;
bool disamb_;
bool disamb_only_;
}; };
} /* end ns Corpus2 */ } /* end ns Corpus2 */
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment