Skip to content
Snippets Groups Projects
Commit b000d45b authored by ilor's avatar ilor
Browse files

preliminary disamb_sh support in xces reader, bumps version to 0.0.2

parent 1875810e
Branches
No related merge requests found
......@@ -3,7 +3,7 @@ PROJECT(corpus2)
set(corpus2_ver_major "0")
set(corpus2_ver_minor "0")
set(corpus2_ver_patch "1")
set(corpus2_ver_patch "2")
if(NOT LIBCORPUS2_SRC_DATA_DIR)
......
......@@ -9,7 +9,7 @@ class XcesReaderImpl : public BasicSaxParser
{
public:
XcesReaderImpl(const Tagset& tagset, std::deque<Chunk*>& obuf,
bool disamb_only);
bool disamb_only, bool disamb_sh);
~XcesReaderImpl();
......@@ -37,12 +37,14 @@ protected:
std::deque<Chunk*>& obuf_;
bool disamb_only_;
bool disamb_sh_;
};
XcesReader::XcesReader(const Tagset& tagset, std::istream& is,
bool disamb_only)
bool disamb_only, bool disamb_sh)
: BufferedChunkReader(tagset), is_(is)
, impl_(new XcesReaderImpl(tagset, chunk_buf_, disamb_only))
, impl_(new XcesReaderImpl(tagset, chunk_buf_, disamb_only, disamb_sh))
{
}
......@@ -64,11 +66,11 @@ void XcesReader::ensure_more()
}
XcesReaderImpl::XcesReaderImpl(const Tagset& tagset,
std::deque<Chunk*>& obuf, bool disamb_only)
std::deque<Chunk*>& obuf, bool disamb_only, bool disamb_sh)
: BasicSaxParser()
, tagset_(tagset), state_(XS_NONE), wa_(PwrNlp::Whitespace::Newline)
, sbuf_(), tok_(NULL), sent_(NULL), chunk_(NULL), obuf_(obuf)
, disamb_only_(disamb_only)
, disamb_only_(disamb_only), disamb_sh_(disamb_sh)
{
}
......@@ -119,9 +121,18 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name,
} else if (state_ == XS_TOK && name == "lex") {
assert(tok_ != NULL);
bool is_disamb = false;
foreach (const Attribute& a, attributes) {
if (a.name == "disamb" && a.value == "1") {
is_disamb = true;
if (!disamb_sh_) {
foreach (const Attribute& a, attributes) {
if (a.name == "disamb" && a.value == "1") {
is_disamb = true;
}
}
} else {
is_disamb = true;
foreach (const Attribute& a, attributes) {
if (a.name == "disamb_sh" && a.value == "0") {
is_disamb = false;
}
}
}
if (!disamb_only_ || is_disamb) {
......
......@@ -15,7 +15,7 @@ class XcesReader : public BufferedChunkReader
{
public:
XcesReader(const Tagset& tagset, std::istream& is,
bool disamb_only = false);
bool disamb_only = false, bool disamb_sh = false);
~XcesReader();
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment