Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
b000d45b
Commit
b000d45b
authored
14 years ago
by
ilor
Browse files
Options
Downloads
Patches
Plain Diff
preliminary disamb_sh support in xces reader, bumps version to 0.0.2
parent
1875810e
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
libcorpus2/CMakeLists.txt
+1
-1
1 addition, 1 deletion
libcorpus2/CMakeLists.txt
libcorpus2/io/xcesreader.cpp
+19
-8
19 additions, 8 deletions
libcorpus2/io/xcesreader.cpp
libcorpus2/io/xcesreader.h
+1
-1
1 addition, 1 deletion
libcorpus2/io/xcesreader.h
with
21 additions
and
10 deletions
libcorpus2/CMakeLists.txt
+
1
−
1
View file @
b000d45b
...
...
@@ -3,7 +3,7 @@ PROJECT(corpus2)
set
(
corpus2_ver_major
"0"
)
set
(
corpus2_ver_minor
"0"
)
set
(
corpus2_ver_patch
"
1
"
)
set
(
corpus2_ver_patch
"
2
"
)
if
(
NOT LIBCORPUS2_SRC_DATA_DIR
)
...
...
This diff is collapsed.
Click to expand it.
libcorpus2/io/xcesreader.cpp
+
19
−
8
View file @
b000d45b
...
...
@@ -9,7 +9,7 @@ class XcesReaderImpl : public BasicSaxParser
{
public:
XcesReaderImpl
(
const
Tagset
&
tagset
,
std
::
deque
<
Chunk
*>&
obuf
,
bool
disamb_only
);
bool
disamb_only
,
bool
disamb_sh
);
~
XcesReaderImpl
();
...
...
@@ -37,12 +37,14 @@ protected:
std
::
deque
<
Chunk
*>&
obuf_
;
bool
disamb_only_
;
bool
disamb_sh_
;
};
XcesReader
::
XcesReader
(
const
Tagset
&
tagset
,
std
::
istream
&
is
,
bool
disamb_only
)
bool
disamb_only
,
bool
disamb_sh
)
:
BufferedChunkReader
(
tagset
),
is_
(
is
)
,
impl_
(
new
XcesReaderImpl
(
tagset
,
chunk_buf_
,
disamb_only
))
,
impl_
(
new
XcesReaderImpl
(
tagset
,
chunk_buf_
,
disamb_only
,
disamb_sh
))
{
}
...
...
@@ -64,11 +66,11 @@ void XcesReader::ensure_more()
}
XcesReaderImpl
::
XcesReaderImpl
(
const
Tagset
&
tagset
,
std
::
deque
<
Chunk
*>&
obuf
,
bool
disamb_only
)
std
::
deque
<
Chunk
*>&
obuf
,
bool
disamb_only
,
bool
disamb_sh
)
:
BasicSaxParser
()
,
tagset_
(
tagset
),
state_
(
XS_NONE
),
wa_
(
PwrNlp
::
Whitespace
::
Newline
)
,
sbuf_
(),
tok_
(
NULL
),
sent_
(
NULL
),
chunk_
(
NULL
),
obuf_
(
obuf
)
,
disamb_only_
(
disamb_only
)
,
disamb_only_
(
disamb_only
)
,
disamb_sh_
(
disamb_sh
)
{
}
...
...
@@ -119,9 +121,18 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name,
}
else
if
(
state_
==
XS_TOK
&&
name
==
"lex"
)
{
assert
(
tok_
!=
NULL
);
bool
is_disamb
=
false
;
foreach
(
const
Attribute
&
a
,
attributes
)
{
if
(
a
.
name
==
"disamb"
&&
a
.
value
==
"1"
)
{
is_disamb
=
true
;
if
(
!
disamb_sh_
)
{
foreach
(
const
Attribute
&
a
,
attributes
)
{
if
(
a
.
name
==
"disamb"
&&
a
.
value
==
"1"
)
{
is_disamb
=
true
;
}
}
}
else
{
is_disamb
=
true
;
foreach
(
const
Attribute
&
a
,
attributes
)
{
if
(
a
.
name
==
"disamb_sh"
&&
a
.
value
==
"0"
)
{
is_disamb
=
false
;
}
}
}
if
(
!
disamb_only_
||
is_disamb
)
{
...
...
This diff is collapsed.
Click to expand it.
libcorpus2/io/xcesreader.h
+
1
−
1
View file @
b000d45b
...
...
@@ -15,7 +15,7 @@ class XcesReader : public BufferedChunkReader
{
public:
XcesReader
(
const
Tagset
&
tagset
,
std
::
istream
&
is
,
bool
disamb_only
=
false
);
bool
disamb_only
=
false
,
bool
disamb_sh
=
false
);
~
XcesReader
();
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment