Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
WCCL
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
WCCL
Commits
19b27f93
Commit
19b27f93
authored
14 years ago
by
ilor
Browse files
Options
Downloads
Patches
Plain Diff
make wcclrules process a chunk at a time, add progress info
parent
c25d539a
Branches
Branches containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
wcclrules/main.cpp
+31
-10
31 additions, 10 deletions
wcclrules/main.cpp
with
31 additions
and
10 deletions
wcclrules/main.cpp
+
31
−
10
View file @
19b27f93
...
...
@@ -7,6 +7,8 @@
#include
<libwccl/parser/Parser.h>
#include
<libwccl/ops/rulesequence.h>
#include
<libcorpus2/tagsetmanager.h>
#include
<libcorpus2/util/tokentimer.h>
#include
<boost/bind.hpp>
#include
<boost/algorithm/string.hpp>
...
...
@@ -20,6 +22,7 @@
namespace
{
bool
quiet
=
false
;
bool
progress
=
false
;
struct
options
{
bool
first
;
...
...
@@ -39,7 +42,10 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
ret
=
parser
.
parseRuleSequence
(
is
);
if
(
ret
)
{
std
::
cerr
<<
ret
->
size
()
<<
"
\n
"
;
if
(
!
quiet
)
{
std
::
cerr
<<
"Loaded "
<<
ret
->
size
()
<<
" rule(s) from "
<<
filename
<<
"
\n
"
;
}
std
::
copy
(
ret
->
begin
(),
ret
->
end
(),
std
::
back_inserter
(
rules
));
return
true
;
}
else
{
...
...
@@ -69,17 +75,27 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
void
do_stream
(
boost
::
shared_ptr
<
Corpus2
::
TokenWriter
>
writer
,
const
Corpus2
::
Tagset
&
tagset
,
Wccl
::
RuleSequence
&
rules
,
std
::
istream
&
is
,
const
options
&
opts
)
{
Corpus2
::
XcesReader
xr
(
tagset
,
is
);
Corpus2
::
Sentence
::
Ptr
s
;
while
((
s
=
xr
.
get_next_sentence
()))
{
if
(
opts
.
until_done
)
{
rules
.
execute_until_done
(
s
,
opts
.
until_done_iterations
);
}
else
{
rules
.
execute_once
(
s
);
Corpus2
::
XcesReader
reader
(
tagset
,
is
);
Corpus2
::
TokenTimer
&
timer
=
Corpus2
::
global_timer
();
while
(
boost
::
shared_ptr
<
Corpus2
::
Chunk
>
c
=
reader
.
get_next_chunk
())
{
foreach
(
boost
::
shared_ptr
<
Corpus2
::
Sentence
>&
s
,
c
->
sentences
())
{
if
(
opts
.
until_done
)
{
rules
.
execute_until_done
(
s
,
opts
.
until_done_iterations
);
}
else
{
rules
.
execute_once
(
s
);
}
timer
.
count_sentence
(
*
s
);
if
(
progress
)
{
timer
.
check_slice
();
}
if
(
opts
.
first
)
break
;
}
writer
->
write_
sentence
(
*
s
);
writer
->
write_
chunk
(
*
c
);
if
(
opts
.
first
)
break
;
}
if
(
progress
)
{
timer
.
stats
();
}
}
...
...
@@ -110,6 +126,8 @@ int main(int argc, char** argv)
"Files to load, looking at the extension to determine type
\n
"
)
(
"output-format,o"
,
value
(
&
output_format
)
->
default_value
(
"xces"
),
writers_help
.
c_str
())
(
"progress,p"
,
value
(
&
progress
)
->
zero_tokens
(),
"Show progress info"
)
(
"quiet,q"
,
value
(
&
quiet
)
->
zero_tokens
(),
"Suppress messages
\n
"
)
(
"until-done,u"
,
value
(
&
opts
.
until_done
)
->
zero_tokens
(),
...
...
@@ -165,8 +183,11 @@ int main(int argc, char** argv)
}
}
if
(
!
rules
.
empty
())
{
Corpus2
::
TokenTimer
&
timer
=
Corpus2
::
global_timer
();
timer
.
register_signal_handler
();
boost
::
shared_ptr
<
Corpus2
::
TokenWriter
>
writer
;
writer
.
reset
(
Corpus2
::
TokenWriter
::
create
(
output_format
,
std
::
cout
,
tagset
));
foreach
(
const
std
::
string
&
f
,
corpora_files
)
{
writer
.
reset
(
Corpus2
::
TokenWriter
::
create
(
output_format
,
std
::
cout
,
tagset
));
foreach
(
const
std
::
string
&
f
,
corpora_files
)
{
std
::
ifstream
ifs
(
f
.
c_str
());
if
(
ifs
.
good
())
{
do_stream
(
writer
,
tagset
,
rules
,
ifs
,
opts
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment