Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
WCCL
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
WCCL
Commits
bd777f2f
Commit
bd777f2f
authored
14 years ago
by
Adam Radziszewski
Browse files
Options
Downloads
Plain Diff
Merge branch 'master' of nlp.pwr.wroc.pl:wccl
parents
bb165fb0
4592e3f3
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
CMakeScripts/FindLoki.cmake
+24
-0
24 additions, 0 deletions
CMakeScripts/FindLoki.cmake
wcclrules/CMakeLists.txt
+3
-0
3 additions, 0 deletions
wcclrules/CMakeLists.txt
wcclrules/main.cpp
+32
-7
32 additions, 7 deletions
wcclrules/main.cpp
with
59 additions
and
7 deletions
CMakeScripts/FindLoki.cmake
0 → 100644
+
24
−
0
View file @
bd777f2f
# FindLoki.cmake - locate the Loki library.
#
# Intended to be used through find_package(Loki [REQUIRED] [QUIET]).
#
# Result variables:
#   LOKI_INCLUDE_DIR - directory that contains loki/LokiExport.h
#   LOKI_LIBRARY     - path to the loki library
#   LOKI_FOUND       - TRUE when both of the above were located
#
# find_package() derives Loki_FIND_REQUIRED and Loki_FIND_QUIETLY from the
# package name exactly as it was written in the find_package() call, so both
# must be spelled with the "Loki" casing here; an upper-case
# LOKI_FIND_QUIETLY is never set and would make QUIET a no-op.

FIND_PATH(LOKI_INCLUDE_DIR loki/LokiExport.h /usr/include /usr/local/include)

FIND_LIBRARY(LOKI_LIBRARY NAMES loki PATHS /usr/lib /usr/local/lib)

# Keep the cache entries out of the default (non-advanced) cache view.
MARK_AS_ADVANCED(LOKI_LIBRARY)
MARK_AS_ADVANCED(LOKI_INCLUDE_DIR)

IF (LOKI_INCLUDE_DIR AND LOKI_LIBRARY)
   SET(LOKI_FOUND TRUE)
ENDIF (LOKI_INCLUDE_DIR AND LOKI_LIBRARY)

IF (LOKI_FOUND)
   # Informational only: suppressed when the caller passed QUIET.
   IF (NOT Loki_FIND_QUIETLY)
      MESSAGE(STATUS "Found LOKI: ${LOKI_LIBRARY}")
   ENDIF (NOT Loki_FIND_QUIETLY)
ELSE (LOKI_FOUND)
   IF (Loki_FIND_REQUIRED)
      # A missing REQUIRED package is always reported, QUIET or not.
      MESSAGE(FATAL_ERROR "Could not find Loki-lib")
   ELSE (Loki_FIND_REQUIRED)
      # Optional package missing: informational, so honour QUIET here too.
      IF (NOT Loki_FIND_QUIETLY)
         MESSAGE(STATUS "Loki not found")
      ENDIF (NOT Loki_FIND_QUIETLY)
   ENDIF (Loki_FIND_REQUIRED)
ENDIF (LOKI_FOUND)
This diff is collapsed.
Click to expand it.
wcclrules/CMakeLists.txt
+
3
−
0
View file @
bd777f2f
...
...
@@ -5,6 +5,9 @@ include_directories(${LibXML++_INCLUDE_DIRS})
link_directories
(
${
LibXML++_LIBRARY_DIRS
}
)
set
(
LIBS
${
LIBS
}
${
LibXML++_LIBRARIES
}
)
find_package
(
Loki REQUIRED QUIET
)
set
(
LIBS
${
LIBS
}
loki
)
include_directories
(
${
CMAKE_SOURCE_DIR
}
)
add_definitions
(
-DLIBWCCL_WCCLRUN_DATA_DIR=
"
${
PROJECT_SOURCE_DIR
}
/"
)
...
...
This diff is collapsed.
Click to expand it.
wcclrules/main.cpp
+
32
−
7
View file @
bd777f2f
...
...
@@ -7,6 +7,8 @@
#include
<libwccl/parser/Parser.h>
#include
<libwccl/ops/rulesequence.h>
#include
<libcorpus2/tagsetmanager.h>
#include
<libcorpus2/util/tokentimer.h>
#include
<boost/bind.hpp>
#include
<boost/algorithm/string.hpp>
...
...
@@ -20,6 +22,7 @@
namespace
{
bool
quiet
=
false
;
bool
progress
=
false
;
struct
options
{
bool
first
;
...
...
@@ -39,7 +42,10 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
ret
=
parser
.
parseRuleSequence
(
is
);
if
(
ret
)
{
std
::
cerr
<<
ret
->
size
()
<<
"
\n
"
;
if
(
!
quiet
)
{
std
::
cerr
<<
"Loaded "
<<
ret
->
size
()
<<
" rule(s) from "
<<
filename
<<
"
\n
"
;
}
std
::
copy
(
ret
->
begin
(),
ret
->
end
(),
std
::
back_inserter
(
rules
));
return
true
;
}
else
{
...
...
@@ -69,13 +75,27 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
void
do_stream
(
boost
::
shared_ptr
<
Corpus2
::
TokenWriter
>
writer
,
const
Corpus2
::
Tagset
&
tagset
,
Wccl
::
RuleSequence
&
rules
,
std
::
istream
&
is
,
const
options
&
opts
)
{
Corpus2
::
XcesReader
xr
(
tagset
,
is
);
Corpus2
::
Sentence
::
Ptr
s
;
while
((
s
=
xr
.
get_next_sentence
()))
{
rules
.
execute_once
(
s
);
writer
->
write_sentence
(
*
s
);
Corpus2
::
XcesReader
reader
(
tagset
,
is
);
Corpus2
::
TokenTimer
&
timer
=
Corpus2
::
global_timer
();
while
(
boost
::
shared_ptr
<
Corpus2
::
Chunk
>
c
=
reader
.
get_next_chunk
())
{
foreach
(
boost
::
shared_ptr
<
Corpus2
::
Sentence
>&
s
,
c
->
sentences
())
{
if
(
opts
.
until_done
)
{
rules
.
execute_until_done
(
s
,
opts
.
until_done_iterations
);
}
else
{
rules
.
execute_once
(
s
);
}
timer
.
count_sentence
(
*
s
);
if
(
progress
)
{
timer
.
check_slice
();
}
if
(
opts
.
first
)
break
;
}
writer
->
write_chunk
(
*
c
);
if
(
opts
.
first
)
break
;
}
if
(
progress
)
{
timer
.
stats
();
}
}
...
...
@@ -106,6 +126,8 @@ int main(int argc, char** argv)
"Files to load, looking at the extension to determine type
\n
"
)
(
"output-format,o"
,
value
(
&
output_format
)
->
default_value
(
"xces"
),
writers_help
.
c_str
())
(
"progress,p"
,
value
(
&
progress
)
->
zero_tokens
(),
"Show progress info"
)
(
"quiet,q"
,
value
(
&
quiet
)
->
zero_tokens
(),
"Suppress messages
\n
"
)
(
"until-done,u"
,
value
(
&
opts
.
until_done
)
->
zero_tokens
(),
...
...
@@ -161,8 +183,11 @@ int main(int argc, char** argv)
}
}
if
(
!
rules
.
empty
())
{
Corpus2
::
TokenTimer
&
timer
=
Corpus2
::
global_timer
();
timer
.
register_signal_handler
();
boost
::
shared_ptr
<
Corpus2
::
TokenWriter
>
writer
;
writer
.
reset
(
Corpus2
::
TokenWriter
::
create
(
output_format
,
std
::
cout
,
tagset
));
foreach
(
const
std
::
string
&
f
,
corpora_files
)
{
writer
.
reset
(
Corpus2
::
TokenWriter
::
create
(
output_format
,
std
::
cout
,
tagset
));
foreach
(
const
std
::
string
&
f
,
corpora_files
)
{
std
::
ifstream
ifs
(
f
.
c_str
());
if
(
ifs
.
good
())
{
do_stream
(
writer
,
tagset
,
rules
,
ifs
,
opts
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment