Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
WCCL
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
WCCL
Commits
a3d5ae74
Commit
a3d5ae74
authored
14 years ago
by
ilor
Browse files
Options
Downloads
Patches
Plain Diff
rough util for running wccl rules
parent
a49d9fa1
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
CMakeLists.txt
+1
-0
1 addition, 0 deletions
CMakeLists.txt
wcclrules/CMakeLists.txt
+24
-0
24 additions, 0 deletions
wcclrules/CMakeLists.txt
wcclrules/main.cpp
+183
-0
183 additions, 0 deletions
wcclrules/main.cpp
with
208 additions
and
0 deletions
CMakeLists.txt
+
1
−
0
View file @
a3d5ae74
...
...
@@ -60,4 +60,5 @@ endif(MSVC OR BORLAND)
add_subdirectory
(
libwccl
)
add_subdirectory
(
wcclparser
)
add_subdirectory
(
wcclrun
)
add_subdirectory
(
wcclrules
)
add_subdirectory
(
tests
)
This diff is collapsed.
Click to expand it.
wcclrules/CMakeLists.txt
0 → 100644
+
24
−
0
View file @
a3d5ae74
# Build script for the wcclrules command-line utility (single source: main.cpp).
PROJECT(wcclrules)

# LibXML++ is mandatory; its include/link settings are accumulated into LIBS.
find_package(LibXML++ REQUIRED)
include_directories(${LibXML++_INCLUDE_DIRS})
link_directories(${LibXML++_LIBRARY_DIRS})
set(LIBS ${LIBS} ${LibXML++_LIBRARIES})

# Lets main.cpp include project headers relative to the top-level source dir.
include_directories(${CMAKE_SOURCE_DIR})

# Boost paths must be declared BEFORE the target is created: link_directories()
# only applies to targets created after the call, so placing it after
# add_executable() left the Boost library directories out of the link line.
# NOTE(review): the Boost_* variables are presumably set by a find_package(Boost)
# in a parent CMakeLists.txt -- confirm, there is no such call in this file.
include_directories(${Boost_INCLUDE_DIR})
link_directories(${Boost_LIBRARY_DIRS})

# NOTE(review): the macro name refers to wcclrun, not wcclrules; it looks like it
# was carried over from the wcclrun build script. Kept unchanged -- confirm
# whether anything compiled into this target actually reads it.
add_definitions(-DLIBWCCL_WCCLRUN_DATA_DIR="${PROJECT_SOURCE_DIR}/")

add_executable(wcclrules main.cpp)
target_link_libraries(wcclrules wccl ${Boost_LIBRARIES} antlr ${LIBS})

# The binary is only installed on UNIX-like systems.
if(UNIX)
    install(TARGETS wcclrules RUNTIME DESTINATION bin)
endif(UNIX)
This diff is collapsed.
Click to expand it.
wcclrules/main.cpp
0 → 100644
+
183
−
0
View file @
a3d5ae74
#include
<cstdlib>
#include
<fstream>
#include
<iomanip>
#include
<libwccl/values/strset.h>
#include
<libwccl/parser/Parser.h>
#include
<libwccl/ops/rulesequence.h>
#include
<libcorpus2/tagsetmanager.h>
#include
<boost/bind.hpp>
#include
<boost/algorithm/string.hpp>
#include
<boost/make_shared.hpp>
#include
<boost/program_options.hpp>
#include
<libcorpus2/io/xcesreader.h>
#include
<libcorpus2/io/xceswriter.h>
#include
<antlr/NoViableAltException.hpp>
#include
<antlr/MismatchedTokenException.hpp>
namespace {
    /// Bound to the "quiet,q" command-line switch in main(); false by default.
    bool quiet = false;

    /**
     * Sentence-processing settings filled in from the command line.
     *
     * The default constructor gives every member a defined value so that a
     * default-constructed instance never carries indeterminate data, even if
     * a caller forgets to assign one of the fields.
     */
    struct options
    {
        /// Defaults: all sentences, until-done mode off, 1000 iteration limit.
        options()
            : first(false), until_done(false), until_done_iterations(1000)
        {
        }

        /// Bound to "first-sentence-only,1": stop after the first sentence.
        bool first;
        /// Bound to "until-done,u".
        bool until_done;
        /// Bound to "until-done-iterations".
        int until_done_iterations;
    };
}
/**
 * Parse one WCCL rule file and append the rules it contains to @p rules.
 *
 * Parser and library exceptions listed in the handlers below are reported on
 * stderr and turned into a false return value; they do not propagate.
 *
 * @param parser   parser used to read the rule sequence
 * @param filename path of the rule file to open
 * @param rules    sequence that receives the parsed rules (appended at the end)
 * @return true when the parser produced a rule sequence and it was appended,
 *         false when the parser returned a null pointer or a handled
 *         exception was raised
 */
bool load_more_rules(Wccl::Parser& parser, const std::string& filename,
        Wccl::RuleSequence& rules)
{
    try {
        std::ifstream is(filename.c_str());
        if (!is.good()) {
            // NOTE(review): presumably caught by one of the Wccl/PwrNlp
            // handlers below -- confirm FileNotFound's base class.
            throw Wccl::FileNotFound(filename, "", __FUNCTION__);
        }

        boost::shared_ptr<Wccl::RuleSequence> ret = parser.parseRuleSequence(is);
        if (ret) {
            // Rule-count diagnostic; honour --quiet, which was previously
            // parsed but never consulted.
            if (!quiet) {
                std::cerr << ret->size() << "\n";
            }
            std::copy(ret->begin(), ret->end(), std::back_inserter(rules));
            return true;
        } else {
            std::cerr << "Problem while parsing -- "
                << "parser returned NULL!" << std::endl;
        }
    } catch (antlr::MismatchedTokenException& e) {
        std::cerr << e.getFileLineColumnString()
            << " " << e.getMessage() << std::endl;
    } catch (antlr::NoViableAltException& e) {
        std::cerr << e.getFileLineColumnString()
            << " " << e.getMessage() << std::endl;
    } catch (Wccl::InvalidVariableName& e) {
        std::cerr << "Wccl::InvalidVariableName "
            << e.info() << std::endl;
    } catch (Wccl::VariableTypeMismatch& e) {
        std::cerr << "Wccl::VariableTypeMismatch "
            << e.info() << std::endl;
    } catch (Wccl::WcclError& e) {
        std::cerr << "Wccl::WcclError:"
            << e.info() << std::endl;
    } catch (PwrNlp::PwrNlpError& e) {
        std::cerr << "PwrNlp::PwrNlpError "
            << e.info() << std::endl;
    } catch (antlr::ANTLRException& e) {
        std::cerr << "Antlr error " << e.getMessage() << std::endl;
    }
    return false;
}
/**
 * Read sentences from an XCES stream, apply the rules to each one and write
 * the result.
 *
 * @param writer  destination for the processed sentences
 * @param tagset  tagset handed to the XCES reader
 * @param rules   rule sequence executed on every sentence
 * @param is      input stream containing the XCES corpus
 * @param opts    processing settings: opts.first stops after the first
 *                sentence; opts.until_done selects repeated rule application,
 *                limited by opts.until_done_iterations
 */
void do_stream(boost::shared_ptr<Corpus2::TokenWriter> writer,
        const Corpus2::Tagset& tagset, Wccl::RuleSequence& rules,
        std::istream& is, const options& opts)
{
    Corpus2::XcesReader xr(tagset, is);
    Corpus2::Sentence::Ptr s;
    // get_next_sentence() yields a null pointer once the input is exhausted.
    while ((s = xr.get_next_sentence())) {
        if (opts.until_done) {
            // Previously the until-done options were parsed but ignored and
            // execute_once() was always used.
            // NOTE(review): relies on RuleSequence::execute_until_done(sentence,
            // max_iter) from libwccl -- confirm it exists in this revision.
            rules.execute_until_done(s, opts.until_done_iterations);
        } else {
            rules.execute_once(s);
        }
        writer->write_sentence(*s);
        if (opts.first) {
            break;
        }
    }
}
int
main
(
int
argc
,
char
**
argv
)
{
std
::
string
tagset_load
=
"kipi"
;
std
::
string
output_format
;
options
opts
;
opts
.
first
=
false
;
opts
.
until_done
=
false
;
opts
.
until_done_iterations
=
1000
;
std
::
vector
<
std
::
string
>
corpora_files
,
ccl_files
,
files
;
bool
corpus_stdin
=
true
;
using
boost
::
program_options
::
value
;
std
::
string
writers
=
boost
::
algorithm
::
join
(
Corpus2
::
TokenWriter
::
available_writer_types_help
(),
" "
);
std
::
string
writers_help
=
"Output format, any of: "
+
writers
+
"
\n
"
;
boost
::
program_options
::
options_description
desc
(
"Allowed options"
);
desc
.
add_options
()
(
"tagset,t"
,
value
(
&
tagset_load
),
"Tagset to use
\n
"
)
(
"corpus,c"
,
value
(
&
corpora_files
),
"Corpus file to load (XCES), do not load from stdin
\n
"
)
(
"ccl-file,C"
,
value
(
&
ccl_files
),
"CCL rule files
\n
"
)
(
"files,f"
,
value
(
&
files
),
"Files to load, looking at the extension to determine type
\n
"
)
(
"output-format,o"
,
value
(
&
output_format
)
->
default_value
(
"xces"
),
writers_help
.
c_str
())
(
"quiet,q"
,
value
(
&
quiet
)
->
zero_tokens
(),
"Suppress messages
\n
"
)
(
"until-done,u"
,
value
(
&
opts
.
until_done
)
->
zero_tokens
(),
"Until-done mode
\n
"
)
(
"until-done-iterations"
,
value
(
&
opts
.
until_done_iterations
),
"Until-done iteration limit
\n
"
)
(
"first-sentence-only,1"
,
value
(
&
opts
.
first
)
->
zero_tokens
(),
"Only process first sentence
\n
"
)
(
"help,h"
,
"Show help"
)
;
boost
::
program_options
::
variables_map
vm
;
boost
::
program_options
::
positional_options_description
p
;
p
.
add
(
"files"
,
-
1
);
try
{
boost
::
program_options
::
store
(
boost
::
program_options
::
command_line_parser
(
argc
,
argv
)
.
options
(
desc
).
positional
(
p
).
run
(),
vm
);
}
catch
(
boost
::
program_options
::
error
&
e
)
{
std
::
cerr
<<
e
.
what
()
<<
std
::
endl
;
return
2
;
}
boost
::
program_options
::
notify
(
vm
);
if
(
vm
.
count
(
"help"
))
{
std
::
cerr
<<
"Usage "
<<
argv
[
0
]
<<
" [OPTIONS] FILES
\n
"
<<
"Files ending with .xml are treated as corpora, otherwise
\n
"
<<
"as CCL files. Use - to read corpus from stdin (as with -I)"
;
std
::
cout
<<
desc
<<
"
\n
"
;
return
1
;
}
foreach
(
const
std
::
string
&
f
,
files
)
{
if
(
boost
::
algorithm
::
ends_with
(
f
,
".xml"
))
{
corpora_files
.
push_back
(
f
);
corpus_stdin
=
false
;
}
else
{
ccl_files
.
push_back
(
f
);
}
}
try
{
const
Corpus2
::
Tagset
&
tagset
=
Corpus2
::
get_named_tagset
(
tagset_load
);
Wccl
::
Parser
parser
(
tagset
);
Wccl
::
RuleSequence
rules
;
foreach
(
const
std
::
string
&
f
,
ccl_files
)
{
size_t
sz
=
rules
.
size
();
if
(
!
load_more_rules
(
parser
,
f
,
rules
))
{
std
::
cerr
<<
"Warning: error while parsing "
<<
f
<<
"
\n
"
;
}
if
(
rules
.
size
()
==
sz
)
{
std
::
cerr
<<
"Warning: no rules loaded from "
<<
f
<<
"
\n
"
;
}
}
if
(
!
rules
.
empty
())
{
boost
::
shared_ptr
<
Corpus2
::
TokenWriter
>
writer
;
writer
.
reset
(
Corpus2
::
TokenWriter
::
create
(
output_format
,
std
::
cout
,
tagset
));
foreach
(
const
std
::
string
&
f
,
corpora_files
)
{
std
::
ifstream
ifs
(
f
.
c_str
());
if
(
ifs
.
good
())
{
do_stream
(
writer
,
tagset
,
rules
,
ifs
,
opts
);
}
else
{
std
::
cerr
<<
"Error reading corpus from "
<<
f
<<
"
\n
"
;
}
}
if
(
corpus_stdin
)
{
do_stream
(
writer
,
tagset
,
rules
,
std
::
cin
,
opts
);
}
}
}
catch
(
PwrNlp
::
PwrNlpError
&
e
)
{
std
::
cerr
<<
e
.
info
()
<<
std
::
endl
;
return
2
;
}
return
0
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment