Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Arkadiusz Janz
corpus2mwe
Commits
df506c18
Commit
df506c18
authored
Feb 17, 2021
by
Grzegorz Kostkowski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use custom annotation in python script, prepare testing environment scripts
parent
3a1dcc84
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
75 additions
and
3 deletions
+75
-3
src/cclmwe/tests/custom_annotations/Dockerfile
src/cclmwe/tests/custom_annotations/Dockerfile
+53
-0
src/cclmwe/tests/custom_annotations/test.sh
src/cclmwe/tests/custom_annotations/test.sh
+15
-0
src/tools/mwe_converter.py
src/tools/mwe_converter.py
+7
-3
No files found.
src/cclmwe/tests/custom_annotations/Dockerfile
0 → 100644
View file @
df506c18
# Test image for the custom-annotation feature: builds wccl from its
# `param_ann` branch, then builds corpus2mwe from the build context on top.
FROM clarinpl/python:3.6

# Toolchain and libraries needed to compile wccl and corpus2mwe.
# The APT lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && apt-get install -y \
        libxml++2.6-dev \
        libloki-dev \
        libboost-all-dev \
        libicu-dev \
        libffi-dev \
        libssl-dev \
        libxml2-utils \
        cmake \
        swig \
        pwrutils \
        gdebi-core \
        antlr \
        libantlr-dev \
        default-jdk \
        git \
    && rm -rf /var/lib/apt/lists/*

# WORKDIR creates the directory when it does not exist, so no separate mkdir
# is required; every later step runs relative to /home/install.
WORKDIR /home/install

# Register the CLARIN-PL APT repository.
# The signing key is fetched over HTTPS (the repository itself is already
# configured over HTTPS), and `pipefail` makes a failed download fail the build
# instead of relying on apt-key to reject empty input.
RUN bash -o pipefail -c "wget -q -O - https://apt.clarin-pl.eu/KEY.gpg | apt-key add -"
RUN bash -c "echo 'deb https://apt.clarin-pl.eu/ /' > /etc/apt/sources.list.d/clarin.list"

# Install corpus2
RUN apt-get update && apt-get install -y \
        corpus2-python3.6 \
    && rm -rf /var/lib/apt/lists/*

# Install wccl from branch with change
RUN git clone --single-branch \
        --branch param_ann \
        https://gitlab.clarin-pl.eu/analysers/wccl.git
# Use every available core rather than a hard-coded job count.
RUN mkdir wccl/src/build && \
    cd wccl/src/build && \
    cmake .. && \
    make -j "$(nproc)" && \
    make install && \
    ldconfig

# install corpus2mwe from this repository (branch param_ann)
COPY ./ ./corpus2mwe
RUN mkdir corpus2mwe/build && \
    cd corpus2mwe/build && \
    cmake .. && \
    make -j "$(nproc)" && \
    make install && \
    ldconfig

RUN pip install --no-cache-dir corpus_ccl
src/cclmwe/tests/custom_annotations/test.sh
0 → 100755
View file @
df506c18
#!/bin/bash
# Smoke test for the -a/--annotation option of mwe_converter.py.
#
# Build the image first (from this directory):
#   docker build ../../.. -t corpus2mwe_wccl_param_ann -f Dockerfile
#
# The converter is run twice inside the container on the same input: once
# without -a and once with "-a test_ann". Both outputs are printed so they
# can be compared by eye.
set -euo pipefail

# The inner script is single-quoted on purpose: $testdata must be expanded by
# the shell inside the container, not by the host shell.
docker run --rm corpus2mwe_wccl_param_ann bash -c '
# Abort on the first failing command so a broken converter run is not masked
# by the commands that follow it.
set -e
testdata=/home/install/corpus2mwe/cclmwe/tests/testdata
cd corpus2mwe/tools

echo "Without setting annotation name:"
python mwe_converter.py -c "$testdata/ccl.xml" \
    -o "$testdata/ccl_out.xml"
cat "$testdata/ccl_out.xml"

echo "With annotation test_ann:"
python mwe_converter.py -c "$testdata/ccl.xml" \
    -o "$testdata/ccl_out2.xml" \
    -a test_ann
cat "$testdata/ccl_out2.xml"
'
\ No newline at end of file
src/tools/mwe_converter.py
View file @
df506c18
...
...
@@ -11,21 +11,25 @@ def parser():
aparser
.
add_argument
(
'-o'
,
'--output'
,
dest
=
'out_file'
,
required
=
True
)
aparser
.
add_argument
(
'-m'
,
'--mwe_merged'
,
action
=
'store_false'
)
aparser
.
add_argument
(
'-t'
,
'--tagset'
,
default
=
'nkjp'
)
aparser
.
add_argument
(
'-a'
,
'--annotation'
,
default
=
'mwe'
,
help
=
'Name of annotation to set'
)
return
aparser
class
MWEConverter
(
object
):
def
__init__
(
self
,
tagset
=
'nkjp'
):
def
__init__
(
self
,
tagset
=
'nkjp'
,
annotation
=
'mwe'
):
self
.
reader
=
None
if
tagset
:
self
.
tagset
=
ccl
.
get_tagset
(
tagset
)
else
:
self
.
tagset
=
ccl
.
get_tagset
(
'nkjp'
)
self
.
annotation
=
annotation
def
convert
(
self
,
ccl_file
,
out_mwe_file
,
annots_used
=
False
):
if
not
self
.
reader
:
self
.
reader
=
mwe
.
CclMWEReader
(
ccl_file
,
self
.
tagset
)
self
.
reader
=
mwe
.
CclMWEReader
(
ccl_file
,
self
.
tagset
,
''
,
self
.
annotation
)
self
.
reader
.
use_annotations
(
annots_used
)
else
:
self
.
reader
.
set_files
(
ccl_file
)
...
...
@@ -37,7 +41,7 @@ def main(argv=None):
aparser
=
parser
()
args
=
aparser
.
parse_args
(
argv
)
converter
=
MWEConverter
(
tagset
=
args
.
tagset
)
converter
=
MWEConverter
(
tagset
=
args
.
tagset
,
annotation
=
args
.
annotation
)
converter
.
convert
(
args
.
ccl_file
,
args
.
out_file
,
args
.
mwe_merged
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment