Commit df506c18 authored by Grzegorz Kostkowski

Use custom annotation in python script, prepare testing environment scripts

parent 3a1dcc84
FROM clarinpl/python:3.6
# Toolchain and development libraries needed to compile wccl and corpus2mwe
# below. The apt package lists are removed in the same layer so they do not
# end up in the image; later install steps run their own `apt-get update`.
RUN apt-get update && apt-get install -y \
    libxml++2.6-dev \
    libloki-dev \
    libboost-all-dev \
    libicu-dev \
    libffi-dev \
    libssl-dev \
    libxml2-utils \
    cmake \
    swig \
    pwrutils \
    gdebi-core \
    antlr \
    libantlr-dev \
    default-jdk \
    git \
    && rm -rf /var/lib/apt/lists/*
# WORKDIR creates the directory when it does not exist, so no separate
# `mkdir -p` layer is needed.
WORKDIR /home/install
# Register the CLARIN-PL apt repository.
# The signing key is fetched over HTTPS (the same host the repository line
# below already uses) rather than plain HTTP, and `pipefail` makes a failed
# download abort the build instead of being hidden behind apt-key's status.
RUN bash -o pipefail -c "wget -q -O - https://apt.clarin-pl.eu/KEY.gpg | apt-key add -"
RUN bash -c "echo 'deb https://apt.clarin-pl.eu/ /' > /etc/apt/sources.list.d/clarin.list"
# Install corpus2 (Python 3.6 build). `apt-get update` runs in the same layer
# so the freshly added repository is indexed, and the package lists are
# removed afterwards to keep them out of the image.
RUN apt-get update && apt-get install -y \
    corpus2-python3.6 \
    && rm -rf /var/lib/apt/lists/*
# Install wccl from the branch that carries the change (param_ann).
WORKDIR /home/install
RUN git clone --single-branch \
    --branch param_ann \
    https://gitlab.clarin-pl.eu/analysers/wccl.git
# Out-of-source CMake build, then install and refresh the linker cache so the
# newly installed shared libraries are found by later steps.
# (The former `cat .../mwereader.cpp` debugging step was dropped: it only
# dumped a source file into the build log.)
RUN mkdir wccl/src/build && \
    cd wccl/src/build && \
    cmake .. && \
    make -j 8 && \
    make install && \
    ldconfig
# Build and install corpus2mwe from the build context, i.e. this repository
# (branch param_ann).
WORKDIR /home/install
COPY ./ ./corpus2mwe
# `set -e` aborts on the first failing command, so this sequence stops at the
# same point an `&&` chain would.
RUN set -e; \
    mkdir corpus2mwe/build; \
    cd corpus2mwe/build; \
    cmake ..; \
    make -j 8; \
    make install; \
    ldconfig
# Python helper package; --no-cache-dir keeps pip's download cache out of the
# image layer.
RUN pip install --no-cache-dir corpus_ccl
#!/bin/bash
# Runs mwe_converter.py inside the corpus2mwe_wccl_param_ann image on the
# test corpus twice -- once without -a, once with "-a test_ann" -- and prints
# each converted file.
#
# Build the image first:
#   docker build ../../.. -t corpus2mwe_wccl_param_ann -f Dockerfile
set -euo pipefail

# The whole container script is single-quoted, so $testdata is expanded by the
# bash inside the container, not by this shell. `set -e` makes a failing cd or
# conversion abort the run with a non-zero status instead of being ignored.
docker run --rm corpus2mwe_wccl_param_ann bash -c '
set -e
testdata=/home/install/corpus2mwe/cclmwe/tests/testdata
cd corpus2mwe/tools
echo "Without setting annotation name:"
python mwe_converter.py -c "$testdata/ccl.xml" \
    -o "$testdata/ccl_out.xml"
cat "$testdata/ccl_out.xml"
echo "With annotation test_ann:"
python mwe_converter.py -c "$testdata/ccl.xml" \
    -o "$testdata/ccl_out2.xml" \
    -a test_ann
cat "$testdata/ccl_out2.xml"
'
\ No newline at end of file
......@@ -11,21 +11,25 @@ def parser():
aparser.add_argument('-o', '--output', dest='out_file', required=True)
aparser.add_argument('-m', '--mwe_merged', action='store_false')
aparser.add_argument('-t', '--tagset', default='nkjp')
aparser.add_argument('-a', '--annotation', default='mwe',
help='Name of annotation to set')
return aparser
class MWEConverter(object):
def __init__(self, tagset='nkjp'):
def __init__(self, tagset='nkjp', annotation='mwe'):
self.reader = None
if tagset:
self.tagset = ccl.get_tagset(tagset)
else:
self.tagset = ccl.get_tagset('nkjp')
self.annotation = annotation
def convert(self, ccl_file, out_mwe_file, annots_used=False):
if not self.reader:
self.reader = mwe.CclMWEReader(ccl_file, self.tagset)
self.reader = mwe.CclMWEReader(ccl_file, self.tagset, '',
self.annotation)
self.reader.use_annotations(annots_used)
else:
self.reader.set_files(ccl_file)
......@@ -37,7 +41,7 @@ def main(argv=None):
aparser = parser()
args = aparser.parse_args(argv)
converter = MWEConverter(tagset=args.tagset)
converter = MWEConverter(tagset=args.tagset, annotation=args.annotation)
converter.convert(args.ccl_file, args.out_file, args.mwe_merged)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment