Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Arkadiusz Janz
corpus2mwe
Commits
14366fbc
Commit
14366fbc
authored
Apr 03, 2020
by
Arkadiusz Janz
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
initial, moved from our old git
parent
4f2054e7
Changes
33
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
33 changed files
with
2448452 additions
and
0 deletions
+2448452
-0
CMakeLists.txt
CMakeLists.txt
+23
-0
CMakeScripts/FindCorpus2.cmake
CMakeScripts/FindCorpus2.cmake
+49
-0
CMakeScripts/FindCorpus2Whole.cmake
CMakeScripts/FindCorpus2Whole.cmake
+22
-0
CMakeScripts/FindGlib.cmake
CMakeScripts/FindGlib.cmake
+39
-0
CMakeScripts/FindGlibmm.cmake
CMakeScripts/FindGlibmm.cmake
+38
-0
CMakeScripts/FindICU.cmake
CMakeScripts/FindICU.cmake
+78
-0
CMakeScripts/FindLibXML++.cmake
CMakeScripts/FindLibXML++.cmake
+41
-0
CMakeScripts/FindLibXML2.cmake
CMakeScripts/FindLibXML2.cmake
+90
-0
CMakeScripts/FindMWEReader.cmake
CMakeScripts/FindMWEReader.cmake
+26
-0
CMakeScripts/FindSigC++.cmake
CMakeScripts/FindSigC++.cmake
+34
-0
CMakeScripts/LibFindMacros.cmake
CMakeScripts/LibFindMacros.cmake
+99
-0
README.md
README.md
+137
-0
cclmwe/CMakeLists.txt
cclmwe/CMakeLists.txt
+84
-0
cclmwe/cclmwereader.cpp
cclmwe/cclmwereader.cpp
+81
-0
cclmwe/cclmwereader.h
cclmwe/cclmwereader.h
+52
-0
cclmwe/mwemanager.cpp
cclmwe/mwemanager.cpp
+30
-0
cclmwe/mwemanager.h
cclmwe/mwemanager.h
+25
-0
cclmwe/tests/CMakeLists.txt
cclmwe/tests/CMakeLists.txt
+50
-0
cclmwe/tests/ccl_gz_tests/__init__.py
cclmwe/tests/ccl_gz_tests/__init__.py
+0
-0
cclmwe/tests/ccl_gz_tests/documents.py
cclmwe/tests/ccl_gz_tests/documents.py
+567
-0
cclmwe/tests/ccl_gz_tests/testMWE.py
cclmwe/tests/ccl_gz_tests/testMWE.py
+216
-0
cclmwe/tests/cclmwe_test.cpp
cclmwe/tests/cclmwe_test.cpp
+183
-0
cclmwe/tests/testdata/ccl.xml
cclmwe/tests/testdata/ccl.xml
+45
-0
cclmwe/tests/testdata/ccl1.xml
cclmwe/tests/testdata/ccl1.xml
+45
-0
cclmwe/tests/testdata/ccl2.xml
cclmwe/tests/testdata/ccl2.xml
+81
-0
data/lod.xml
data/lod.xml
+2216031
-0
data/mwe.xml
data/mwe.xml
+230053
-0
install.sh
install.sh
+14
-0
swig/CMakeLists.txt
swig/CMakeLists.txt
+92
-0
swig/cclmwereader.i
swig/cclmwereader.i
+49
-0
swig/corpus2mwe.i
swig/corpus2mwe.i
+11
-0
swig/mwemanager.i
swig/mwemanager.i
+22
-0
tools/mwe_converter.py
tools/mwe_converter.py
+45
-0
No files found.
CMakeLists.txt
0 → 100644
View file @
14366fbc
# Top-level build script for corpus2mwe.
#
# Builds the cclmwe library and, when SWIG is available, the Python wrappers
# located in the swig/ subdirectory.

# cmake_minimum_required() has to come before project(): it establishes the
# policy defaults that project() and everything after it run under.
cmake_minimum_required(VERSION 2.8)
project(corpus2mwe)

# Enable C++0x language features for every target in the tree.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")

# Version components of this project.
set(corpus2_mwe_ver_major "0")
set(corpus2_mwe_ver_minor "1")
set(corpus2_mwe_ver_patch "0")

# Directory holding the bundled Find*.cmake modules. PROJECT_SOURCE_DIR equals
# CMAKE_SOURCE_DIR for a stand-alone build, but stays correct when this
# project is pulled into a larger tree via add_subdirectory().
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/CMakeScripts)

# Debug remains the default build type, but an explicit choice made by the
# user (e.g. -DCMAKE_BUILD_TYPE=Release) is no longer overwritten.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()

set(LIBS "")

# Make headers addressable relative to the project root in all subdirectories.
include_directories(${corpus2mwe_SOURCE_DIR})

add_subdirectory(cclmwe)

# The Python wrappers are optional: they are built only when SWIG is present.
find_package(SWIG)
if(SWIG_FOUND)
  message(STATUS "SWIG found - building Python wrappers")
  add_subdirectory(swig)
else()
  message(STATUS "WARNING: SWIG not found - Python wrappers will not be built")
endif()
CMakeScripts/FindCorpus2.cmake
0 → 100644
View file @
14366fbc
# Locate libcorpus2.
#
# Cache variables:
#   Corpus2_INCLUDE_DIR - directory containing libcorpus2/token.h
#   Corpus2_LIBRARY     - path to the corpus2 library
# Result variables:
#   Corpus2_FOUND       - TRUE when header and library were located and the
#                         requested minimum version (if any) is satisfied
#   Corpus2_VERSION     - version read from libcorpus2/version.h, or 0.0.0
#                         when it cannot be determined

if(Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY)
  # Already in cache
  set(Corpus2_FOUND TRUE)
else()
  find_path(Corpus2_INCLUDE_DIR libcorpus2/token.h /usr/include /usr/local/include)
  find_library(Corpus2_LIBRARY corpus2 /usr/lib /usr/local/lib)
  mark_as_advanced(Corpus2_LIBRARY)
  mark_as_advanced(Corpus2_INCLUDE_DIR)

  if(Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY)
    set(Corpus2_FOUND TRUE)
  endif()
endif()

if(Corpus2_FOUND)
  set(Corpus2_VERSION 0.0.0)
  find_file(_Corpus2_VERSION_FILE libcorpus2/version.h ${Corpus2_INCLUDE_DIR})
  mark_as_advanced(_Corpus2_VERSION_FILE)

  if(_Corpus2_VERSION_FILE)
    file(READ ${_Corpus2_VERSION_FILE} _Corpus2_VERSION_CONTENTS)
    # Isolate the #define first. string(REGEX REPLACE) returns its input
    # unchanged when nothing matches, which would otherwise leave the whole
    # header text in Corpus2_VERSION; with this guard the 0.0.0 default stays.
    string(REGEX MATCH "#define LIBCORPUS2_VERSION \"[0-9.]+\""
      _Corpus2_VERSION_DEFINE "${_Corpus2_VERSION_CONTENTS}")
    if(_Corpus2_VERSION_DEFINE)
      string(REGEX REPLACE ".*\"([0-9.]+)\".*" "\\1"
        Corpus2_VERSION "${_Corpus2_VERSION_DEFINE}")
    endif()
  endif()

  # Enforce the minimum version passed to find_package(), if one was given.
  if(Corpus2_FIND_VERSION)
    if(Corpus2_VERSION VERSION_LESS Corpus2_FIND_VERSION)
      if(Corpus2_FIND_REQUIRED)
        # Show which header the version was taken from before aborting.
        message("${_Corpus2_VERSION_FILE}")
        message(FATAL_ERROR "Corpus2 version too old: ${Corpus2_VERSION}, requested >= ${Corpus2_FIND_VERSION}")
      else()
        if(NOT Corpus2_FIND_QUIETLY)
          message(STATUS "Corpus2 version too old: ${Corpus2_VERSION}, requested >= ${Corpus2_FIND_VERSION}")
        endif()
      endif()
      set(Corpus2_FOUND False)
    endif()
  endif()

  # Announce success only if the version check above did not reject the
  # library; previously "Found libcorpus2" was printed even after rejection.
  if(Corpus2_FOUND AND NOT Corpus2_FIND_QUIETLY)
    message(STATUS "Found libcorpus2 ${Corpus2_VERSION}: ${Corpus2_LIBRARY}")
  endif()
else()
  if(Corpus2_FIND_REQUIRED)
    message(FATAL_ERROR "Could not find libcorpus2")
  else()
    message(STATUS "libcorpus2 not found")
  endif()
endif()
CMakeScripts/FindCorpus2Whole.cmake
0 → 100644
View file @
14366fbc
# Locate libcorpus2_whole.
#
# Cache variables:
#   Corpus2Whole_INCLUDE_DIR - directory containing libcorpus2_whole/document.h
#   Corpus2Whole_LIBRARY     - path to the corpus2_whole library
# Result variables:
#   Corpus2Whole_FOUND       - TRUE when both of the above were located
#
# NOTE(review): a missing library is always a fatal error here, whether or not
# REQUIRED was passed to find_package(). That behaviour is kept because
# existing callers may depend on it.

if(Corpus2Whole_INCLUDE_DIR AND Corpus2Whole_LIBRARY)
  # Already in cache
  set(Corpus2Whole_FOUND TRUE)
else()
  # (The else() used to repeat the condition of FindCorpus2.cmake, a
  # copy-paste slip; the bare form cannot go out of sync with the if().)
  find_path(Corpus2Whole_INCLUDE_DIR libcorpus2_whole/document.h /usr/include /usr/local/include)
  find_library(Corpus2Whole_LIBRARY corpus2_whole /usr/lib /usr/local/lib)
  mark_as_advanced(Corpus2Whole_LIBRARY)
  mark_as_advanced(Corpus2Whole_INCLUDE_DIR)

  if(Corpus2Whole_INCLUDE_DIR AND Corpus2Whole_LIBRARY)
    set(Corpus2Whole_FOUND TRUE)
  endif()
endif()

if(Corpus2Whole_FOUND)
  # Respect find_package(... QUIET) for the informational message.
  if(NOT Corpus2Whole_FIND_QUIETLY)
    message(STATUS "Found libcorpus2_whole")
  endif()
else()
  message(FATAL_ERROR "Could not find libcorpus2_whole")
endif()
CMakeScripts/FindGlib.cmake
0 → 100644
View file @
14366fbc
# Find module for Glib-2.0.
#
# Variables defined once the module has run:
#   Glib_FOUND        - system has Glib
#   Glib_INCLUDE_DIRS - the Glib include directories
#   Glib_LIBRARIES    - link these to use Glib

include(LibFindMacros)

# pkg-config supplies path hints through the Glib_PKGCONF_* variables.
libfind_pkg_check_modules(Glib_PKGCONF glib-2.0)

# The library itself.
find_library(Glib_LIBRARY
  NAMES glib-2.0
  PATHS ${Glib_PKGCONF_LIBRARY_DIRS}
)

# glibconfig.h is a separate configuration header kept under the lib directory.
find_path(GlibConfig_INCLUDE_DIR
  NAMES glibconfig.h
  PATHS ${Glib_PKGCONF_INCLUDE_DIRS} /usr
  PATH_SUFFIXES lib/glib-2.0/include
)

# The main header directory.
find_path(Glib_INCLUDE_DIR
  NAMES glib.h
  PATHS ${Glib_PKGCONF_INCLUDE_DIRS}
  PATH_SUFFIXES glib-2.0
)

# Hand the names of the result variables to libfind_process, which derives
# Glib_FOUND, Glib_INCLUDE_DIRS and Glib_LIBRARIES from them.
# Singular names belong to this library; plural names would refer to
# libraries it depends on.
set(Glib_PROCESS_LIBS Glib_LIBRARY)
set(Glib_PROCESS_INCLUDES Glib_INCLUDE_DIR GlibConfig_INCLUDE_DIR)
libfind_process(Glib)
CMakeScripts/FindGlibmm.cmake
0 → 100644
View file @
14366fbc
# Find module for Glibmm-2.4.
#
# Variables defined once the module has run:
#   Glibmm_FOUND        - system has Glibmm
#   Glibmm_INCLUDE_DIRS - the Glibmm include directories
#   Glibmm_LIBRARIES    - link these to use Glibmm

include(LibFindMacros)

# Packages Glibmm depends on; QUIET/REQUIRED given for Glibmm are forwarded.
libfind_package(Glibmm Glib)
libfind_package(Glibmm SigC++)

# pkg-config supplies path hints through the Glibmm_PKGCONF_* variables.
libfind_pkg_check_modules(Glibmm_PKGCONF glibmm-2.4)

# glibmmconfig.h is a separate configuration header kept under the lib directory.
find_path(GlibmmConfig_INCLUDE_DIR
  NAMES glibmmconfig.h
  PATHS ${Glibmm_PKGCONF_INCLUDE_DIRS} /usr
  PATH_SUFFIXES lib/glibmm-2.4/include
)

# The main header directory.
find_path(Glibmm_INCLUDE_DIR
  NAMES glibmm/main.h
  PATHS ${Glibmm_PKGCONF_INCLUDE_DIRS}
  PATH_SUFFIXES glibmm-2.4
)

# The library itself (stored in Glibmm_LIBRARY).
libfind_library(Glibmm glibmm 2.4)

# Hand the names of the result variables to libfind_process.
# Singular names belong to this library; plural names come from the
# dependencies located above.
set(Glibmm_PROCESS_LIBS Glibmm_LIBRARY Glib_LIBRARIES SigC++_LIBRARIES)
set(Glibmm_PROCESS_INCLUDES Glibmm_INCLUDE_DIR GlibmmConfig_INCLUDE_DIR Glib_INCLUDE_DIRS SigC++_INCLUDE_DIRS)
libfind_process(Glibmm)
CMakeScripts/FindICU.cmake
0 → 100644
View file @
14366fbc
# Finds the International Components for Unicode (ICU) Library
#
#  ICU_FOUND          - True if ICU found.
#  ICU_I18N_FOUND     - True if ICU's internationalization library found.
#  ICU_INCLUDE_DIRS   - Directory to include to get ICU headers
#                       Note: always include ICU headers as, e.g.,
#                       unicode/utypes.h
#  ICU_LIBRARIES      - Libraries to link against for the common ICU
#  ICU_I18N_LIBRARIES - Libraries to link against for ICU internationalization
#                       (note: in addition to ICU_LIBRARIES)
#  ICU_VERSION        - "<major>.<minor>" as read from the ICU headers
#  ICU_MAJOR_VERSION  - major version number (0 if it could not be read)
#  ICU_MINOR_VERSION  - minor version number (0 if it could not be read)

mark_as_advanced(ICU_DIR)

# Look for the header file.
find_path(ICU_INCLUDE_DIR
  NAMES unicode/utypes.h
  DOC "Include directory for the ICU library"
)
mark_as_advanced(ICU_INCLUDE_DIR)

# Look for the library.
find_library(ICU_LIBRARY
  NAMES icuuc cygicuuc cygicuuc32
  DOC "Libraries to link against for the common parts of ICU"
)
mark_as_advanced(ICU_LIBRARY)

# Copy the results to the output variables.
if(ICU_INCLUDE_DIR AND ICU_LIBRARY)
  set(ICU_FOUND 1)
  set(ICU_LIBRARIES ${ICU_LIBRARY})
  set(ICU_INCLUDE_DIRS ${ICU_INCLUDE_DIR})
  set(ICU_VERSION 0)
  set(ICU_MAJOR_VERSION 0)
  set(ICU_MINOR_VERSION 0)

  # The U_ICU_VERSION_*_NUM macros are defined in unicode/uvernum.h in newer
  # ICU releases and in unicode/uversion.h in older ones, so gather the text
  # of whichever of the two exist. A missing header is skipped instead of
  # aborting the configure step inside file(READ).
  set(_icu_version_text "")
  foreach(_icu_version_header uvernum.h uversion.h)
    if(EXISTS "${ICU_INCLUDE_DIR}/unicode/${_icu_version_header}")
      file(READ "${ICU_INCLUDE_DIR}/unicode/${_icu_version_header}" _icu_header_text)
      set(_icu_version_text "${_icu_version_text}${_icu_header_text}")
    endif()
  endforeach()

  # Extract each component only when its #define is really present.
  # string(REGEX REPLACE) returns its input unchanged when nothing matches,
  # which used to put the whole header into ICU_VERSION.
  foreach(_icu_part MAJOR MINOR)
    string(REGEX MATCH "#define[ \t]+U_ICU_VERSION_${_icu_part}_NUM[ \t]+[0-9]+"
      _icu_define "${_icu_version_text}")
    if(_icu_define)
      string(REGEX REPLACE ".*[ \t]([0-9]+)$" "\\1"
        ICU_${_icu_part}_VERSION "${_icu_define}")
    endif()
  endforeach()
  set(ICU_VERSION "${ICU_MAJOR_VERSION}.${ICU_MINOR_VERSION}")

  # Look for the ICU internationalization libraries
  find_library(ICU_I18N_LIBRARY
    NAMES icuin icui18n cygicuin cygicuin32
    DOC "Libraries to link against for ICU internationalization"
  )
  mark_as_advanced(ICU_I18N_LIBRARY)
  if(ICU_I18N_LIBRARY)
    set(ICU_I18N_FOUND 1)
    set(ICU_I18N_LIBRARIES ${ICU_I18N_LIBRARY})
  else()
    set(ICU_I18N_FOUND 0)
    set(ICU_I18N_LIBRARIES)
  endif()
else()
  # Nothing usable was found: clear every output variable.
  set(ICU_FOUND 0)
  set(ICU_I18N_FOUND 0)
  set(ICU_LIBRARIES)
  set(ICU_I18N_LIBRARIES)
  set(ICU_INCLUDE_DIRS)
  set(ICU_VERSION)
  set(ICU_MAJOR_VERSION)
  set(ICU_MINOR_VERSION)
endif()

# Report the outcome, honouring the QUIET / REQUIRED find_package() options.
if(ICU_FOUND)
  if(NOT ICU_FIND_QUIETLY)
    message(STATUS "Found ICU header files in ${ICU_INCLUDE_DIRS}")
    message(STATUS "Found ICU libraries: ${ICU_LIBRARIES}")
  endif()
else()
  if(ICU_FIND_REQUIRED)
    message(FATAL_ERROR "Could not find ICU")
  else()
    message(STATUS "Optional package ICU was not found")
  endif()
endif()
CMakeScripts/FindLibXML++.cmake
0 → 100644
View file @
14366fbc
# Find module for LibXML++ 2.6.
#
# Variables defined once the module has run:
#   LibXML++_FOUND        - system has LibXML++
#   LibXML++_INCLUDE_DIRS - the LibXML++ include directories
#   LibXML++_LIBRARIES    - link these to use LibXML++

include(LibFindMacros)

# Packages LibXML++ depends on; QUIET/REQUIRED given for LibXML++ are forwarded.
libfind_package(LibXML++ LibXML2)
libfind_package(LibXML++ Glibmm)

# pkg-config supplies path hints through the LibXML++_PKGCONF_* variables.
libfind_pkg_check_modules(LibXML++_PKGCONF libxml++-2.6)

# The library itself.
find_library(LibXML++_LIBRARY
  NAMES xml++-2.6
  PATHS ${LibXML++_PKGCONF_LIBRARY_DIRS}
)

# libxml++config.h is a separate configuration header kept under the lib directory.
find_path(LibXML++Config_INCLUDE_DIR
  NAMES libxml++config.h
  PATHS ${LibXML++_PKGCONF_INCLUDE_DIRS} /usr
  PATH_SUFFIXES lib/libxml++-2.6/include
)

# The main header directory.
find_path(LibXML++_INCLUDE_DIR
  NAMES libxml++/libxml++.h
  PATHS ${LibXML++_PKGCONF_INCLUDE_DIRS}
  PATH_SUFFIXES libxml++-2.6
)

# Hand the names of the result variables to libfind_process.
# Singular names belong to this library; plural names come from the
# dependencies located above.
set(LibXML++_PROCESS_LIBS LibXML++_LIBRARY LibXML2_LIBRARIES Glibmm_LIBRARIES)
set(LibXML++_PROCESS_INCLUDES LibXML++_INCLUDE_DIR LibXML++Config_INCLUDE_DIR LibXML2_INCLUDE_DIRS Glibmm_INCLUDE_DIRS)
libfind_process(LibXML++)
CMakeScripts/FindLibXML2.cmake
0 → 100644
View file @
14366fbc
# - Try to find libxml2
# Once done this will define
#
#  LibXML2_FOUND - system has xml2
#  LibXML2_INCLUDE_DIRS - the xml2 include directory
#  LibXML2_LIBRARIES - Link these to use xml2
#  LibXML2_DEFINITIONS - Compiler switches required for using xml2
#
#  Copyright (c) 2008 Andreas Schneider <mail@cynapses.org>
#  Modified for other libraries by Lasse Kärkkäinen <tronic>
#
#  Redistribution and use is allowed according to the terms of the New
#  BSD license.
#  For details see the accompanying COPYING-CMAKE-SCRIPTS file.
#

if(LibXML2_LIBRARIES AND LibXML2_INCLUDE_DIRS)
  # Already determined earlier in this configure run.
  set(LibXML2_FOUND TRUE)
else()
  # Use pkg-config to get the directories and then use these values
  # in the find_path() and find_library() calls.
  if(${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4)
    include(UsePkgConfig)
    pkgconfig(libxml-2.0 _LibXML2_INCLUDEDIR _LibXML2_LIBDIR _LibXML2_LDFLAGS _LibXML2_CFLAGS)
  else()
    find_package(PkgConfig)
    if(PKG_CONFIG_FOUND)
      # The prefix must be spelled exactly like the variables read below:
      # CMake variable names are case-sensitive, and the former _LIBXML2
      # prefix meant the pkg-config hints were never actually used.
      pkg_check_modules(_LibXML2 libxml-2.0)
    endif()
  endif()

  find_path(LibXML2_INCLUDE_DIR
    NAMES
      libxml/xpath.h
    PATHS
      ${_LibXML2_INCLUDEDIR}
      ${_LibXML2_INCLUDE_DIRS}
      /usr/include
      /usr/local/include
      /opt/local/include
      /sw/include
    PATH_SUFFIXES
      libxml2
  )

  find_library(LibXML2_LIBRARY
    NAMES
      xml2
    PATHS
      ${_LibXML2_LIBDIR}
      ${_LibXML2_LIBRARY_DIRS}
      /usr/lib
      /usr/local/lib
      /opt/local/lib
      /sw/lib
  )

  set(LibXML2_INCLUDE_DIRS ${LibXML2_INCLUDE_DIR})

  if(LibXML2_LIBRARY)
    set(LibXML2_LIBRARIES ${LibXML2_LIBRARIES} ${LibXML2_LIBRARY})
  endif()

  # The package is usable only when BOTH the headers and the library exist;
  # previously a located library alone was enough to report success.
  if(LibXML2_INCLUDE_DIR AND LibXML2_LIBRARY)
    set(LibXML2_FOUND TRUE)
  else()
    set(LibXML2_FOUND FALSE)
  endif()

  if(LibXML2_FOUND)
    if(NOT LibXML2_FIND_QUIETLY)
      message(STATUS "Found libxml2: ${LibXML2_LIBRARY}")
    endif()
  else()
    if(LibXML2_FIND_REQUIRED)
      message(FATAL_ERROR "Could not find libxml2")
    endif()
  endif()

  # Show the located paths only in the advanced view. The cache entries are
  # the singular variables created by find_path()/find_library().
  mark_as_advanced(LibXML2_INCLUDE_DIR LibXML2_LIBRARY)
endif()
CMakeScripts/FindMWEReader.cmake
0 → 100644
View file @
14366fbc
# Locate the MWEReader library (corpus2_mwereader).
#
# Cache variables:
#   MWEReader_INCLUDE_DIR - directory containing libmwereader/mwe.h
#   MWEReader_LIBRARY     - path to the corpus2_mwereader library
# Result variables:
#   MWEReader_FOUND       - TRUE when both of the above were located

if(MWEReader_INCLUDE_DIR AND MWEReader_LIBRARY)
  # Both paths are already known; nothing to search for.
  set(MWEReader_FOUND TRUE)
else()
  find_path(MWEReader_INCLUDE_DIR libmwereader/mwe.h /usr/include /usr/local/include)
  find_library(MWEReader_LIBRARY NAMES corpus2_mwereader PATHS /usr/lib /usr/local/lib)
  mark_as_advanced(MWEReader_LIBRARY)
  mark_as_advanced(MWEReader_INCLUDE_DIR)

  if(MWEReader_INCLUDE_DIR AND MWEReader_LIBRARY)
    set(MWEReader_FOUND TRUE)
  endif()
endif()

if(MWEReader_FOUND)
  if(NOT MWEReader_FIND_QUIETLY)
    message(STATUS "Found MWEReader: ${MWEReader_LIBRARY}")
  endif()
else()
  if(MWEReader_FIND_REQUIRED)
    # The mode keyword is FATAL_ERROR (with an underscore). The former
    # "FATAL ERROR" was treated as two ordinary message words, so a missing
    # REQUIRED library only printed a notice and configuration carried on.
    message(FATAL_ERROR " Could not find MWEReader library")
  else()
    message(STATUS " MWEReader not found")
  endif()
endif()
CMakeScripts/FindSigC++.cmake
0 → 100644
View file @
14366fbc
# Find module for SigC++-2.0.
#
# Variables defined once the module has run:
#   SigC++_FOUND        - system has SigC++
#   SigC++_INCLUDE_DIRS - the SigC++ include directories
#   SigC++_LIBRARIES    - link these to use SigC++

include(LibFindMacros)

# pkg-config supplies path hints through the SigC++_PKGCONF_* variables.
libfind_pkg_check_modules(SigC++_PKGCONF sigc++-2.0)

# sigc++config.h is a separate configuration header kept under the lib directory.
find_path(SigC++Config_INCLUDE_DIR
  NAMES sigc++config.h
  PATHS ${SigC++_PKGCONF_INCLUDE_DIRS} /usr
  PATH_SUFFIXES lib/sigc++-2.0/include
)

# The main header directory.
find_path(SigC++_INCLUDE_DIR
  NAMES sigc++/sigc++.h
  PATHS ${SigC++_PKGCONF_INCLUDE_DIRS}
  PATH_SUFFIXES sigc++-2.0
)

# The library itself (stored in SigC++_LIBRARY).
libfind_library(SigC++ sigc 2.0)

# Hand the names of the result variables to libfind_process.
# Singular names belong to this library; plural names would refer to
# libraries it depends on.
set(SigC++_PROCESS_LIBS SigC++_LIBRARY)
set(SigC++_PROCESS_INCLUDES SigC++_INCLUDE_DIR SigC++Config_INCLUDE_DIR)
libfind_process(SigC++)
CMakeScripts/LibFindMacros.cmake
0 → 100644
View file @
14366fbc
# find_package() wrapper that inherits the QUIET and REQUIRED options of the
# package currently being searched for.
#
#   PREFIX - prefix of the current (outer) package; its
#            ${PREFIX}_FIND_QUIETLY / ${PREFIX}_FIND_REQUIRED flags are read
#   ARGN   - name of the package to find plus any further find_package()
#            arguments
#
# As this is a macro, LIBFIND_PACKAGE_ARGS is set in the caller's scope.
macro(libfind_package PREFIX)
  set(LIBFIND_PACKAGE_ARGS ${ARGN})
  if(${PREFIX}_FIND_QUIETLY)
    list(APPEND LIBFIND_PACKAGE_ARGS QUIET)
  endif()
  if(${PREFIX}_FIND_REQUIRED)
    list(APPEND LIBFIND_PACKAGE_ARGS REQUIRED)
  endif()
  find_package(${LIBFIND_PACKAGE_ARGS})
endmacro()
# pkg-config helper that works with both generations of CMake's pkg-config
# support: UsePkgConfig was deprecated in CMake 2.6, the same release that
# introduced pkg_check_modules, so both have to be supported to avoid
# deprecation warnings. Behaves like pkg_check_modules, except that the caller
# does not need to check for pkg-config beforehand.
#
#   PREFIX  - prefix for the result variables (${PREFIX}_INCLUDE_DIRS,
#             ${PREFIX}_LIBRARY_DIRS, ...)
#   PKGNAME - pkg-config module name to query
macro(libfind_pkg_check_modules PREFIX PKGNAME)
  if(NOT (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4))
    # Any CMake other than 2.4: use the FindPkgConfig module.
    find_package(PkgConfig)
    if(PKG_CONFIG_FOUND)
      pkg_check_modules(${PREFIX} ${PKGNAME})
    endif()
  else()
    # CMake 2.4: only the legacy UsePkgConfig interface is available.
    include(UsePkgConfig)
    pkgconfig(${PKGNAME}
      ${PREFIX}_INCLUDE_DIRS
      ${PREFIX}_LIBRARY_DIRS
      ${PREFIX}_LDFLAGS
      ${PREFIX}_CFLAGS
    )
  endif()
endmacro()
# Final processing step of a find module, run after the individual paths have
# been searched for.
#
# Inputs (lists of variable NAMES, not values):
#   ${PREFIX}_PROCESS_INCLUDES - variables that each hold include directories
#   ${PREFIX}_PROCESS_LIBS     - variables that each hold library files
# Outputs:
#   ${PREFIX}_FOUND, ${PREFIX}_INCLUDE_DIRS, ${PREFIX}_LIBRARIES
#
# Also prints the status message and aborts with a fatal error when the
# package was REQUIRED but is incomplete.
macro(libfind_process PREFIX)
  # Nothing to do when the package was already processed during this run.
  if(NOT ${PREFIX}_FOUND)
    # Assume success; any missing entry below flips this to FALSE.
    set(${PREFIX}_FOUND TRUE)

    # Collect the include directories.
    foreach(i ${${PREFIX}_PROCESS_INCLUDES})
      if(NOT ${i})
        set(${PREFIX}_FOUND FALSE)
      else()
        list(APPEND ${PREFIX}_INCLUDE_DIRS ${${i}})
        mark_as_advanced(${i})
      endif()
    endforeach()

    # Collect the libraries.
    foreach(i ${${PREFIX}_PROCESS_LIBS})
      if(NOT ${i})
        set(${PREFIX}_FOUND FALSE)
      else()
        list(APPEND ${PREFIX}_LIBRARIES ${${i}})
        mark_as_advanced(${i})
      endif()
    endforeach()

    # Report the outcome.
    if(${PREFIX}_FOUND)
      if(NOT ${PREFIX}_FIND_QUIETLY)
        message(STATUS "Found ${PREFIX} ${${PREFIX}_VERSION}")
      endif()
    elseif(${PREFIX}_FIND_REQUIRED)
      # Dump every input variable so the user can see which one is missing.
      foreach(i ${${PREFIX}_PROCESS_INCLUDES} ${${PREFIX}_PROCESS_LIBS})
        message("${i}=${${i}}")
      endforeach()
      message(FATAL_ERROR "Required library ${PREFIX} NOT FOUND.\nInstall the library (dev version) and try again. If the library is already installed, use ccmake to set the missing variables manually.")
    endif()
  endif()
endmacro()
# Search for a library and store the result in ${PREFIX}_LIBRARY.
#
#   PREFIX   - package prefix; ${PREFIX}_PKGCONF_LIBRARY_DIRS is used as the
#              search path hint
#   basename - base name of the library
#   ARGV2    - optional version string; when given, "<basename>-<version>" is
#              searched, plus a variant of that name with dots turned into
#              underscores
#
# With MSVC80 / MSVC90 set, "-vc80" / "-vc90" is appended to the base name.
# As this is a macro, TMP and ${PREFIX}_LIBNAMES are set in the caller's scope.
macro(libfind_library PREFIX basename)
  # Compiler-specific suffix; MSVC90 takes precedence when both are set.
  if(MSVC90)
    set(TMP -vc90)
  elseif(MSVC80)
    set(TMP -vc80)
  else()
    set(TMP "")
  endif()

  if(${ARGC} GREATER 2)
    # Versioned name, followed by its dots-to-underscores variant.
    set(${PREFIX}_LIBNAMES ${basename}${TMP}-${ARGV2})
    string(REGEX REPLACE "\\." "_" TMP ${${PREFIX}_LIBNAMES})
    list(APPEND ${PREFIX}_LIBNAMES ${TMP})
  else()
    set(${PREFIX}_LIBNAMES ${basename}${TMP})
  endif()

  find_library(${PREFIX}_LIBRARY
    NAMES ${${PREFIX}_LIBNAMES}
    PATHS ${${PREFIX}_PKGCONF_LIBRARY_DIRS}
  )
endmacro()
README.md
View file @
14366fbc
# corpus2mwe
Czytnik do MWE w Corpus2 (moduł corpus2mwe w corpus2)
ZALEŻNOŚCI
-
Corpus2
-
libmwereader z WCCLa (gałąź mwe_fix zawiera poprawkę do MWEReadera
*
)
najlepiej zainstalować całego WCCLa, ponieważ moduł libmwereadera nie zawiera
wszystkich zależności
1) Instalacja Corpus2 z gałęzi corpus2_mwe
git clone git@nlp.pwr.edu.pl:corpus2
cd corpus2
git checkout corpus2_mwe
mkdir build
cd build
cmake ..
make -j
sudo make install
sudo ldconfig
cd ../
2) Instalacja WCCLa z gałęzi mwe_fix
sudo apt-get install -y libantlr-dev
git clone git@nlp.pwr.edu.pl:wccl
cd wccl
git checkout mwe_fix
mkdir build
cd build
cmake ..
make -j
sudo make install
sudo ldconfig
cd ../..
3) Instalacja modułu corpus2mwe w Corpus2 (gałąź corpus2_mwe)
cd corpus2mwe
mkdir build
cd build
cmake ..
make -j
sudo make install
sudo ldconfig
UŻYCIE
Czytnik opakowano z użyciem SWIGa. Zawiera jedną klasę o nazwie CclMWEReader,
będącą odpowiednikiem CclRelReadera z Corpus2.
import corpus2mwe
import corpus2
tagset = corpus2.get_named_tagset('nkjp')
ccl_f = "ccl.xml"
rel_f = "ccl.rel.xml"
reader = corpus2mwe.CclMWEReader(ccl_f, rel_f, tagset)
document = reader.read()
Tak załadowany dokument powinien zawierać rozpoznane wielowyrazowce (MWE),
ściągnięte do pojedynczych tokenów lub oznaczone odpowiednim kanałem anotacji.
Wyboru pomiędzy ściąganiem do pojedynczych tokenów a użyciem anotacji można
dokonać wykorzystując metodę
`use_annotations`
. Domyślnie czytnik oznacza
jednostki MWE z użyciem anotacji.