diff --git a/CMakeLists.txt b/CMakeLists.txt index c71f8cc082c79f9255c2f0a2c1d56ae8c53e7643..952c9deab6259b08f73a09e4bdea9aaf9ac8172e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,4 +67,5 @@ add_subdirectory(libpwrutils) add_subdirectory(libcorpus2) add_subdirectory(tagset-tool) add_subdirectory(tests) +# add_subdirectory(swig) diff --git a/CMakeScripts/FindCorpus2.cmake b/CMakeScripts/FindCorpus2.cmake new file mode 100644 index 0000000000000000000000000000000000000000..da710906b8c363ba5f23e3fd1e96408cb5ad38b6 --- /dev/null +++ b/CMakeScripts/FindCorpus2.cmake @@ -0,0 +1,49 @@ +IF (Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY) +#Already in cache + SET(Corpus2_FOUND TRUE) +ELSE (Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY) + FIND_PATH(Corpus2_INCLUDE_DIR libcorpus2/token.h /usr/include /usr/local/include) + + FIND_LIBRARY(Corpus2_LIBRARY corpus2 /usr/lib /usr/local/lib) + + MARK_AS_ADVANCED(Corpus2_LIBRARY) + MARK_AS_ADVANCED(Corpus2_INCLUDE_DIR) + + IF (Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY) + SET(Corpus2_FOUND TRUE) + ENDIF (Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY) + +ENDIF (Corpus2_INCLUDE_DIR AND Corpus2_LIBRARY) + + +IF (Corpus2_FOUND) + set(Corpus2_VERSION 0.0.0) + FIND_FILE(_Corpus2_VERSION_FILE libcorpus2/version.h ${Corpus2_INCLUDE_DIR}) + MARK_AS_ADVANCED(_Corpus2_VERSION_FILE) + IF (_Corpus2_VERSION_FILE) + FILE(READ ${_Corpus2_VERSION_FILE} _Corpus2_VERSION_CONENTS) + STRING(REGEX REPLACE ".*#define LIBCORPUS2_VERSION \\\"([0-9.]+)\\\".*" "\\1" Corpus2_VERSION "${_Corpus2_VERSION_CONENTS}") + ENDIF (_Corpus2_VERSION_FILE) + IF (Corpus2_FIND_VERSION) + IF (Corpus2_VERSION VERSION_LESS Corpus2_FIND_VERSION) + IF (Corpus2_FIND_REQUIRED) + MESSAGE(${_Corpus2_VERSION_FILE}) + MESSAGE(FATAL_ERROR "Corpus2 version too old: ${Corpus2_VERSION}, requested >= ${Corpus2_FIND_VERSION}") + ELSE (Corpus2_FIND_REQUIRED) + IF (NOT Corpus2_FIND_QUIETLY) + MESSAGE(STATUS "Corpus2 version too old: ${Corpus2_VERSION}, requested >= ${Corpus2_FIND_VERSION}") + ENDIF (NOT 
Corpus2_FIND_QUIETLY) + ENDIF (Corpus2_FIND_REQUIRED) + set(Corpus2_FOUND False) + ENDIF (Corpus2_VERSION VERSION_LESS Corpus2_FIND_VERSION) + ENDIF (Corpus2_FIND_VERSION) + IF (Corpus2_FOUND AND NOT Corpus2_FIND_QUIETLY) # announce success only if the version check above passed + MESSAGE(STATUS "Found libcorpus2 ${Corpus2_VERSION}: ${Corpus2_LIBRARY}") + ENDIF (Corpus2_FOUND AND NOT Corpus2_FIND_QUIETLY) +ELSE (Corpus2_FOUND) + IF (Corpus2_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find libcorpus2") + ELSE (Corpus2_FIND_REQUIRED) + MESSAGE(STATUS "libcorpus2 not found") + ENDIF (Corpus2_FIND_REQUIRED) +ENDIF (Corpus2_FOUND) diff --git a/CMakeScripts/FindPwrUtils.cmake b/CMakeScripts/FindPwrUtils.cmake new file mode 100644 index 0000000000000000000000000000000000000000..858e922e5d0fb3daa4c0ba699a9674b42b9f9a38 --- /dev/null +++ b/CMakeScripts/FindPwrUtils.cmake @@ -0,0 +1,49 @@ +IF (PwrUtils_INCLUDE_DIR AND PwrUtils_LIBRARY) +#already in cache + SET(PwrUtils_FOUND TRUE) +ELSE (PwrUtils_INCLUDE_DIR AND PwrUtils_LIBRARY) + + FIND_PATH(PwrUtils_INCLUDE_DIR libpwrutils/foreach.h /usr/include /usr/local/include ) + + FIND_LIBRARY(PwrUtils_LIBRARY pwrutils /usr/lib /usr/local/lib) + + MARK_AS_ADVANCED(PwrUtils_LIBRARY) + MARK_AS_ADVANCED(PwrUtils_INCLUDE_DIR) + + IF (PwrUtils_INCLUDE_DIR AND PwrUtils_LIBRARY) + SET(PwrUtils_FOUND TRUE) + ENDIF (PwrUtils_INCLUDE_DIR AND PwrUtils_LIBRARY) +ENDIF (PwrUtils_INCLUDE_DIR AND PwrUtils_LIBRARY) + + +IF (PwrUtils_FOUND) + set(PwrUtils_VERSION 0.0.0) + FIND_FILE(_PwrUtils_VERSION_FILE libpwrutils/version.h ${PwrUtils_INCLUDE_DIR}) + MARK_AS_ADVANCED(_PwrUtils_VERSION_FILE) + IF (_PwrUtils_VERSION_FILE) + FILE(READ ${_PwrUtils_VERSION_FILE} _PwrUtils_VERSION_CONENTS) + STRING(REGEX REPLACE ".*#define LIBPWRUTILS_VERSION \\\"([0-9.]+)\\\".*" "\\1" PwrUtils_VERSION "${_PwrUtils_VERSION_CONENTS}") + ENDIF (_PwrUtils_VERSION_FILE) + IF (PwrUtils_FIND_VERSION) + IF (PwrUtils_VERSION VERSION_LESS PwrUtils_FIND_VERSION) + IF (PwrUtils_FIND_REQUIRED) + MESSAGE(${_PwrUtils_VERSION_FILE}) + MESSAGE(FATAL_ERROR "PwrUtils version
too old: ${PwrUtils_VERSION}, requested >= ${PwrUtils_FIND_VERSION}") + ELSE (PwrUtils_FIND_REQUIRED) + IF (NOT PwrUtils_FIND_QUIETLY) + MESSAGE(STATUS "PwrUtils version too old: ${PwrUtils_VERSION}, requested >= ${PwrUtils_FIND_VERSION}") + ENDIF (NOT PwrUtils_FIND_QUIETLY) + ENDIF (PwrUtils_FIND_REQUIRED) + set(PwrUtils_FOUND False) + ENDIF (PwrUtils_VERSION VERSION_LESS PwrUtils_FIND_VERSION) + ENDIF (PwrUtils_FIND_VERSION) + IF (PwrUtils_FOUND AND NOT PwrUtils_FIND_QUIETLY) # announce success only if the version check above passed + MESSAGE(STATUS "Found libpwrutils ${PwrUtils_VERSION}: ${PwrUtils_LIBRARY}") + ENDIF (PwrUtils_FOUND AND NOT PwrUtils_FIND_QUIETLY) +ELSE (PwrUtils_FOUND) + IF (PwrUtils_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find libpwrutils") + ELSE (PwrUtils_FIND_REQUIRED) + MESSAGE(STATUS "libpwrutils not found") + ENDIF (PwrUtils_FIND_REQUIRED) +ENDIF (PwrUtils_FOUND) diff --git a/corpus2data/simple.tagset b/corpus2data/simple.tagset new file mode 100644 index 0000000000000000000000000000000000000000..137d9c74c426465462323458d05bc4ff0d19fb79 --- /dev/null +++ b/corpus2data/simple.tagset @@ -0,0 +1,14 @@ +[ATTR] + +[POS] +noun +adj +adv +verb +qub +other +interp +unk + +[IGN] +unk diff --git a/libcorpus2/io/cclreader.cpp b/libcorpus2/io/cclreader.cpp index eadd580e7dac8c36dd7b7bff4203f96548d90de2..4559ed8e1c3ff8258ca257540a6d612e86ab6a00 100644 --- a/libcorpus2/io/cclreader.cpp +++ b/libcorpus2/io/cclreader.cpp @@ -82,7 +82,7 @@ CclReader::CclReader(const Tagset& tagset, const std::string& filename, bool dis { this->is_owned_.reset(new std::ifstream(filename.c_str(), std::ifstream::in)); - if (this->is_owned_->bad()) { + if (!this->is_owned_->good()) { throw Corpus2Error("File not found!"); } else { diff --git a/libcorpus2/io/fastxces.cpp b/libcorpus2/io/fastxces.cpp index a4813bbc53ef4af326eca28ae3368ba0ded5c1b0..6eaedaf8390214e4c48856b2cc8e054ccdbfa0dc 100644 --- a/libcorpus2/io/fastxces.cpp +++ b/libcorpus2/io/fastxces.cpp @@ -175,7 +175,7 @@ FastXcesReader::FastXcesReader(const Tagset &tagset, const std::string &filename {
this->is_owned_.reset(new std::ifstream(filename.c_str(), std::ifstream::in)); - if (this->is_owned_->bad()) { + if (!this->is_owned_->good()) { throw Corpus2Error("File not found!"); } else { diff --git a/libcorpus2/io/rft.cpp b/libcorpus2/io/rft.cpp index 73003a6b8911258a1015e095f3b8008c85738734..ead40217893a217749ec7e8b62a945c5dfdb36e6 100644 --- a/libcorpus2/io/rft.cpp +++ b/libcorpus2/io/rft.cpp @@ -116,7 +116,7 @@ RftReader::RftReader(const Tagset& tagset, const std::string& filename, bool dis , mbt_dialect_(mbt_dialect) { is_owned_.reset(new std::ifstream(filename.c_str(), std::ifstream::in)); - if (this->is_owned_->bad()) { + if (!this->is_owned_->good()) { throw Corpus2Error("File not found!"); } else { diff --git a/libcorpus2/io/xcesreader.cpp b/libcorpus2/io/xcesreader.cpp index bf14d9dc0080a6bc1a0f2a691d834169a0c7a9d1..6db3428d55e407ed64e14112240e747ec180059a 100644 --- a/libcorpus2/io/xcesreader.cpp +++ b/libcorpus2/io/xcesreader.cpp @@ -52,8 +52,7 @@ XcesReader::XcesReader(const Tagset& tagset, const std::string& filename, bool d impl_(new XcesReaderImpl(*this, chunk_buf_, disamb_only, disamb_sh)) { this->is_owned_.reset(new std::ifstream(filename.c_str(), std::ifstream::in)); - - if (this->is_owned_->bad()) { + if (!this->is_owned_->good()) { throw Corpus2Error("File not found!"); } else { diff --git a/libcorpus2/io/xceswriter.cpp b/libcorpus2/io/xceswriter.cpp index 33693ce8dc8ee9cc51dd7de1c7cc693cb6e7ecfc..bf1cd78d9da558270cc0a2d0b8fd4dc217d47fcb 100644 --- a/libcorpus2/io/xceswriter.cpp +++ b/libcorpus2/io/xceswriter.cpp @@ -71,7 +71,10 @@ void XcesWriter::write_chunk(const Chunk &c) } write_sentence(*s); } - if (use_indent_) indent_less(); + if (new_chunk) { //no sentences in chunk + paragraph_head(c); + } + if (use_indent_ && !new_chunk) indent_less(); osi() << "</chunk>\n"; } diff --git a/libcorpus2/lexeme.cpp b/libcorpus2/lexeme.cpp index be0f8aeb7c82a1343c7b6366ff3f96affbbcc718..d5370ed662ee7974505ec9732c904cf6377e0fc6 100644 --- 
a/libcorpus2/lexeme.cpp +++ b/libcorpus2/lexeme.cpp @@ -33,6 +33,12 @@ Lexeme Lexeme::create(const UnicodeString& lemma, const Tag& tag) return Lexeme(lemma, tag); } +Lexeme Lexeme::create_utf8(const std::string& lemma_utf8, const Tag& tag) +{ + return Lexeme(UnicodeString::fromUTF8(lemma_utf8), tag); +} + + bool Lexeme::is_null() const { return lemma().length() == 0 || tag().is_null(); diff --git a/libcorpus2/lexeme.h b/libcorpus2/lexeme.h index fbf82a0a8957e06ea7dab83fb61f27c6a351ca2e..4c0119ab58d91c9b932be2eba46cf4373272040b 100644 --- a/libcorpus2/lexeme.h +++ b/libcorpus2/lexeme.h @@ -53,6 +53,9 @@ public: /// Helper creation function static Lexeme create(const UnicodeString& lemma, const Tag& tag); + /// Helper creation function, UTF-8 variant + static Lexeme create_utf8(const std::string& lemma_utf8, const Tag& tag); + /// Lemma accessor const UnicodeString& lemma() const { return lemma_; @@ -63,6 +66,11 @@ public: lemma_ = l; } + /// Lemma setter, UTF-8 variant + void set_lemma_utf8(const std::string& lemma_utf8) { + lemma_ = UnicodeString::fromUTF8(lemma_utf8); + } + /// UTF-8 lemma convenience accessor const std::string lemma_utf8() const { return PwrNlp::to_utf8(lemma_); diff --git a/libcorpus2/token.cpp b/libcorpus2/token.cpp index dba032254ca1c2e122be2429cd41d4c5ae776a86..38f3bdb2c4f92ed92691fe33662ac9bfc979dce2 100644 --- a/libcorpus2/token.cpp +++ b/libcorpus2/token.cpp @@ -32,6 +32,12 @@ Token::Token(const UnicodeString &orth, PwrNlp::Whitespace::Enum wa) { } +Token* Token::create_utf8(const std::string& orth_utf8, + PwrNlp::Whitespace::Enum wa /*= PwrNlp::Whitespace::Space*/) +{ + return new Token(UnicodeString::fromUTF8(orth_utf8), wa); +} + Token* Token::clone() const { Token* t = new Token(); diff --git a/libcorpus2/token.h b/libcorpus2/token.h index 13c623fb8895955913de658f8e87225a336cf190..ab0992271655081f2c4d0f8442df0ec4f4c3bd07 100644 --- a/libcorpus2/token.h +++ b/libcorpus2/token.h @@ -54,6 +54,10 @@ public: /// Create a Token with the 
given orth and whitespace amount Token(const UnicodeString& orth, PwrNlp::Whitespace::Enum wa); + /// Token creation, UTF-8 + static Token* create_utf8(const std::string& orth_utf8, + PwrNlp::Whitespace::Enum wa = PwrNlp::Whitespace::Space); + /// Create a duplicate Token Token* clone() const; @@ -72,6 +76,11 @@ public: orth_ = orth; } + /// Orth setter (UTF-8) + void set_orth_utf8(const std::string& orth_utf8) { + orth_ = UnicodeString::fromUTF8(orth_utf8); + } + /// WA getter const PwrNlp::Whitespace::Enum& wa() const { return wa_; diff --git a/libpwrutils/pathsearch.cpp b/libpwrutils/pathsearch.cpp index c89b7a4bf1af495ca866ffbc62bb7a3656d24ff3..1584d79a14af76d3011954dbd30ca505260a00c9 100644 --- a/libpwrutils/pathsearch.cpp +++ b/libpwrutils/pathsearch.cpp @@ -72,13 +72,24 @@ const std::string& PathSearcherBase::get_path_separator() const } std::string PathSearcherBase::find_file(const std::string& filename, - const std::string& info) + const std::string& info) const { boost::filesystem::path i(filename); + if (i.is_complete()) { + if (boost::filesystem::exists(i) && + !boost::filesystem::is_directory(i)) { + if (verbose_loading_) { + std::cerr << "Found " << info << " file: " + << i.string() << "\n"; + } + return i.string(); + } + return ""; + } foreach (const std::string& s, paths_) { boost::filesystem::path pi = s / i; if (boost::filesystem::exists(pi) && - boost::filesystem::is_regular(pi)) { + !boost::filesystem::is_directory(pi)) { if (verbose_loading_) { std::cerr << "Found " << info << " file: " << pi.string() << "\n"; @@ -90,7 +101,7 @@ std::string PathSearcherBase::find_file(const std::string& filename, } bool PathSearcherBase::open_stream(const std::string& filename, - std::ifstream& ifs, const std::string& info) + std::ifstream& ifs, const std::string& info) const { std::string f = find_file(filename, info); if (!f.empty()) { diff --git a/libpwrutils/pathsearch.h b/libpwrutils/pathsearch.h index 
956f68deab9b0282b4b33d5a72c408495a9626a0..c33aaa7e677b5d84bd2f12bfeb71295b90a5b4a7 100644 --- a/libpwrutils/pathsearch.h +++ b/libpwrutils/pathsearch.h @@ -73,7 +73,7 @@ public: * is on. Empty info string suppreses loading info. */ std::string find_file(const std::string& filename, - const std::string& info = ""); + const std::string& info = "") const; /** * Open a file stream for a file in the library search path @@ -83,7 +83,22 @@ public: * is on. Empty info string suppreses loading info. */ bool open_stream(const std::string& filename, std::ifstream& ifs, - const std::string& info = ""); + const std::string& info = "") const; + + /** + * Convenience wrapper around find_file to throw an exception + * when the file is not found. + * Virtual, as it throws the exception defined by the child class. + */ + virtual std::string find_file_or_throw(const std::string& filename, + const std::string& where) const = 0; + + /** + * Wrapper around open_stream to throw an exception when the file is not + * found. Virtual, as it throws the exception defined by the child class. + */ + virtual void open_stream_or_throw(const std::string& filename, + std::ifstream& ifs, const std::string& where) const = 0; /** * Look for files matching a condition. @@ -122,14 +137,14 @@ public: * when the file is not found. */ std::string find_file_or_throw(const std::string& filename, - const std::string& where); + const std::string& where) const; /** * Convenience template wrapper around open_stream to throw an * exception when the file is not found. 
*/ void open_stream_or_throw(const std::string& filename, - std::ifstream& ifs, const std::string& where); + std::ifstream& ifs, const std::string& where) const; }; @@ -158,7 +173,7 @@ private: template<class E> std::string PathSearcher<E>::find_file_or_throw( - const std::string& filename, const std::string& info) + const std::string& filename, const std::string& info) const { std::string fn = find_file(filename, info); if (fn.empty()) { @@ -169,7 +184,7 @@ std::string PathSearcher<E>::find_file_or_throw( template<class E> void PathSearcher<E>::open_stream_or_throw(const std::string& filename, - std::ifstream& ifs, const std::string& info) + std::ifstream& ifs, const std::string& info) const { if (!open_stream(filename, ifs, info)) { throw E(filename, get_search_path_string(), info); diff --git a/swig/CMakeLists.txt b/swig/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..978ece174dff98284913c1d162a075b38422948d --- /dev/null +++ b/swig/CMakeLists.txt @@ -0,0 +1,108 @@ +########## corpus2 wrappers -- SWIG ############### + +PROJECT(corpus2SwigWrap) + +find_package(Corpus2 1.0.8 REQUIRED) +set(CORPUS2_LIBS ${Corpus2_LIBRARY}) + +find_package(PwrUtils 1.0.1 REQUIRED) +set(PWRUTILS_LIBS ${PwrUtils_LIBRARY}) +set(CORPUS2_PWR_LIBS ${CORPUS2_LIBS} ${PWRUTILS_LIBS}) + +include_directories (${Libcorpus2_SOURCE_DIR} "../libcorpus2") +include_directories (${Libpwrutils_SOURCE_DIR} "../libpwrutils") + +link_directories(${Libcorpus2_BINARY_DIR}) + +FIND_PACKAGE(SWIG REQUIRED) +INCLUDE(${SWIG_USE_FILE}) + +find_package(PythonInterp) # looked up first so PythonLibs matches the active interpreter +find_package(PythonLibs) +# idea taken from pyplot build system +execute_process( + COMMAND + ${PYTHON_EXECUTABLE} -c "from distutils import sysconfig; print(sysconfig.get_python_lib(1,0,prefix='${CMAKE_INSTALL_EXEC_PREFIX}'))" # print() call form is valid on both Python 2 and Python 3 + OUTPUT_VARIABLE PYTHON_INSTDIR + OUTPUT_STRIP_TRAILING_WHITESPACE +) +message(STATUS "INFO: " "python lib: ${PYTHON_INSTDIR}" ) + +INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_PATH})
+INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- + +SET(CMAKE_SWIG_FLAGS "") +SET(SWIG_SRC_DIR ${SWIG_DIR}/libcorpus) + +# ----------------------------------------------------------------------------- +# corpus2 +SET_SOURCE_FILES_PROPERTIES(corpus2.i PROPERTIES CPLUSPLUS ON) +SET_SOURCE_FILES_PROPERTIES(corpus2.i PROPERTIES SWIG_FLAGS "-includeall" ) +SWIG_ADD_MODULE(corpus2 python corpus2.i ) +SWIG_LINK_LIBRARIES(corpus2 ${PYTHON_LIBRARIES} ${PWRUTILS_LIBS}) +SWIG_LINK_LIBRARIES(corpus2 ${PYTHON_LIBRARIES} ${CORPUS2_PWR_LIBS}) + +# ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- + +set_target_properties( + _corpus2 + PROPERTIES + INSTALL_NAME_DIR "${PYTHON_INSTDIR}" +) + +set(PERM_SCRIPTS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE +) + +# ----------------------------------------------------------------------------- +# Install python modules +# ----------------------------------------------------------------------------- + +install( + TARGETS _corpus2 + LIBRARY + DESTINATION ${PYTHON_INSTDIR} + PERMISSIONS ${PERM_SCRIPTS} +) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/corpus2.py + DESTINATION ${PYTHON_INSTDIR} + PERMISSIONS ${PERM_SCRIPTS} +) + +# ----------------------------------------------------------------------------- +# Install SWIG files +# ----------------------------------------------------------------------------- + +install( + FILES libpwrnlperror.i + libcorpus2exception.i + libcorpus2exception.i + libcorpustag.i + libcorpustagset.i + libcorpustagsetmanager.i + libcorpuslexeme.i + libcorpustoken.i + libcorpussentence.i + libcorpuschunk.i + libcorpustokenwriter.i + libcorpustokenreader.i + libcorpusiob.i +
libcorpusannotationchannel.i + libcorpusannotatedsentence.i + libcorpusannotationview.i + corpus2.i + DESTINATION ${SWIG_SRC_DIR} + PERMISSIONS ${PERM_SCRIPTS} +) diff --git a/swig/Makefile b/swig/Makefile index 8ca3262b87b9911c64a79f8c4059738dd8de8cbc..9d34763f60a97d79b7445df5e8a06a6ba461b16f 100644 --- a/swig/Makefile +++ b/swig/Makefile @@ -13,7 +13,9 @@ ANTLRLIB=/usr/lib/libantlr-pic.a CPPFLAGS=-fPIC -O2 -CBIN=libcorpustag.o \ +CBIN=libpwrnlperror.o \ + libcorpus2exception.o \ + libcorpustag.o \ libcorpustagset.o \ libcorpustagsetmanager.o \ libcorpuslexeme.o \ @@ -25,9 +27,12 @@ CBIN=libcorpustag.o \ libcorpusiob.o \ libcorpusannotationchannel.o \ libcorpusannotatedsentence.o \ - libcorpusannotationview.o + libcorpusannotationview.o \ + corpus2.o -CBINOUT=_boost_shared_ptr.so \ +CBINOUT=_libpwrnlperror.so \ + _libcorpus2exception.so \ + _boost_shared_ptr.so \ _libcorpustag.so \ _libcorpustagset.so \ _libcorpustagsetmanager.so \ @@ -40,9 +45,12 @@ CBINOUT=_boost_shared_ptr.so \ _libcorpusiob.so \ _libcorpusannotationchannel.so \ _libcorpusannotatedsentence.so \ - _libcorpusannotationview.so + _libcorpusannotationview.so \ + _corpus2.so -CWRAP=boost_shared_ptr_wrap.cxx \ +CWRAP=libpwrnlperror_wrap.cxx \ + libcorpus2exception_wrap.cxx \ + boost_shared_ptr_wrap.cxx \ libcorpustag_wrap.cxx \ libcorpustagset_wrap.cxx \ libcorpustagsetmanager_wrap.cxx \ @@ -55,9 +63,12 @@ CWRAP=boost_shared_ptr_wrap.cxx \ libcorpusiob_wrap.cxx \ libcorpusannotationchannel_wrap.cxx \ libcorpusannotatedsentence_wrap.cxx \ - libcorpusannotationview_wrap.cxx + libcorpusannotationview_wrap.cxx \ + corpus2_wrap.cxx -CWRAPBIN=boost_shared_ptr_wrap.o \ +CWRAPBIN=libpwrnlperror_wrap.o \ + libcorpus2exception_wrap.o \ + boost_shared_ptr_wrap.o \ libcorpustag_wrap.o \ libcorpustagset_wrap.o \ libcorpustagsetmanager_wrap.o \ @@ -70,9 +81,12 @@ CWRAPBIN=boost_shared_ptr_wrap.o \ libcorpusiob_wrap.o \ libcorpusannotationchannel_wrap.o \ libcorpusannotatedsentence_wrap.o \ - 
libcorpusannotationview_wrap.o + libcorpusannotationview_wrap.o \ + corpus2_wrap.o -PYMODULES=boost_shared_ptr.py \ +PYMODULES=libpwrnlperror.py \ + libcorpus2exception.py \ + boost_shared_ptr.py \ libcorpustag.py \ libcorpustagset.py \ libcorpustagsetmanager.py \ @@ -85,9 +99,12 @@ PYMODULES=boost_shared_ptr.py \ libcorpusiob.py \ libcorpusannotationchannel.py \ libcorpusannotatedsentence.py \ - libcorpusannotationview.py + libcorpusannotationview.py \ + corpus2.py -PYCBIN=boost_shared_ptr.pyc \ +PYCBIN=libpwrnlperror.pyc \ + libcorpus2exception.pyc \ + boost_shared_ptr.pyc \ libcorpustag.pyc \ libcorpustagset.pyc \ libcorpustagsetmanager.pyc \ @@ -100,7 +117,8 @@ PYCBIN=boost_shared_ptr.pyc \ libcorpusiob.pyc \ libcorpusannotationchannel.pyc \ libcorpusannotatedsentence.pyc \ - libcorpusannotationview.pyc + libcorpusannotationview.pyc \ + corpus2.pyc # ----------------------------------------------------------------------------- all:boost_shared_ptr.o $(CBIN) @@ -116,10 +134,25 @@ boost_shared_ptr.o: $(CPP) -c boost_shared_ptr_wrap.cxx -I$(PYTHONDIR) $(CPPFLAGS) $(CPP) -shared boost_shared_ptr_wrap.o -o _boost_shared_ptr.so +# ----------------------------------------------------------------------------- +# PwrNlpError wrapper +# ----------------------------------------------------------------------------- +# PwrNlpError +libpwrnlperror.o: + $(SWIG) $(SWIGOPTS_LANG) libpwrnlperror.i + $(CPP) -c libpwrnlperror_wrap.cxx -I$(PYTHONDIR) $(CPPFLAGS) + $(CPP) -shared libpwrnlperror_wrap.o -o _libpwrnlperror.so # ----------------------------------------------------------------------------- # Corpus2 Wrappers # ----------------------------------------------------------------------------- + +libcorpus2exception.o: + $(SWIG) $(SWIGOPTS_LANG) libcorpus2exception.i + $(CPP) -c libcorpus2exception_wrap.cxx -I$(PYTHONDIR) $(CPPFLAGS) + $(CPP) -shared libcorpus2exception_wrap.o \ + $(PWRUTILBIN) $(CORPUS2BIN) -o _libcorpus2exception.so + # Tag libcorpustag.o: $(SWIG)
$(SWIGOPTS_LANG) libcorpustag.i @@ -211,6 +244,15 @@ libcorpusannotationview.o: $(CPP) -shared libcorpusannotationview_wrap.o \ $(PWRUTILBIN) $(CORPUS2BIN) -o _libcorpusannotationview.so +# ----------------------------------------------------------------------------- + +# Corpus2 +corpus2.o: + $(SWIG) $(SWIGOPTS_LANG) corpus2.i + $(CPP) -c corpus2_wrap.cxx -I$(PYTHONDIR) $(CPPFLAGS) + $(CPP) -shared corpus2_wrap.o \ + $(PWRUTILBIN) $(CORPUS2BIN) -o _corpus2.so + # ----------------------------------------------------------------------------- clean: rm -f $(CBIN) $(CBINOUT) $(CWRAP) $(CWRAPBIN) $(PYMODULES) $(PYCBIN) diff --git a/swig/corpus2.i b/swig/corpus2.i new file mode 100644 index 0000000000000000000000000000000000000000..a805a328edbf38ce4cf86f21915a120b9e2d5b60 --- /dev/null +++ b/swig/corpus2.i @@ -0,0 +1,23 @@ +#ifndef SWIG_CORPUS2_I +#define SWIG_CORPUS2_I + +%module corpus2 + +%include "boost_shared_ptr.i" +%include "libcorpus2exception.i" +%include "libcorpusannotatedsentence.i" +%include "libcorpusannotationchannel.i" +%include "libcorpusannotationview.i" +%include "libcorpuschunk.i" +%include "libcorpusiob.i" +%include "libcorpuslexeme.i" +%include "libcorpussentence.i" +%include "libcorpustag.i" +%include "libcorpustagset.i" +%include "libcorpustagsetmanager.i" +%include "libcorpustoken.i" +%include "libcorpustokenreader.i" +%include "libcorpustokenwriter.i" +%include "libpwrnlperror.i" + +#endif /* SWIG_CORPUS2_I */ diff --git a/swig/libcorpus2exception.i b/swig/libcorpus2exception.i new file mode 100644 index 0000000000000000000000000000000000000000..2ee4463ae619ffa4dd80c12e1aec8161682c2369 --- /dev/null +++ b/swig/libcorpus2exception.i @@ -0,0 +1,39 @@ +#ifndef SWIG_LIBCORPUS2_EXCEPTION_I +#define SWIG_LIBCORPUS2_EXCEPTION_I + +%module libcorpus2exception +%{ + #include <libcorpus2/exception.h> +%} + +%include "std_string.i" + +%include "libpwrnlperror.i" + +namespace Corpus2 { + class Corpus2Error : public PwrNlp::PwrNlpError { + public: + 
Corpus2Error(const std::string &what); + ~Corpus2Error() throw(); + + /* --------------------------------------------------------------------- */ + std::string scope() const; + }; + + class FileNotFound : public Corpus2Error { + public: + FileNotFound(const std::string& filename, const std::string& paths, + const std::string& where); + + ~FileNotFound() throw(); + + /* --------------------------------------------------------------------- */ + std::string info() const; + std::string filename, paths, where; + }; +} + +using namespace std; +using namespace Corpus2; + +#endif /* SWIG_LIBCORPUS2_EXCEPTION_I */ diff --git a/swig/libcorpusannotatedsentence.i b/swig/libcorpusannotatedsentence.i index 0f85282b0414c9b45a9cd734ada6d38ffd926e4f..f7c27b109a0f30fdebc2fb2cfb3184d964abde29 100644 --- a/swig/libcorpusannotatedsentence.i +++ b/swig/libcorpusannotatedsentence.i @@ -8,6 +8,7 @@ %include "libcorpustoken.i" %include "libcorpussentence.i" +%include "libcorpus2exception.i" %include "libcorpusannotationchannel.i" %include "boost_shared_ptr.i" @@ -21,6 +22,12 @@ %template(AnnotatedSentencePtr) boost::shared_ptr<Corpus2::AnnotatedSentence>; namespace Corpus2 { + class MissingAnnotationChannel : public Corpus2Error { + public: + MissingAnnotationChannel(const std::string& name); + ~MissingAnnotationChannel() throw(); + }; // MissingAnnotationChannel + class AnnotatedSentence : public Corpus2::Sentence { public: typedef std::map<std::string, AnnotationChannel> chan_map_t; diff --git a/swig/libcorpuslexeme.i b/swig/libcorpuslexeme.i index 058e8b7d449a9375f0f288543ec67670554413ee..e962845b00b107b7b538c3325819f2bfde3d95c4 100644 --- a/swig/libcorpuslexeme.i +++ b/swig/libcorpuslexeme.i @@ -24,13 +24,13 @@ namespace Corpus2 { Lexeme(const UnicodeString& lemma, const Tag& tag); static Lexeme create(const UnicodeString& lemma, const Tag& tag); - // static Lexeme create_utf8(const std::string& lemma, const Tag& tag); + static Lexeme create_utf8(const std::string& lemma, const Tag& 
tag); const UnicodeString& lemma() const; const std::string lemma_utf8() const; void set_lemma(const UnicodeString& l); - // void set_lemma_utf8(const std::string& l); + void set_lemma_utf8(const std::string& l); const Tag& tag() const; void set_tag(const Tag& tag); diff --git a/swig/libcorpussentence.i b/swig/libcorpussentence.i index d580b81377394a74bc94a4e777b27d3158fea296..be08a46d7a9a32f0d8b04fa1f40c7d519e6601a4 100644 --- a/swig/libcorpussentence.i +++ b/swig/libcorpussentence.i @@ -47,6 +47,7 @@ namespace Corpus2 { } using namespace std; +using namespace boost; using namespace Corpus2; #endif /* SWIG_LIBCORPUS2_SENTENCE_I */ diff --git a/swig/libcorpustagset.i b/swig/libcorpustagset.i index 8030f6c23e5cd699dacb31b02dc1cd4a2fc5fb78..4b469a69295342a49ff34baec8f50bb2c6464735 100644 --- a/swig/libcorpustagset.i +++ b/swig/libcorpustagset.i @@ -7,6 +7,7 @@ %} %include "libcorpustag.i" +%include "libcorpus2exception.i" %include "std_string.i" %include "std_vector.i" @@ -17,6 +18,36 @@ %template(StdStringVector) std::vector<std::string>; namespace Corpus2 { + class Tagset; + class TagParseError : public Corpus2Error { + public: + TagParseError(const std::string &what, const std::string& val, + const std::string& tag, const std::string& tagset); + + ~TagParseError() throw() {} + + /* --------------------------------------------------------------------- */ + std::string info() const; + std::string val, tag, tagset; + }; // TagParseError + + class TagsetMismatch : public Corpus2Error { + public: + TagsetMismatch(const std::string& where, const Tagset& expected, + const Tagset& actual); + + // TagsetMismatch(const std::string& where, tagset_idx_t expected, + // tagset_idx_t actual); + + ~TagsetMismatch() throw() {} + + /* --------------------------------------------------------------------- */ + std::string info() const; + + /* --------------------------------------------------------------------- */ + // tagset_idx_t expected_id, actual_id; + }; // TagsetMismatch + 
class Tagset { public: diff --git a/swig/libcorpustagsetmanager.i b/swig/libcorpustagsetmanager.i index 2e0ed4a82b6418c7d88a29c44f659df8c9a5040e..adc0b4c9aa55c8310043c97e3ab6d248c1b8eaac 100644 --- a/swig/libcorpustagsetmanager.i +++ b/swig/libcorpustagsetmanager.i @@ -7,6 +7,7 @@ %} %include "libcorpustagset.i" +%include "libcorpus2exception.i" %include "std_string.i" %include "std_vector.i" @@ -16,6 +17,18 @@ %template(TagsetPtr) boost::shared_ptr<Tagset>; namespace Corpus2 { + class TagsetNotFound : public Corpus2Error { + public: + // explicit TagsetNotFound(const tagset_idx_t id); + ~TagsetNotFound() throw() {} + + /* --------------------------------------------------------------------- */ + std::string info() const; + + /* --------------------------------------------------------------------- */ + // tagset_idx_t id; + }; + class TagsetManager { public: TagsetManager(); @@ -28,6 +41,18 @@ namespace Corpus2 { /* --------------------------------------------------------------------- */ }; + + %exception { + try { + $action + } catch (Corpus2::TagsetNotFound &e) { + PyErr_SetString(PyExc_IndexError, e.info().c_str()); + return NULL; + } catch (PwrNlp::PwrNlpError &e) { + PyErr_SetString(PyExc_IndexError, e.info().c_str()); + return NULL; + } + } inline const Tagset& get_named_tagset(const std::string& name); } diff --git a/swig/libcorpustoken.i b/swig/libcorpustoken.i index 079063b5d88d7c26eccbf75e83195f3f6833aff7..5d81bba2580a9fe4a3236f505c70345e51ad73f1 100644 --- a/swig/libcorpustoken.i +++ b/swig/libcorpustoken.i @@ -26,12 +26,14 @@ namespace Corpus2 { Token(); Token(const UnicodeString& orth, PwrNlp::Whitespace::Enum wa); Token* clone() const; + + static Token* create_utf8(const std::string& orth_utf8, PwrNlp::Whitespace::Enum wa = PwrNlp::Whitespace::Space); /* static factory, as declared in token.h; returns a newly allocated Token */ const UnicodeString& orth() const; std::string orth_utf8() const; void set_orth(const UnicodeString& orth); - // void set_orth_utf8(const std::string& orth); + void set_orth_utf8(const std::string& orth); const
PwrNlp::Whitespace::Enum& wa() const; void set_wa(const PwrNlp::Whitespace::Enum& wa); diff --git a/swig/libcorpustokenreader.i b/swig/libcorpustokenreader.i index 935ba2159fcdfd9c23e1303ab6cfe52c6db738a2..93043ff8446bdb1adc53001c4da9781f2399dae1 100644 --- a/swig/libcorpustokenreader.i +++ b/swig/libcorpustokenreader.i @@ -30,11 +30,27 @@ namespace Corpus2 { virtual ~TokenReader(); /* --------------------------------------------------------------------- */ + %exception { + try { + $action + } catch (PwrNlp::PwrNlpError &e) { + PyErr_SetString(PyExc_IndexError, e.info().c_str()); + return NULL; + } + } static TokenReaderPtr create_path_reader( const std::string& class_id, const Tagset& tagset, const std::string& path); + %exception { + try { + $action + } catch (PwrNlp::PwrNlpError &e) { + PyErr_SetString(PyExc_IndexError, e.info().c_str()); + return NULL; + } + } static TokenReaderPtr create_stream_reader( const std::string& class_id, const Tagset& tagset, diff --git a/swig/libcorpustokenwriter.i b/swig/libcorpustokenwriter.i index 19e914f1ab7c8dc68c9d17b971cfd6c88475a9f7..50ad969ab890b83f4fe22fc2ef5156bddce69932 100644 --- a/swig/libcorpustokenwriter.i +++ b/swig/libcorpustokenwriter.i @@ -39,11 +39,27 @@ namespace Corpus2 { void finish(); /* --------------------------------------------------------------------- */ + %exception { + try { + $action + } catch (PwrNlp::PwrNlpError &e) { + PyErr_SetString(PyExc_IndexError, e.info().c_str()); + return NULL; + } + } static TokenWriterPtr create_stream_writer( const std::string& class_id_params, std::ostream& os, const Tagset& tagset); + %exception { + try { + $action + } catch (PwrNlp::PwrNlpError &e) { + PyErr_SetString(PyExc_IndexError, e.info().c_str()); + return NULL; + } + } static TokenWriterPtr create_path_writer( const std::string& class_id_params, const std::string& path, diff --git a/swig/libpwrnlperror.i b/swig/libpwrnlperror.i new file mode 100644 index 
0000000000000000000000000000000000000000..31b715783ae28e70593b616c22dae756dc55e083 --- /dev/null +++ b/swig/libpwrnlperror.i @@ -0,0 +1,30 @@ +#ifndef SWIG_LIBPWRNLP_PWRNLPERROR_I +#define SWIG_LIBPWRNLP_PWRNLPERROR_I + +%module libpwrnlperror +%{ + #include <libpwrutils/exception.h> +%} + +%include "std_string.i" +%include "std_except.i" + +%nodefaultctor PwrNlp::PwrNlpError; + +namespace PwrNlp { + class PwrNlpError : public std::runtime_error { + public: + PwrNlpError(const std::string &what); + + /* --------------------------------------------------------------------- */ + ~PwrNlpError() throw(); + + virtual std::string info() const; + virtual std::string scope() const; + }; +} + +using namespace std; +using namespace PwrNlp; + +#endif /* SWIG_LIBPWRNLP_PWRNLPERROR_I */ diff --git a/swig/makewrapper.sh b/swig/makewrapper.sh index fdf80c5f917ff9bff414341776ccf953e1e687d3..274daed608341da3aa6e8aa9a2326a2c94102a9b 100755 --- a/swig/makewrapper.sh +++ b/swig/makewrapper.sh @@ -2,7 +2,7 @@ if [ ${#} -eq 0 ] then - echo "Usage: $0 corpus2class" + echo -e "\e[1;31mUsage: $0 corpus2class\e[0m" exit 1 fi