diff --git a/libcorpus2/io/reader.cpp b/libcorpus2/io/reader.cpp
index 46a158a4eceaf78eee265785497ca1a4c4811b76..b3b12ac2be5f9d731186de3a486f934dff7aa9e2 100644
--- a/libcorpus2/io/reader.cpp
+++ b/libcorpus2/io/reader.cpp
@@ -19,9 +19,32 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 #include <boost/algorithm/string.hpp>
 #include <libcorpus2/ann/annotatedsentence.h>
 #include <sstream>
+#include <libpwrutils/plugin.h>
+#include <libcorpus2/util/settings.h>
 
 namespace Corpus2 {
 
+namespace detail {
+/**
+ * Declaration of the TokenReader factory as a singleton Loki object
+ * factory. The factory instance can be accessed through
+ * token_reader_factory(). It is assumed that all derived classes
+ * have the same constructor signature.
+ */
+typedef Loki::SingletonHolder<
+	TokenReaderFactory,
+	Loki::CreateUsingNew, // default, needed to change the item below
+	Loki::LongevityLifetime::DieAsSmallObjectChild // per libloki docs
+>
+TokenReaderFactorySingleton;
+
+TokenReaderFactory& token_reader_factory()
+{
+	return TokenReaderFactorySingleton::Instance();
+}
+} /* end ns detail */
+
+
 TokenReader::TokenReader(const Tagset& tagset)
 	: tagset_(tagset), tag_parse_mode_(Tagset::ParseDefault),
 	use_annotated_sentences_(false)
@@ -76,6 +99,22 @@ boost::shared_ptr<Sentence> TokenReader::make_sentence() const
 	}
 }
 
+namespace {
+/**
+ * Name of the idx-th plugin to try loading when reader_class_id is not
+ * registered: "<id>reader" first, then the bare id. An empty string
+ * means there are no more candidates.
+ */
+std::string guess_plugin_name(const std::string& reader_class_id, int idx)
+{
+	switch (idx) {
+		case 0: return reader_class_id + "reader";
+		case 1: return reader_class_id;
+		default: return "";
+	}
+}
+}
+
 boost::shared_ptr<TokenReader> TokenReader::create_path_reader(
 		const std::string& class_id_params,
 		const Tagset& tagset,
@@ -86,13 +125,25 @@ boost::shared_ptr<TokenReader> TokenReader::create_path_reader(
 		boost::is_any_of(std::string(",")));
 	std::string class_id = boost::copy_range<std::string>(params[0]);
 	params.erase(params.begin(), params.begin() + 1);
-	try {
-		return boost::shared_ptr<TokenReader>(
-		detail::TokenReaderFactorySingleton::Instance().path_factory.CreateObject(
-			class_id, tagset, path, params));
-	} catch (detail::TokenReaderFactoryException&) {
-		throw Corpus2Error("Reader class not found: " + class_id);
+	int plugin_name_idx = 0;
+	while (plugin_name_idx >= 0) {
+		try {
+			return boost::shared_ptr<TokenReader>(
+				detail::TokenReaderFactorySingleton::Instance().
+				path_factory.CreateObject(class_id, tagset, path, params));
+		} catch (detail::TokenReaderFactoryException&) {
+			// class id not registered (yet): load the next candidate
+			// plugin and retry, or leave the loop when none are left
+			std::string next_plugin = guess_plugin_name(class_id, plugin_name_idx);
+			if (!next_plugin.empty()) {
+				PwrNlp::Plugin::load("corpus2", next_plugin, !Path::Instance().get_verbose());
+				plugin_name_idx++;
+			} else {
+				plugin_name_idx = -1;
+			}
+		}
 	}
+	throw Corpus2Error("Reader class not found: " + class_id);
 }
 
 boost::shared_ptr<TokenReader> TokenReader::create_stream_reader(
@@ -105,19 +156,31 @@ boost::shared_ptr<TokenReader> TokenReader::create_stream_reader(
 		boost::is_any_of(std::string(",")));
 	std::string class_id = boost::copy_range<std::string>(params[0]);
 	params.erase(params.begin(), params.begin() + 1);
-	try {
-		return boost::shared_ptr<TokenReader>(
-		detail::TokenReaderFactorySingleton::Instance().stream_factory.CreateObject(
-			class_id, tagset, stream, params));
-	} catch (detail::TokenReaderFactoryException& e) {
-		std::vector<std::string> ids;
-		ids = detail::TokenReaderFactorySingleton::Instance().path_factory.RegisteredIds();
-		if (std::find(ids.begin(), ids.end(), class_id) == ids.end()) {
-			throw Corpus2Error("Reader class not found: " + class_id);
-		} else {
-			throw Corpus2Error("This reader does not support stream mode: " + class_id);
+	int plugin_name_idx = 0;
+	while (plugin_name_idx >= 0) {
+		try {
+			return boost::shared_ptr<TokenReader>(
+				detail::TokenReaderFactorySingleton::Instance()
+				.stream_factory.CreateObject(class_id, tagset, stream, params));
+		} catch (detail::TokenReaderFactoryException&) {
+			// class id not registered (yet): load the next candidate
+			// plugin and retry, or leave the loop when none are left
+			std::string next_plugin = guess_plugin_name(class_id, plugin_name_idx);
+			if (!next_plugin.empty()) {
+				PwrNlp::Plugin::load("corpus2", next_plugin, !Path::Instance().get_verbose());
+				plugin_name_idx++;
+			} else {
+				plugin_name_idx = -1;
+			}
 		}
 	}
+	std::vector<std::string> ids;
+	ids = detail::TokenReaderFactorySingleton::Instance().path_factory.RegisteredIds();
+	if (std::find(ids.begin(), ids.end(), class_id) == ids.end()) {
+		throw Corpus2Error("Reader class not found: " + class_id);
+	} else {
+		throw Corpus2Error("This reader does not support stream mode: " + class_id);
+	}
 }
 
 std::vector<std::string> TokenReader::available_reader_types()
diff --git a/libcorpus2/io/reader.h b/libcorpus2/io/reader.h
index 2c2ddecc11aecffdc7821ef9f9655d693d358b20..e9d01f6e7ecf04c9bbd67c99051bf5ee14bf368e 100644
--- a/libcorpus2/io/reader.h
+++ b/libcorpus2/io/reader.h
@@ -203,17 +203,9 @@ struct TokenReaderFactory
 };
 
 /**
- * Declaration of the TokenWriter factory as a singleton Loki object
- * factory. The factory instance can be accessed as
- * TokenLayerFactory::Instance(). It is assumed that all derived classes
- * have the same constructor signature.
+ * Factory singleton accessor
  */
-typedef Loki::SingletonHolder<
-	TokenReaderFactory,
-	Loki::CreateUsingNew, // default, needed to change the item below
-	Loki::LongevityLifetime::DieAsSmallObjectChild // per libloki docs
->
-TokenReaderFactorySingleton;
+TokenReaderFactory& token_reader_factory();
 
 /**
  * Templated TokenReader creation function, stream variant
@@ -262,12 +254,12 @@ template <typename T>
 bool TokenReader::register_reader(const std::string& class_id,
 		const std::string& help)
 {
-	bool ret = detail::TokenReaderFactorySingleton::Instance().path_factory.Register(
+	bool ret = detail::token_reader_factory().path_factory.Register(
 			class_id, detail::path_reader_creator<T>);
-	bool ret2 = detail::TokenReaderFactorySingleton::Instance().stream_factory.Register(
+	bool ret2 = detail::token_reader_factory().stream_factory.Register(
 			class_id, detail::stream_reader_creator<T>);
 	if (ret || ret2) {
-		detail::TokenReaderFactorySingleton::Instance().help[class_id] = help;
+		detail::token_reader_factory().help[class_id] = help;
 	}
 	return ret;
 }
@@ -276,10 +268,10 @@ template <typename T>
 bool TokenReader::register_path_reader(const std::string& class_id,
 		const std::string& help)
 {
-	bool ret = detail::TokenReaderFactorySingleton::Instance().path_factory.Register(
+	bool ret = detail::token_reader_factory().path_factory.Register(
 			class_id, detail::path_reader_creator<T>);
 	if (ret) {
-		detail::TokenReaderFactorySingleton::Instance().help[class_id] = help;
+		detail::token_reader_factory().help[class_id] = help;
 	}
 	return ret;
 }
diff --git a/libpwrutils/CMakeLists.txt b/libpwrutils/CMakeLists.txt
index 56f110ea44f1f9bee4c456346843ec2e55b68417..69a991253aa4c794a5f71ec9cc871f380ca65652 100644
--- a/libpwrutils/CMakeLists.txt
+++ b/libpwrutils/CMakeLists.txt
@@ -29,6 +29,7 @@ SET(libpwrutils_STAT_SRC
 	exception.cpp
 	whitespace.cpp
 	pathsearch.cpp
+	plugin.cpp
 	plural.cpp
 	util.cpp
 )
diff --git a/libpwrutils/plugin.cpp b/libpwrutils/plugin.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..157aac408df4f337423674758f2dcdacf0d84505
--- /dev/null
+++ b/libpwrutils/plugin.cpp
@@ -0,0 +1,79 @@
+#include <libpwrutils/plugin.h>
+#include <dlfcn.h>
+#include <iostream>
+
+namespace PwrNlp {
+namespace Plugin {
+
+std::string make_soname(const std::string &scope, const std::string &name)
+{
+	// a name containing a slash is used verbatim as the library name
+	if (name.size() > 1 && name.find('/') != name.npos) {
+		return name;
+	} else {
+		return "lib" + scope + "_" + name + ".so";
+	}
+}
+
+bool load(const std::string &scope, const std::string &name, bool quiet)
+{
+	std::string soname = make_soname(scope, name);
+	// std::cerr << "PLUGIN LOAD " << scope << " " << name << " " << soname << "\n";
+	// first check if the plugin was already loaded
+	void* handle = dlopen(soname.c_str(), RTLD_NOW | RTLD_NOLOAD);
+	if (handle != NULL) {
+		// the probe took a reference of its own, release it again
+		dlclose(handle);
+		if (!quiet) {
+			std::cerr << "Warning: " << scope << " plugin '" << name
+				<< "' already loaded\n";
+		}
+		return false;
+	}
+	// actually load the library, clearing any stale dlerror state first
+	dlerror();
+	handle = dlopen(soname.c_str(), RTLD_NOW);
+	if (handle == NULL) {
+		if (!quiet) {
+			const char* dle = dlerror();
+			std::cerr << "Error: dlopen error while loading " << scope
+				<< " plugin '" << name << "' (" << soname << "): "
+				<< (dle != NULL ? dle : "unknown error") << "\n";
+		}
+		return false;
+	}
+	// run plugin init function if it exists
+	typedef void (*init_func_t)();
+	init_func_t init_func = reinterpret_cast<init_func_t>(
+		dlsym(handle, "pwrnlp_plugin_init"));
+	if (init_func) {
+		init_func();
+	}
+	if (!quiet) {
+		std::cerr << "Loaded " << scope << " plugin '" << name << "'\n";
+	}
+	return true;
+}
+
+bool load_check(const std::string &scope, const std::string &name, bool quiet,
+		boost::function<size_t (void)> counter, const std::string &what)
+{
+	size_t before = counter();
+	if (load(scope, name, quiet)) {
+		size_t after = counter();
+		if (after <= before) {
+			if (!quiet) {
+				std::cerr << "Warning: " << scope << " plugin '"
+					<< name << "' loaded, but "
+					<< what << " count did not increase\n";
+			}
+			return false;
+		}
+		return true;
+	} else {
+		return false;
+	}
+}
+
+} /* end ns Plugin */
+} /* end ns PwrNlp */
diff --git a/libpwrutils/plugin.h b/libpwrutils/plugin.h
new file mode 100644
index 0000000000000000000000000000000000000000..42595308145acf8ea0d5000d3fd18f3298daed0d
--- /dev/null
+++ b/libpwrutils/plugin.h
@@ -0,0 +1,37 @@
+#ifndef LIBPWRNLP_PLUGIN_H
+#define LIBPWRNLP_PLUGIN_H
+
+#include <boost/function.hpp>
+#include <string>
+
+namespace PwrNlp {
+namespace Plugin {
+
+/**
+ * Convert a plugin name to a shared library name that is expected to
+ * contain the plugin. A name with more than one character that contains
+ * a slash is returned unchanged; anything else becomes
+ * "lib<scope>_<name>.so".
+ */
+std::string make_soname(const std::string& scope, const std::string& name);
+
+/**
+ * Load a plugin and run its pwrnlp_plugin_init function, if it has one.
+ * Diagnostics go to stderr unless quiet is set.
+ * @return true if the library was loaded by this call, false if it was
+ *         already loaded or could not be loaded
+ */
+bool load(const std::string& scope, const std::string& name, bool quiet);
+
+/**
+ * Load a plugin, checking if a counter increases after the load,
+ * and outputting a diagnostic message if it does not
+ * @return true only if the plugin was loaded and the counter increased
+ */
+bool load_check(const std::string& scope, const std::string& name, bool quiet,
+		boost::function<size_t(void)> counter, const std::string& what);
+
+} /* end ns Plugin */
+} /* end ns PwrNlp */
+
+#endif // LIBPWRNLP_PLUGIN_H
diff --git a/poliqarp/CMakeLists.txt b/poliqarp/CMakeLists.txt
index b1a51a27f732617407c148b7d754c3b0cf4e58a2..4fb5827f991e7e23bb30630d21c465650d5bc953 100644
--- a/poliqarp/CMakeLists.txt
+++ b/poliqarp/CMakeLists.txt
@@ -7,16 +7,16 @@ include_directories(${PoliqarpLibrary_SOURCE_DIR})
 include_directories(${PoliqarpLibrary_BINARY_DIR}/sakura)
 include_directories(${PoliqarpLibrary_BINARY_DIR})
 
-add_library(corpus2poliqarp SHARED pqclient.cpp pqreader.cpp)
-set_target_properties(corpus2poliqarp PROPERTIES
+add_library(corpus2_poliqarpreader SHARED pqclient.cpp pqreader.cpp)
+set_target_properties(corpus2_poliqarpreader PROPERTIES
 	VERSION "${c2pq_ver_major}.${c2pq_ver_minor}"
 	SOVERSION ${c2pq_ver_major})
-target_link_libraries(corpus2poliqarp poliqarpc2 corpus2)
+target_link_libraries(corpus2_poliqarpreader poliqarpc2 corpus2)
+
 add_executable(c2pqtest c2pqtest.cpp)
-target_link_libraries(c2pqtest poliqarpc2 corpus2poliqarp corpus2 pwrutils )
+target_link_libraries(c2pqtest poliqarpc2 corpus2_poliqarpreader corpus2 pwrutils )
 
 if(UNIX)
-	install(TARGETS corpus2poliqarp LIBRARY DESTINATION lib)
+	install(TARGETS corpus2_poliqarpreader LIBRARY DESTINATION lib)
 	install(TARGETS c2pqtest RUNTIME DESTINATION bin)
 endif(UNIX)
diff --git a/poliqarp/pqreader.cpp b/poliqarp/pqreader.cpp
index 400e9b5431e8fa5c4a98179ea2fcc7d173d52cf9..6334b4fb2b09080c0083514794770cd5fcadbe89 100644
--- a/poliqarp/pqreader.cpp
+++ b/poliqarp/pqreader.cpp
@@ -1,6 +1,17 @@
 #include "pqreader.h"
 #include "pqclient.h"
 
+/*
+extern "C" {
+void pwrnlp_plugin_init()
+{
+	std::cerr << "PQINIT\n";
+	Corpus2::TokenReader::register_path_reader<Corpus2::PoliqarpReader>(
+		"poliqarp","token,chunk,sentence");
+}
+}
+*/
+
 namespace Corpus2 {
 
 bool PoliqarpReader::registered = TokenReader::register_path_reader<PoliqarpReader>(
diff --git a/poliqarp/pqreader.h b/poliqarp/pqreader.h
index 35acf22e4f7c70845bc363d093dc44b2f6eae762..a8fb40ba06311f2d5913ae53382dd3fe174f44ed 100644
--- a/poliqarp/pqreader.h
+++ b/poliqarp/pqreader.h
@@ -5,6 +5,11 @@
 #include <deque>
 #include <boost/scoped_ptr.hpp>
 
+/*
+extern "C" {
+void pwrnlp_plugin_init();
+}
+*/
 
 namespace Corpus2 {
 
diff --git a/swig/CMakeLists.txt b/swig/CMakeLists.txt
index bf78211a4233aa5fd3b4c97edc8cd9c90c13c5b1..1221f861eebe28ecc83369a949809965a8f616b3 100644
--- a/swig/CMakeLists.txt
+++ b/swig/CMakeLists.txt
@@ -3,9 +3,6 @@ PROJECT(corpus2SwigWrap)
 
 set(LIBS "corpus2" "pwrutils")
 
-if (CORPUS2_BUILD_POLIQARP)
-	set(LIBS ${LIBS} "corpus2poliqarp")
-endif (CORPUS2_BUILD_POLIQARP)
 
 include_directories (${corpus2_SOURCE_DIR})
 include_directories (${pwrutils_SOURCE_DIR})