/* Copyright (C) 2011 Adam Wardyński, Tomasz Śniatowski, Paweł Kędzia, Adam Radziszewski, Bartosz Broda Part of the WCCL project This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE and COPYING files for more details. */ #ifndef LIBWCCL_WCCLFILE_H #define LIBWCCL_WCCLFILE_H #include <libwccl/values/bool.h> #include <libwccl/values/match.h> #include <libwccl/values/position.h> #include <libwccl/values/strset.h> #include <libwccl/values/tset.h> #include <libwccl/wcclfileopsections.h> #include <libwccl/ops/tagrulesequence.h> #include <libwccl/ops/matchrulesequence.h> #include <libwccl/lexicon/lexicons.h> #include <libwccl/exception.h> #include <libpwrutils/pathsearch.h> namespace Wccl { /** * Class representing contents of parsed WCCL files * - imported lexicons, any typed and untyped named * operator sections, up to one tag rules section, * and up to one match rules section. */ class WcclFile : WcclFileOpSections<UntypedOpSequence>, WcclFileOpSections<OpSequence<StrSet> >, WcclFileOpSections<OpSequence<TSet> >, WcclFileOpSections<OpSequence<Bool> >, WcclFileOpSections<OpSequence<Position> >, WcclFileOpSections<OpSequence<Match> > { public: WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path); ///////////////////// // Untyped and typed operator sections: @X:"sectioname" ( op1; op2 ) ///////////////////// /** * @returns All untyped sections from the WCCL file: @"name" ( anyop0; ...; anyopN ) */ const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections(); /** * @returns All sections of given type T from the WCCL file: @T:"name" ( op<T>0; ...; op<T>N ) */ template<class T> const typename std::vector<boost::shared_ptr<OpSequence<T> > >& sections(); /** * @returns True if the WCCL file contains untyped section of given name. * False otherwise. */ bool has_untyped_section(const std::string& name) const; /** * @returns True if the WCCL file contains section of given name and type T. * False otherwise. */ template<class T> bool has_section(const std::string& name) const; /** * @returns Names of all untyped sections in this WCCL file, in the order they * were found in the file. */ std::vector<std::string> untyped_section_names() const; /** * @returns Names of all sections of given type T from this WCCL file, in the * order they were found in the file. */ template<class T> std::vector<std::string> section_names() const; /** * @returns Untyped section of given name. * @throws InvalidArgument if there is no untyped section of given name. */ UntypedOpSequence& get_untyped_section(const std::string& name); /** * @returns Untyped section of given name (const). * @throws InvalidArgument if there is no untyped section of given name. */ const UntypedOpSequence& get_untyped_section(const std::string& name) const; /** * @returns Section of given name and type T. * @throws InvalidArgument if there is no section of given name and type T. */ template<class T> OpSequence<T>& get_section(const std::string& name); /** * @returns Section of given name and type T (const). * @throws InvalidArgument if there is no section of given name and type T. */ template<class T> const OpSequence<T>& get_section(const std::string& name) const; /** * @returns Shared pointer to an untyped section of the given name. * @throws InvalidArgument if there is no untyped section of given name. */ boost::shared_ptr<UntypedOpSequence> get_untyped_section_ptr(const std::string& name); /** * @returns Shared pointer to an untyped section of the given name (const). * @throws InvalidArgument if there is no untyped section of given name. */ boost::shared_ptr<const UntypedOpSequence> get_untyped_section_ptr(const std::string& name) const; /** * @returns Shared pointer to a section of the given name and type T. * @throws InvalidArgument if there is no section of given name and type T. */ template<class T> boost::shared_ptr<OpSequence<T> > get_section_ptr(const std::string& name); /** * @returns Shared pointer to a section of the given name and type T (const). * @throws InvalidArgument if there is no section of given name and type T. */ template<class T> boost::shared_ptr<const OpSequence<T> > get_section_ptr(const std::string& name) const; /** * @returns Operator from an untyped section of given name, present at given index. * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no untyped section of given name or if idx is out of range. */ FunctionalOperator& get_untyped_op(const std::string& name, size_t idx = 0); /** * @returns Operator from an untyped section of given name, present at given index (const). * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no untyped section of given name or if idx is out of range. */ const FunctionalOperator& get_untyped_op(const std::string& name, size_t idx = 0) const; /** * @returns Operator from a section of given name and type T, present at given index. * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no section of given name and type T or if idx is out of range. */ template<class T> Operator<T>& get_op(const std::string& name, size_t idx = 0); /** * @returns Operator from a section of given name and type T, present at given index (const). * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no section of given name and type T or if idx is out of range. */ template<class T> const Operator<T>& get_op(const std::string& name, size_t idx = 0) const; /** * @returns Shared pointer to an operator from an untyped section of given name, present at given index. * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no untyped section of given name or if idx is out of range. */ boost::shared_ptr<FunctionalOperator> get_untyped_op_ptr(const std::string& name, size_t idx = 0); /** * @returns Shared pointer to an operator from an untyped section of given name, present at given index (const). * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no untyped section of given name or if idx is out of range. */ boost::shared_ptr<const FunctionalOperator> get_untyped_op_ptr(const std::string& name, size_t idx = 0) const; /** * @returns Shared pointer to an operator from a section of given name and type T, present at given index. * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no section of given name and type T, or if idx is out of range. */ template<class T> boost::shared_ptr<Operator<T> > get_op_ptr(const std::string& name, size_t idx = 0); /** * @returns Shared pointer to an operator from a section of given name and type T, present at given index (const). * @note Index is zero-based and default is 0 (no idx given = the first operator in the sequence). * @throws InvalidArgument if there is no section of given name and type T, or if idx is out of range. */ template<class T> boost::shared_ptr<const Operator<T> > get_op_ptr(const std::string& name, size_t idx = 0) const; /** * @returns Vector of all name-operator pairs from untyped sections, in the order they were found in the file. * @note Generated names of operators are derived from section name they are contained within, * suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq" * will have name "seq-0", second operator's name will be "seq-1" etc. */ UntypedOpSequence::name_op_v_t gen_name_untyped_op_pairs(); /** * @returns Vector of all name-operator pairs from untyped sections, in the order they were found in the file (const). * @note Generated names of operators are derived from section name they are contained within, * suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq" * will have name "seq-0", second operator's name will be "seq-1" etc. */ UntypedOpSequence::name_op_v_c_t gen_name_untyped_op_pairs() const; /** * @returns Vector of all name-operator pairs from sections of given type T, in the order they were found in the file. * @note Generated names of operators are derived from section name they are contained within, * suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq" * will have name "seq-0", second operator's name will be "seq-1" etc. */ template<class T> typename OpSequence<T>::name_op_v_t gen_name_op_pairs(); /** * @returns Vector of all name-operator pairs from sections of given type T, in the order they were found in the file (const). * @note Generated names of operators are derived from section name they are contained within, * suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq" * will have name "seq-0", second operator's name will be "seq-1" etc. */ template<class T> typename OpSequence<T>::name_op_v_c_t gen_name_op_pairs() const; /** * @returns Vector of all name-operator pairs from all sections of the file (both typed and untyped), * in the order they were found in the file. * @note Generated names of operators are derived from section name they are contained within, * suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq" * will have name "seq-0", second operator's name will be "seq-1" etc. */ FunctionalOpSequence::name_op_v_t gen_all_op_pairs(); /** * @returns Vector of all name-operator pairs from all sections of the file (both typed and untyped), * in the order they were found in the file (const). * @note Generated names of operators are derived from section name they are contained within, * suffixed with hyphen and zero-based index. E.g. the first operator in section named "seq" * will have name "seq-0", second operator's name will be "seq-1" etc. */ FunctionalOpSequence::name_op_v_c_t gen_all_op_pairs() const; /////////////////////////// // Lexicons, import sections: import("path", "name") /////////////////////////// /** * @returns True if there is an imported Lexicon of given name. False otherwise. */ bool has_lexicon(const std::string& name) const; /** * @returns True if there are any Lexicons imported. */ bool has_lexicons() const; /** * @returns Shared pointer to a Lexicon of given name (const). * @throws InvalidArgument if there is no Lexicon of given name imported. */ boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const; /** * @returns Lexicon of given name (const). * @throws InvalidArgument if there is no Lexicon of given name imported. */ const Lexicon& get_lexicon(const std::string& name) const; /** * @returns Shared pointer to collection of all imported Lexicons (const). */ boost::shared_ptr<const Lexicons> get_lexicons_ptr() const; /** * @returns Collection of all imported Lexicons (const). */ const Lexicons& get_lexicons() const; /////////////////////////// // Tag rules section: tag_rules ( rule1; rule2 ) /////////////////////////// /** * @returns True if this WcclFile has tag_rules section. False otherwise. */ bool has_tag_rules() const; /** * @returns The tag rules section from this WCCL file. * @throws WcclError if there is no tag rules section. */ const TagRuleSequence& get_tag_rules() const; /** * @returns The tag rules section from this WCCL file. * @throws WcclError if there is no tag rules section. */ boost::shared_ptr<TagRuleSequence> get_tag_rules_ptr(); /** * @returns The tag rules section from this WCCL file (const). * @throws WcclError if there is no tag rules section. */ boost::shared_ptr<const TagRuleSequence> get_tag_rules_ptr() const; /////////////////////////// // Match rules section: match_rules ( apply1; apply2 ) /////////////////////////// /** * @returns True if this WcclFile has match_rules section. False otherwise. */ bool has_match_rules() const; /** * @returns The match rules section from this WCCL file. * @throws WcclError if there is no match_rules section. */ const Matching::MatchRuleSequence& get_match_rules() const; /** * @returns The match rules section from this WCCL file. * @throws WcclError if there is no match_rules section. */ boost::shared_ptr<Matching::MatchRuleSequence> get_match_rules_ptr(); /** * @returns The match rules section from this WCCL file (const). * @throws WcclError if there is no match_rules section. */ boost::shared_ptr<const Matching::MatchRuleSequence> get_match_rules_ptr() const; /////////////////////////// // Miscelaneous /////////////////////////// friend std::ostream& operator<<(std::ostream& ostream, const WcclFile& wccl_file); std::string to_string() const; const Corpus2::Tagset& tagset() const; const PwrNlp::PathSearcher<Wccl::FileNotFound> path() const { return path_; } PwrNlp::PathSearcher<Wccl::FileNotFound> path() { return path_; } /////////////////////////// // Internal parser-related methods to add sections. /////////////////////////// void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section); void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section); void add_untyped_section(const UntypedOpSequence& section); template<class T> void add_section(const boost::shared_ptr<OpSequence<T> >& section); template<class T> void add_section(const boost::shared_ptr<const OpSequence<T> >& section); template<class T> void add_section(const OpSequence<T>& section); void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon); void set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules); void set_match_rules(const boost::shared_ptr<Matching::MatchRuleSequence>& match_rules); private: std::ostream& write_to(std::ostream& ostream) const; std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_; boost::shared_ptr<TagRuleSequence> tag_rules_; boost::shared_ptr<Matching::MatchRuleSequence> match_rules_; boost::shared_ptr<Lexicons> lexicons_; const Corpus2::Tagset& tagset_; PwrNlp::PathSearcher<Wccl::FileNotFound> path_; }; } /* end ns Wccl */ // // Implementation // namespace Wccl { inline WcclFile::WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path) : lexicons_(boost::make_shared<Lexicons>()), tagset_(tagset), path_(":") { path_.set_search_path(search_path); path_.set_verbose(true); } inline const std::vector<boost::shared_ptr<UntypedOpSequence> >& WcclFile::untyped_sections() { return WcclFileOpSections<UntypedOpSequence>::sections(); } template<class T> inline const typename std::vector<boost::shared_ptr<OpSequence<T> > >& WcclFile::sections() { return WcclFileOpSections<OpSequence<T> >::sections(); } inline bool WcclFile::has_untyped_section(const std::string& name) const { return WcclFileOpSections<UntypedOpSequence>::has_section(name); } template<class T> inline bool WcclFile::has_section(const std::string &name) const { return WcclFileOpSections<OpSequence<T> >::has_section(name); } inline std::vector<std::string> WcclFile::untyped_section_names() const { return WcclFileOpSections<UntypedOpSequence>::section_names(); } template<class T> inline std::vector<std::string> WcclFile::section_names() const { return WcclFileOpSections<OpSequence<T> >::section_names(); } inline UntypedOpSequence& WcclFile::get_untyped_section(const std::string& name) { return WcclFileOpSections<UntypedOpSequence>::get_section(name); } inline const UntypedOpSequence& WcclFile::get_untyped_section(const std::string& name) const { return WcclFileOpSections<UntypedOpSequence>::get_section(name); } template<class T> inline OpSequence<T>& WcclFile::get_section(const std::string& name) { return WcclFileOpSections<OpSequence<T> >::get_section(name); } template<class T> inline const OpSequence<T>& WcclFile::get_section(const std::string& name) const { return WcclFileOpSections<OpSequence<T> >::get_section(name); } inline boost::shared_ptr<UntypedOpSequence> WcclFile::get_untyped_section_ptr(const std::string& name) { return WcclFileOpSections<UntypedOpSequence>::get_section_ptr(name); } inline boost::shared_ptr<const UntypedOpSequence> WcclFile::get_untyped_section_ptr(const std::string& name) const { return WcclFileOpSections<UntypedOpSequence>::get_section_ptr(name); } template<class T> inline typename boost::shared_ptr<OpSequence<T> > WcclFile::get_section_ptr(const std::string& name) { return WcclFileOpSections<OpSequence<T> >::get_section_ptr(name); } template<class T> inline typename boost::shared_ptr<const OpSequence<T> > WcclFile::get_section_ptr(const std::string& name) const { return WcclFileOpSections<OpSequence<T> >::get_section_ptr(name); } inline FunctionalOperator& WcclFile::get_untyped_op(const std::string &name, size_t idx) { return WcclFileOpSections<UntypedOpSequence>::get_op(name, idx); } inline const FunctionalOperator& WcclFile::get_untyped_op(const std::string& name, size_t idx) const { return WcclFileOpSections<UntypedOpSequence>::get_op(name, idx); } template<class T> inline Operator<T>& WcclFile::get_op(const std::string& name, size_t idx) { return WcclFileOpSections<Operator<T> >::get_op(name, idx); } template<class T> inline const Operator<T>& WcclFile::get_op(const std::string& name, size_t idx) const { return WcclFileOpSections<Operator<T> >::get_op(name, idx); } inline boost::shared_ptr<FunctionalOperator> WcclFile::get_untyped_op_ptr( const std::string& name, size_t idx) { return WcclFileOpSections<UntypedOpSequence>::get_op_ptr(name, idx); } inline boost::shared_ptr<const FunctionalOperator> WcclFile::get_untyped_op_ptr( const std::string& name, size_t idx) const { return WcclFileOpSections<UntypedOpSequence>::get_op_ptr(name, idx); } template<class T> inline boost::shared_ptr<Operator<T> > WcclFile::get_op_ptr( const std::string& name, size_t idx) { return WcclFileOpSections<OpSequence<T> >::get_op_ptr(name, idx); } template<class T> inline boost::shared_ptr<const Operator<T> > WcclFile::get_op_ptr( const std::string& name, size_t idx) const { return WcclFileOpSections<OpSequence<T> >::get_op_ptr(name, idx); } inline UntypedOpSequence::name_op_v_t WcclFile::gen_name_untyped_op_pairs() { return WcclFileOpSections<UntypedOpSequence>::gen_name_op_pairs(); } inline UntypedOpSequence::name_op_v_c_t WcclFile::gen_name_untyped_op_pairs() const { return WcclFileOpSections<UntypedOpSequence>::gen_name_op_pairs(); } template<class T> inline typename OpSequence<T>::name_op_v_t WcclFile::gen_name_op_pairs() { return WcclFileOpSections<OpSequence<T> >::gen_name_op_pairs(); } template<class T> inline typename OpSequence<T>::name_op_v_c_t WcclFile::gen_name_op_pairs() const { return WcclFileOpSections<OpSequence<T> >::gen_name_op_pairs(); } inline void WcclFile::import_lexicon(const boost::shared_ptr<Lexicon>& lexicon) { lexicons_->insert(lexicon); } inline bool WcclFile::has_lexicon(const std::string& name) const { return lexicons_->has_lexicon(name); } inline bool WcclFile::has_lexicons() const { return lexicons_; } inline boost::shared_ptr<const Lexicon> WcclFile::get_lexicon_ptr(const std::string& name) const { return lexicons_->get_ptr(name); } inline const Lexicon& WcclFile::get_lexicon(const std::string &name) const { return lexicons_->get(name); } inline boost::shared_ptr<const Lexicons> WcclFile::get_lexicons_ptr() const { return lexicons_; } inline const Lexicons& WcclFile::get_lexicons() const { return *lexicons_; } inline void WcclFile::add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section) { WcclFileOpSections<UntypedOpSequence>::append(section); all_sections_.push_back(section); } inline void WcclFile::add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section) { boost::shared_ptr<UntypedOpSequence> s = section->clone(); add_untyped_section(s); } inline void WcclFile::add_untyped_section(const UntypedOpSequence& section) { boost::shared_ptr<UntypedOpSequence> s = section.clone(); add_untyped_section(s); } template<class T> inline void WcclFile::add_section(const boost::shared_ptr<OpSequence<T> >& section) { WcclFileOpSections<OpSequence<T> >::append(section); all_sections_.push_back(section); } template<class T> inline void WcclFile::add_section(const boost::shared_ptr<const OpSequence<T> >& section) { boost::shared_ptr<OpSequence<T> > s = section->clone(); add_section(s); } template<class T> inline void WcclFile::add_section(const OpSequence<T>& section) { boost::shared_ptr<OpSequence<T> > s = section.clone(); add_section(s); } inline bool WcclFile::has_tag_rules() const { return tag_rules_; } inline const TagRuleSequence& WcclFile::get_tag_rules() const { return *get_tag_rules_ptr(); } inline void WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules) { if (has_tag_rules()) { throw WcclError("Tag rules already added."); } tag_rules_ = tag_rules; } inline bool WcclFile::has_match_rules() const { return match_rules_; } inline const Matching::MatchRuleSequence& WcclFile::get_match_rules() const { return *get_match_rules_ptr(); } inline void WcclFile::set_match_rules(const boost::shared_ptr<Matching::MatchRuleSequence>& match_rules) { if (has_match_rules()) { throw WcclError("Tag rules already added."); } match_rules_ = match_rules; } inline std::ostream& operator <<(std::ostream& ostream, const WcclFile& wccl_file) { return wccl_file.write_to(ostream); } inline const Corpus2::Tagset& WcclFile::tagset() const { return tagset_; } } /* end ns Wccl */ #endif // LIBWCCL_WCCLFILE_H