diff --git a/libwccl/parser/Parser.cpp b/libwccl/parser/Parser.cpp index 94d6014c6e3b89caa767794a59751df7cc2d874d..389c8a425eafc3ee1f0f2a5d13b4aaf947ec7c53 100644 --- a/libwccl/parser/Parser.cpp +++ b/libwccl/parser/Parser.cpp @@ -108,6 +108,36 @@ boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > Parser::parseSymSetOperator( return parser.parse_sym_set_operator(tagset_); } +// ---------------------------------------------------------------------------- +/** + * @desc Parse a position operator contained in a std::string. Converts the + * string to a stream and calls parsePositionOperator with it + * @arg str operator string + * @return the parsed operator via a shared pointer + */ +boost::shared_ptr<ANTLRParserResult<Wccl::Position> > Parser::parsePositionOperator( + const std::string& str) const +{ + std::stringstream ss (std::stringstream::in | std::stringstream::out); + ss << str; + + return this->parsePositionOperator(ss); +} + +/** + * @desc Parse a position operator. Runs parse_position_operator rule + * in the parser grammar. + * @arg istr input stream with the operator + * @return the parsed operator via a shared pointer + */ +boost::shared_ptr<ANTLRParserResult<Wccl::Position> > Parser::parsePositionOperator( + std::istream& istr) const +{ + ANTLRLexer lexer(istr); + ANTLRParser parser(lexer); + return parser.parse_position_operator(tagset_); +} + // ---------------------------------------------------------------------------- /** * @desc Parse any operator contained in a std::string. 
Converts the string to @@ -139,6 +169,7 @@ boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( std::stringstream errors; boost::shared_ptr<ANTLRParserResultBase> result; if (!result) { + ss.clear(); ss.seekg(0, std::ios::beg); ANTLRLexer lexer(ss); ANTLRParser parser(lexer); @@ -150,6 +181,7 @@ boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( } } if (!result) { + ss.clear(); ss.seekg(0, std::ios::beg); ANTLRLexer lexer(ss); ANTLRParser parser(lexer); @@ -161,6 +193,7 @@ boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( } } if (!result) { + ss.clear(); ss.seekg(0, std::ios::beg); ANTLRLexer lexer(ss); ANTLRParser parser(lexer); @@ -171,6 +204,18 @@ boost::shared_ptr<ANTLRParserResultBase> Parser::parseAnyOperator( // ignore, try another type } } + if (!result) { + ss.clear(); + ss.seekg(0, std::ios::beg); + ANTLRLexer lexer(ss); + ANTLRParser parser(lexer); + try { + result = parser.parse_position_operator(tagset_); + } catch (antlr::ANTLRException& e) { + errors << "(as position) " << e.getMessage() << "\n"; + // ignore, try another type + } + } if (!result) { throw ParserException(errors.str()); } diff --git a/libwccl/parser/Parser.h b/libwccl/parser/Parser.h index 58ef0824f37c078a120da705cd35b2cf956f869a..1428c295cd56fb843f0ed0c8c5854c76ea49784c 100644 --- a/libwccl/parser/Parser.h +++ b/libwccl/parser/Parser.h @@ -45,6 +45,13 @@ public: boost::shared_ptr<ANTLRParserResult<Wccl::TSet> > parseSymSetOperator(std::istream&) const; + // --------------------------------------------------------------------------- + // methods for parsing position operators + boost::shared_ptr<ANTLRParserResult<Wccl::Position> > + parsePositionOperator(const std::string&) const; + boost::shared_ptr<ANTLRParserResult<Wccl::Position> > + parsePositionOperator(std::istream&) const; + // --------------------------------------------------------------------------- // methods for parsing any operators boost::shared_ptr<ANTLRParserResultBase> 
diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index c20554e0872dba586fd16f968d427a2f30780236..ce51571464c30328f588d2063ad5c1ec30ec3f79 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -152,6 +152,21 @@ parse_sym_set_operator } ; +// ---------------------------------------------------------------------------- +// Rules for parsing position operators +// Returns boost::shared_ptr<ANTLRParserResult<Wccl::Position> > +parse_position_operator + [const Corpus2::Tagset &tagset] + returns [boost::shared_ptr<ANTLRParserResult<Wccl::Position> > res] +{ + res.reset(new ANTLRParserResult<Wccl::Position>()); + boost::shared_ptr<Wccl::Function<Wccl::Position> > op; +} + : op = position_operators [tagset, *res->variables.get()] { + res->op = op; + } +; + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // VALUES @@ -270,7 +285,7 @@ position_literal val.reset(new Wccl::Position(Wccl::Position(Wccl::Position::Nowhere))); } ; -// Constat position value +// Constant position value // Returns boost::shared_ptr<Wccl::Constant<Wccl::Position> > position_value returns [boost::shared_ptr<Wccl::Constant<Wccl::Position> > val] @@ -976,28 +991,28 @@ options { QUOT_MARK options { - paraphrase = "Quota mark"; + paraphrase = "Quote"; } : '\'' ; APOS_MARK options { - paraphrase = "Aposptrophe mark"; + paraphrase = "Apostrophe"; } : '"' ; Q_MARK options { - paraphrase = "Query mark"; + paraphrase = "Question mark"; } : '?' ; E_MARK options { - paraphrase = "Exclamanation mark"; + paraphrase = "Exclamation mark"; } : '!' 
; @@ -1011,7 +1026,7 @@ options { TST_PREFIX options { - paraphrase = "Tag set (symbol) prefix"; + paraphrase = "Symset prefix"; } : "$t:" ; @@ -1120,7 +1135,8 @@ options { paraphrase = "Multi line comment"; } : "/*" - ( /* This actually works OK despite the ambiguity that + ( // TODO: test it and add reference to the site it's taken from! + /* This actually works OK despite the ambiguity that '\r' '\n' can be matched in one alternative or by matching '\r' in one iteration and '\n' in another.. But this is really matched just by one rule per (...)* @@ -1145,9 +1161,9 @@ options { : '#' ; -DSEPARATOR -options { - paraphrase = "':-'"; -} - : ":-" -; +//DSEPARATOR +//options { +// paraphrase = "':-'"; +//} +// : ":-" +//; diff --git a/libwccl/values/bool.h b/libwccl/values/bool.h index e0e05c3bf3f556c669b4d62e8813e83022c29d74..fd8a92f527e6bfb80fa3f727bd3efdde9f12a600 100644 --- a/libwccl/values/bool.h +++ b/libwccl/values/bool.h @@ -10,12 +10,14 @@ class Bool : public Value public: WCCL_VALUE_PREAMBLE + typedef bool value_type; + explicit Bool(bool v = false) : val_(v) { } - bool get_value() const { + const bool& get_value() const { return val_; } diff --git a/libwccl/values/position.h b/libwccl/values/position.h index 4b5eadc37ddd500f1c363a52d10820c605ca8761..747beda772a203f7d7cddcceca5edf782db5be6d 100644 --- a/libwccl/values/position.h +++ b/libwccl/values/position.h @@ -19,11 +19,13 @@ public: { } + typedef int value_type; + static const int Nowhere = boost::integer_traits<int>::const_min; static const int Begin = boost::integer_traits<int>::const_min + 1; static const int End = boost::integer_traits<int>::const_max; - int get_value() const { + const int& get_value() const { return val_; } diff --git a/libwccl/values/strset.h b/libwccl/values/strset.h index 49dfa5da688b126b7a3693ffbc45a46c221159ca..911a28d76b2f2b4e9e24631bd0a578d5602620f3 100644 --- a/libwccl/values/strset.h +++ b/libwccl/values/strset.h @@ -14,6 +14,8 @@ public: typedef 
boost::unordered_set<UnicodeString> set_t; + typedef set_t value_type; + StrSet() : set_() { diff --git a/libwccl/values/tset.h b/libwccl/values/tset.h index 693006648f836870ee8866a7552cac4852f7f91a..0378628128aae708f119a46790c0a8c4de790e48 100644 --- a/libwccl/values/tset.h +++ b/libwccl/values/tset.h @@ -11,6 +11,8 @@ class TSet : public Value public: WCCL_VALUE_PREAMBLE; + typedef Corpus2::Tag value_type; + TSet() : tag_() { diff --git a/libwccl/variables.cpp b/libwccl/variables.cpp index 419c44e210c71441339ca54012602a56a11a9b87..6f51e065595534571b3d9f35548245fa0ed177a8 100644 --- a/libwccl/variables.cpp +++ b/libwccl/variables.cpp @@ -21,7 +21,20 @@ struct delhelper r = v.del<T>(s) || r; } }; +} /* end anon ns */ + +bool Variables::del_any(const std::string &s) +{ + bool rv = false; + typedef boost::mpl::pop_front< types >::type concrete; + // call delhelper::operator()<T> once for each of the allowed + // Value subtypes (but not for Value itself). + boost::mpl::for_each<concrete, boost::mpl::always<boost::mpl::_1> >( + delhelper(*this, s, rv)); + return rv; +} +namespace { struct puthelper { Variables& v; @@ -41,29 +54,72 @@ struct puthelper } } }; - } /* end anon ns */ -bool Variables::del_any(const std::string &s) +void Variables::put_any(const std::string &s, const boost::shared_ptr<Value> &v) { bool rv = false; typedef boost::mpl::pop_front< types >::type concrete; - // call delhelper::operator()<T> once for each of the allowed + // call puthelper::operator()<T> once for each of the allowed // Value subtypes (but not for Value itself). 
boost::mpl::for_each<concrete, boost::mpl::always<boost::mpl::_1> >( - delhelper(*this, s, rv)); - return rv; + puthelper(*this, s, rv, v)); + if (!rv) throw VariableTypeMismatch(s); } -void Variables::put_any(const std::string &s, const boost::shared_ptr<Value> &v) +namespace { +struct resethelper +{ + const Variables& v; + resethelper(Variables& v): v(v) {} + + template<typename T> + void operator()(const boost::mpl::always<T>&) { + typedef std::pair< std::string, boost::shared_ptr<T> > v_t; + foreach (const v_t& a, v.get_all<T>()) { + *a.second = T(); + } + } +}; +} /* end anon ns */ + + +void Variables::reset_values() { - bool rv = false; typedef boost::mpl::pop_front< types >::type concrete; - // call puthelper::operator()<T> once for each of the allowed - // Value subtypes (but not for Value itself). boost::mpl::for_each<concrete, boost::mpl::always<boost::mpl::_1> >( - puthelper(*this, s, rv, v)); - if (!rv) throw VariableTypeMismatch(s); + resethelper(*this)); +} + +namespace { +struct clonehelper +{ + const Variables& vfrom; + Variables::AccessHelper vto; + clonehelper(const Variables& vfrom, Variables::AccessHelper vto) + : vfrom(vfrom), vto(vto) {} + + template<typename T> + void operator()(const boost::mpl::always<T>&) { + vto.access<T>() = vfrom.get_all<T>(); + typedef typename detail::Vmap<T>::map_t::value_type value_type; + foreach (const value_type& a, vto.access<T>()) { + vto.vars.put(a.first, *a.second); + } + } +}; +template<> inline +void clonehelper::operator()(const boost::mpl::always<Value>&) { + vto.access<Value>() = vfrom.get_all<Value>(); +} +} /* end anon ns */ + +Variables* Variables::clone() const +{ + Variables* copy = new Variables; + boost::mpl::for_each<types, boost::mpl::always<boost::mpl::_1> >( + clonehelper(*this, Variables::AccessHelper(*copy))); + return copy; } } /* end ns Wccl */ diff --git a/libwccl/variables.h b/libwccl/variables.h index c0dcab79cda4de1b6f3643e38ebd2a1f276918e1..35a99d31cc1642de729d8a4dc78c1cae2247d4e5 
100644 --- a/libwccl/variables.h +++ b/libwccl/variables.h @@ -31,8 +31,9 @@ namespace detail { template<typename T> class Vmap { -protected: +public: typedef std::map< std::string, boost::shared_ptr<T> > map_t; +protected: Vmap() : map_() {} boost::shared_ptr<T> get(const std::string& s) const { typename map_t::const_iterator i = map_.find(s); @@ -151,7 +152,7 @@ class Variables : detail::Vmap<Value> , detail::Vmap<Position> , detail::Vmap<StrSet> , detail::Vmap<TSet> -// , boost::noncopyable + , boost::noncopyable { public: /// Valid value types, should match the inheritance. @@ -173,12 +174,31 @@ public: return detail::Vmap<T>::map_.size(); } - void reset_values(); //set all values to default value + /** Set all values to their default value. + * + * Effectively iterates through all variables and assigns them their + * respective type's default-constructed value. + */ + void reset_values(); - //template<typename T> - //const map_t& all_variables() const + /** Per-type all variables accessor. + * + * Allows iterating through all variables of a given type (or all variables + * if the type is Value). Values may be modified, the variable names or + * shared pointers themselves cannot, use put etc. for that. + */ + template<typename T> + const typename detail::Vmap<T>::map_t get_all() const { + BOOST_MPL_ASSERT(( boost::mpl::count<types, T> )); + return detail::Vmap<T>::map_; + } - //clone + /** Variables cloning. + * + * A clone has the same variable names, with distinct underlying Value + * objects, and the same values in these objects. + */ + Variables* clone() const; /** Get a variable. * @@ -195,6 +215,38 @@ public: return detail::Vmap<T>::get(s); } + /** Get a variable, throwing version. + * + * Returns a valid pointer to the variable with the given name, as get(), + * or throws if it is not found. Never returns NULL. + * + * May throw either VariableTypeMismatch or InvalidVariableName. 
+ */ + template<typename T> + boost::shared_ptr<T> get_or_throw(const std::string& s) const { + BOOST_MPL_ASSERT(( boost::mpl::count<types, T> )); + boost::shared_ptr<T> r = detail::Vmap<T>::get(s); + if (r) { + return r; + } else { + if (detail::Vmap<Value>::get(s)) { + throw VariableTypeMismatch(s); + } else { + throw InvalidVariableName(s); + } + } + } + + /** Convenience function to get the actual underlying Value of a variable. + * + * Will throw on errors like get_or_throw would. Returns whatever the + * Value referenced returns in its get_value. + */ + template<typename T> + const typename T::value_type& get_value(const std::string& s) const { + return get_or_throw<T>(s)->get_value(); + } + /** Create a "fast" accessor for a variable by name. * * Returns a special object which is valid for use in get_fast, which when @@ -319,6 +371,25 @@ public: */ template<typename T> void set(const std::string& s, const T& v); + + struct AccessHelper; + friend struct Variables::AccessHelper; + struct AccessHelper + { + template<typename T> + typename detail::Vmap<T>::map_t& access() { + return vars.get_all_nonconst<T>(); + } + Variables& vars; + private: + AccessHelper(Variables& v) : vars(v) {} + friend class Variables; + }; +private: + template<typename T> + typename detail::Vmap<T>::map_t& get_all_nonconst() { + return detail::Vmap<T>::map_; + } }; /* implementation */ @@ -363,7 +434,7 @@ void Variables::put(const std::string& s, const boost::shared_ptr<T>& v) { template<typename T> inline bool Variables::del(const std::string &s) { - //BOOST_MPL_ASSERT(( boost::mpl::count<types, T> )); + BOOST_MPL_ASSERT(( boost::mpl::count<types, T> )); if (detail::Vmap<T>::map_.erase(s)) { bool was_in_values = detail::Vmap<Value>::map_.erase(s); assert(was_in_values); @@ -380,7 +451,8 @@ bool Variables::del<Value>(const std::string &s) } template<typename T> inline -void Variables::set(const std::string& s, const T& v) { +void Variables::set(const std::string& s, const T& v) +{ 
BOOST_MPL_ASSERT(( boost::mpl::count<types, T> )); boost::shared_ptr<T> p = get<T>(s); if (p) { @@ -389,6 +461,7 @@ void Variables::set(const std::string& s, const T& v) { put(s, v); } } + } /* end ns Wccl */ #endif // LIBWCCL_VARIABLES_H diff --git a/tests/varaccess.cpp b/tests/varaccess.cpp index ad4ff33293cf3d49b206d4aa7ddb839e54b7a45a..dd5d3b09225daea56531965ca9e7247548bd9a16 100644 --- a/tests/varaccess.cpp +++ b/tests/varaccess.cpp @@ -12,7 +12,7 @@ BOOST_AUTO_TEST_SUITE(varaccess); struct VAfx { - Variables v; + boost::shared_ptr<Variables> v; VAfx() { Variables v2; v2.put("a", new Bool(true)); @@ -21,7 +21,7 @@ struct VAfx v2.put("bb", new Bool(true)); v2.put("aa", new Position(1)); v2.put("aaa", new Position(2)); - v = v2; + v.reset(v2.clone()); } }; @@ -34,19 +34,19 @@ BOOST_FIXTURE_TEST_CASE(access, VAfx) vnames.push_back("c"); vnames.push_back("bb"); foreach (const std::string vn, vnames) { - VariableAccessor<Bool> a1 = v.create_accessor<Bool>(vn); - BOOST_CHECK(v.get_fast(a1) == v.get<Bool>(vn)); - v.set("a", Bool(false)); - BOOST_CHECK(v.get_fast(a1) == v.get<Bool>(vn)); - v.put("a", Bool(true)); - BOOST_CHECK(v.get_fast(a1) == v.get<Bool>(vn)); + VariableAccessor<Bool> a1 = v->create_accessor<Bool>(vn); + BOOST_CHECK(v->get_fast(a1) == v->get<Bool>(vn)); + v->set("a", Bool(false)); + BOOST_CHECK(v->get_fast(a1) == v->get<Bool>(vn)); + v->put("a", Bool(true)); + BOOST_CHECK(v->get_fast(a1) == v->get<Bool>(vn)); } } BOOST_FIXTURE_TEST_CASE(badaccess, VAfx) { - BOOST_CHECK_THROW(v.create_accessor<Bool>("asd"), InvalidVariableName); - BOOST_CHECK_THROW(v.create_accessor<Bool>("aaa"), VariableTypeMismatch); + BOOST_CHECK_THROW(v->create_accessor<Bool>("asd"), InvalidVariableName); + BOOST_CHECK_THROW(v->create_accessor<Bool>("aaa"), VariableTypeMismatch); } diff --git a/tests/variables.cpp b/tests/variables.cpp index 79cfa09d4c8bcaf0494c3c8052084e10a1c4da48..d39cf0f0a439f7670dfa5208cce3f4479a1c1d5f 100644 --- a/tests/variables.cpp +++ 
b/tests/variables.cpp @@ -80,6 +80,10 @@ BOOST_FIXTURE_TEST_CASE(get, Vfix) BOOST_CHECK(v.get<Position>("p3")); BOOST_CHECK(v.get<Value>("p3")); BOOST_CHECK(!v.get<Bool>("p3")); + BOOST_CHECK_THROW(v.get_or_throw<Bool>("b9"), InvalidVariableName); + BOOST_CHECK_THROW(v.get_or_throw<Position>("b1"), VariableTypeMismatch); + BOOST_CHECK_EQUAL(v.get_or_throw<Bool>("b2"), v.get<Bool>("b2")); + BOOST_CHECK_EQUAL(v.get_or_throw<Position>("p2"), v.get<Position>("p2")); } BOOST_FIXTURE_TEST_CASE(get_put, Vfix) @@ -141,5 +145,67 @@ BOOST_FIXTURE_TEST_CASE(del, Vfix) BOOST_CHECK(!v.get<Value>("p2")); } +BOOST_FIXTURE_TEST_CASE(get_all, Vfix) +{ + std::set<std::string> names; + foreach (const detail::Vmap<Value>::map_t::value_type& a, v.get_all<Bool>()) { + names.insert(a.first); + } + std::set<std::string> expected; + expected.insert("b1"); + expected.insert("b2"); + BOOST_CHECK_EQUAL_COLLECTIONS(names.begin(), names.end(), expected.begin(), expected.end()); + names.clear(); + foreach (const detail::Vmap<Value>::map_t::value_type& a, v.get_all<Value>()) { + names.insert(a.first); + } + expected.insert("p1"); + expected.insert("p2"); + expected.insert("p3"); + BOOST_CHECK_EQUAL_COLLECTIONS(names.begin(), names.end(), expected.begin(), expected.end()); +} + +BOOST_FIXTURE_TEST_CASE(reset_values, Vfix) +{ + v.reset_values(); + BOOST_CHECK_EQUAL(v.get_value<Bool>("b1"), false); + BOOST_CHECK_EQUAL(v.get_value<Bool>("b2"), false); + BOOST_CHECK_EQUAL(v.get_value<Position>("p1"), 0); + BOOST_CHECK_EQUAL(v.get_value<Position>("p2"), 0); + BOOST_CHECK_EQUAL(v.get_value<Position>("p3"), 0); +} + +BOOST_FIXTURE_TEST_CASE(clone, Vfix) +{ + boost::shared_ptr<Variables> copy(v.clone()); + BOOST_CHECK_EQUAL(v.size<Value>(), copy->size<Value>()); + BOOST_CHECK_EQUAL(v.size<Bool>(), copy->size<Bool>()); + BOOST_CHECK_EQUAL(v.size<Position>(), copy->size<Position>()); + foreach (const detail::Vmap<Value>::map_t::value_type& a, v.get_all<Value>()) { + boost::shared_ptr<Value> orig = a.second; 
+ std::string name = a.first; + boost::shared_ptr<Value> other = copy->get<Value>(name); + BOOST_REQUIRE(other); + BOOST_CHECK(orig != other); + BOOST_CHECK_EQUAL(orig->to_raw_string(), other->to_raw_string()); + } + foreach (const detail::Vmap<Bool>::map_t::value_type& a, v.get_all<Bool>()) { + boost::shared_ptr<Bool> orig = a.second; + std::string name = a.first; + boost::shared_ptr<Bool> other = copy->get<Bool>(name); + BOOST_REQUIRE(other); + BOOST_CHECK(orig != other); + BOOST_CHECK_EQUAL(orig->get_value(), other->get_value()); + } + foreach (const detail::Vmap<Position>::map_t::value_type& a, v.get_all<Position>()) { + boost::shared_ptr<Position> orig = a.second; + std::string name = a.first; + boost::shared_ptr<Position> other = copy->get<Position>(name); + BOOST_REQUIRE(other); + BOOST_CHECK(orig != other); + BOOST_CHECK_EQUAL(orig->get_value(), other->get_value()); + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/wcclparser/main.cpp b/wcclparser/main.cpp index c048ec83d51f0d002a710b34f804af51966c185d..f1ddd3e7a31e5c73427c19cff5ef67f935387e36 100644 --- a/wcclparser/main.cpp +++ b/wcclparser/main.cpp @@ -98,6 +98,7 @@ bool process_line(const std::string& line, Parser& parser) boost::shared_ptr<const Wccl::Value> retVal; boost::shared_ptr<ANTLRParserResultBase> retOp; boost::shared_ptr<Corpus2::Sentence> sentence; + sentence.reset(new Corpus2::Sentence); Wccl::SentenceContext sc(sentence); try { @@ -136,6 +137,7 @@ bool process_line(const std::string& line, Parser& parser) int main(int argc, char** argv) { std::string tagset_load = "kipi"; + std::string query = ""; bool quiet = false; using boost::program_options::value; @@ -143,13 +145,15 @@ int main(int argc, char** argv) desc.add_options() ("tagset,t", value(&tagset_load), "Tagset to use\n") + ("query,Q", value(&query), + "Query to run (disables interactive mode)\n") ("quiet,q", value(&quiet)->zero_tokens(), "Suppress messages\n") ("help,h", "Show help") ; boost::program_options::variables_map vm; 
boost::program_options::positional_options_description p; - p.add("tagset", -1); + p.add("query", -1); try { boost::program_options::store( @@ -176,6 +180,11 @@ int main(int argc, char** argv) Parser parser(tagset); + if (!query.empty()) { + process_line(query, parser); + return 0; + } + if (clear_screen()) { // }