Skip to content
Snippets Groups Projects
Commit 30a04d67 authored by Pawel Orlowicz
Browse files

added: lexicons can have duplicated keys

parent 6773b6fd
Branches
No related merge requests found
......@@ -25,23 +25,30 @@ or FITNESS FOR A PARTICULAR PURPOSE.
namespace Wccl {
// Rendered diff of Lexicon::translate(key): removed and added lines appear
// together without +/- markers, so this span is not compilable as shown.
// NOTE(review): judging by the return types, the lines that return `empty`
// and `i->second` look like the removed version, and the lines that build
// and return `ret_set` look like the added one - confirm against the commit.
const UnicodeString& Lexicon::translate(const UnicodeString &key) const
boost::shared_ptr<StrSet> Lexicon::translate(const UnicodeString &key) const
{
static UnicodeString empty;
// Fresh result set; it is returned still empty when the key is not found.
boost::shared_ptr<StrSet> ret_set = boost::make_shared<StrSet>();
map_t::const_iterator i = map_.find(key);
if (i == map_.end()) {
return empty;
return ret_set;
}
// Insert every value stored under the key into the result set.
BOOST_FOREACH (const UnicodeString& s, i->second){
ret_set->insert(s);
}
return i->second;
return ret_set;
}
boost::shared_ptr<StrSet> Lexicon::translate(const StrSet& set) const
{
boost::shared_ptr<StrSet> ret_set = boost::make_shared<StrSet>();
BOOST_FOREACH (const UnicodeString& s, set.get_value()) {
const UnicodeString& v = translate(s);
if (!v.isEmpty()) {
ret_set->insert(v);
boost::shared_ptr<StrSet> v = translate(s);
if (!v->empty()) {
BOOST_FOREACH(const UnicodeString& s, v->contents()){
ret_set->insert(s);
}
}
}
return ret_set;
......@@ -49,16 +56,13 @@ boost::shared_ptr<StrSet> Lexicon::translate(const StrSet& set) const
// Rendered diff of Lexicon::insert: removed and added lines appear together
// without +/- markers, so this span mixes two versions of the function.
// NOTE(review): the has_key() rejection and `map_[key] = value` look like the
// removed version (they conflict with appending a value to an existing key,
// which is what the commit message describes) - confirm against the commit.
void Lexicon::insert(const UnicodeString& key, const UnicodeString& value)
{
if (has_key(key)) {
throw InvalidArgument("key", "Duplicated key in lexicon: " + PwrNlp::to_utf8(key));
}
// Empty keys and empty values are both rejected with InvalidArgument.
if (key.isEmpty()) {
throw InvalidArgument("key", "Empty key string in lexicon.");
}
if (value.isEmpty()) {
throw InvalidArgument("value", "Empty value string in lexicon.");
}
map_[key] = value;
// Appends the value to the list kept under the key; presumably operator[]
// creates an empty list the first time a key is seen - TODO confirm map_ type.
map_[key].push_back(value);
}
} /* end ns Wccl */
......@@ -29,7 +29,7 @@ namespace Wccl {
class Lexicon : boost::noncopyable
{
public:
typedef boost::unordered_map<UnicodeString, UnicodeString> map_t;
typedef boost::unordered_map<UnicodeString, std::vector<UnicodeString> > map_t;
Lexicon(const std::string& name, const std::string& file_name)
: name_(name),
......@@ -43,7 +43,7 @@ public:
* @returns Set of values assigned to the given key, if present.
* Empty set if the key was not present.
*/
const UnicodeString& translate(const UnicodeString& key) const;
boost::shared_ptr<StrSet> translate(const UnicodeString& key) const;
/**
......
......@@ -32,7 +32,7 @@ namespace Wccl {
/* --------------------------------------------------------------------- */
const UnicodeString& translate(const UnicodeString& key) const;
boost::shared_ptr<StrSet> translate(const UnicodeString& key) const;
// boost::shared_ptr<StrSet> translate(const StrSet& set) const;
// TODO
......
by part
och interj
ach interj
dla prep
bez prep
z prep
dziś adv
wczoraj adv
by part2
uważać verb1
import("indecl.lex", "indecl") // import file as "indecl"
@"indecl" ( // gets the label from the lexicon
lex(base[0], "indecl");
lex("dziś", "indecl");
lex("by", "indecl");
lex("marchewka", "indecl");
lex(["by", "kot", "marchewka"], "indecl");
lex(["by", "dla"], "indecl");
lex(["by", "kot", "marchewka", "wczoraj"], "indecl")
)
wholeWccl=indecl.wccl
sentence=t01.xml
tagset=nkjp
---
[]
["verb1"]
---
[]
["adv"]
---
[]
["part", "part2"]
---
[]
[]
---
[]
["part", "part2"]
---
[]
["part", "part2", "prep"]
---
[]
["part", "part2", "adv"]
---
......@@ -28,6 +28,8 @@ or FITNESS FOR A PARTICULAR PURPOSE.
#include <libwccl/parser/Parser.h>
#include <libwccl/ops/funexeccontext.h>
#include <libwccl/wcclfile.h>
#include <fstream>
#include <boost/filesystem/fstream.hpp>
#include <iostream>
......@@ -66,6 +68,8 @@ void test_one_item_actual(const compare_test& c)
std::string separators = "=";
std::string tagset_name = "kipi";
std::string sentence_filename;
std::string wholeWccl;
boost::shared_ptr<Wccl::WcclFile> wcclFile;
std::string line;
int line_no = 0;
while (ifs_in.good() && line != "---") {
......@@ -78,6 +82,8 @@ void test_one_item_actual(const compare_test& c)
tagset_name = fields[1];
} else if (fields[0] == "sentence") {
sentence_filename = fields[1];
} else if (fields[0] == "wholeWccl") {
wholeWccl = fields[1];
}
}
}
......@@ -103,6 +109,7 @@ void test_one_item_actual(const compare_test& c)
std::string operator_string, expected_output;
int lexCounter = 0;
while (ifs_in.good()) {
++line_no;
std::getline(ifs_in, line);
......@@ -113,7 +120,20 @@ void test_one_item_actual(const compare_test& c)
Wccl::Parser parser(tagset);
boost::shared_ptr<Wccl::FunctionalOperator> parsed;
try {
parsed = parser.parseAnyOperator(operator_string);
if(wholeWccl.size() == 0){
parsed = parser.parseAnyOperator(operator_string);
}else{
path wholeWcclPath = c.search_path / wholeWccl;
wcclFile = parser.parseWcclFileFromPath(wholeWcclPath.string(), c.search_path.string());
parsed = wcclFile->get_untyped_op_ptr("indecl", lexCounter);
operator_string = "operator number ";
char lexCounterStr[10];
sprintf(lexCounterStr, "%d", lexCounter);
operator_string += lexCounterStr;
operator_string += " defined in file: ";
operator_string += wholeWcclPath.string();
lexCounter++;
}
} catch (Wccl::WcclError& e) {
std::cerr << e.info() << "\n---\n" << operator_string << "\n---\n";
throw;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment