Skip to content
Snippets Groups Projects
Commit fad29978 authored by Grzegorz Kostkowski
Browse files

Merge branch 'feat_remove_attr' into 'develop'

Remove attribute feature for token metadata

See merge request !19
parents 26b375b7 9a117791
Branches develop
No related tags found
1 merge request: !19 "Remove attribute feature for token metadata"
Pipeline #4566 passed
...@@ -40,6 +40,14 @@ public: ...@@ -40,6 +40,14 @@ public:
void set_attribute(const std::string& name, const std::string& value); void set_attribute(const std::string& name, const std::string& value);
/**
 * Erase the attribute stored under the given key.
 *
 * @param key name of the attribute to drop
 * @return true when an entry was erased, false when no attribute
 *         with that key was present
 */
bool remove_attribute(const std::string& key) {
	// erase() reports how many entries it removed; any non-zero count
	// means the key was present.
	return attributes_.erase(key) != 0;
}
const attr_map_t& attributes() const { const attr_map_t& attributes() const {
return attributes_; return attributes_;
} }
......
...@@ -29,6 +29,7 @@ namespace Corpus2 { ...@@ -29,6 +29,7 @@ namespace Corpus2 {
bool has_attribute(const std::string& name) const; bool has_attribute(const std::string& name) const;
std::string get_attribute(const std::string& name) const; std::string get_attribute(const std::string& name) const;
void set_attribute(const std::string& name, const std::string& value); void set_attribute(const std::string& name, const std::string& value);
bool remove_attribute(const std::string& key);
typedef std::map<std::string, std::string> attr_map_t; typedef std::map<std::string, std::string> attr_map_t;
const attr_map_t& attributes() const; const attr_map_t& attributes() const;
......
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE chunkList SYSTEM "ccl.dtd">
<chunkList>
<chunk id="ch1">
<sentence id="s1">
<tok>
<orth>Kofeina</orth>
<lex disamb="1"><base>kofeina</base><ctag>subst:sg:nom:f</ctag></lex>
<prop key="e01">http://plwordnet.pwr.wroc.pl/wordnet/synset/15470</prop>
<prop key="sense:ukb:syns_id">15470</prop>
<prop key="sense:ukb:syns_rank">15470/30.0403943450</prop>
<prop key="sense:ukb:unitsstr">kofeina.1(24:sbst)</prop>
</tok>
</sentence>
</chunk>
</chunkList>
import cclutils as ccl
def remove_attribute(token, key):
    """
    Remove an attribute from the metadata of a token.

    Args:
        token (Corpus2.token): Token whose metadata should be modified.
        key (object): Attribute name. Values that are not already strings
            are converted with ``str()`` before the lookup.

    Returns:
        bool: ``True`` if the attribute has been removed. ``False`` if the
        token has no metadata at all, or its metadata holds no attribute
        under the given key.
    """
    if not token.has_metadata():
        return False
    # The binding declares the key as a std::string, so perform the cast
    # promised above instead of handing over an arbitrary object.
    if not isinstance(key, str):
        key = str(key)
    return token.get_metadata().remove_attribute(key)
# Fixture document and the property that the test removes.
test_doc = "data/ccl03.xml"
rm_prop = "e01"
expected_value = "http://plwordnet.pwr.wroc.pl/wordnet/synset/15470"

doc = ccl.read(test_doc)

# Flatten the document into a token list and keep its last token.
all_tokens = [
    t for p in doc.paragraphs() for s in p.sentences() for t in s.tokens()
]
tok = all_tokens[-1]

# Initial check: the property is present before removal.
initial_value = ccl.get_attribute(tok, rm_prop)
assert initial_value == expected_value

# Removing an existing property reports success.
removed_existing = remove_attribute(tok, rm_prop)
assert removed_existing == True

# Removing an unknown property reports failure.
removed_missing = remove_attribute(tok, "not-existing-prop")
assert removed_missing == False

# The removed property can no longer be read back.
value_after_removal = ccl.get_attribute(tok, rm_prop, None)
assert value_after_removal == None

print("All tests passed!")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment