Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
WCCL
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
WCCL
Commits
631c5f87
Commit
631c5f87
authored
14 years ago
by
Adam Wardynski
Browse files
Options
Downloads
Patches
Plain Diff
matching_categories(Tag) method for TSet.
parent
50444b26
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
libwccl/values/tset.cpp
+7
-0
7 additions, 0 deletions
libwccl/values/tset.cpp
libwccl/values/tset.h
+10
-0
10 additions, 0 deletions
libwccl/values/tset.h
tests/values.cpp
+21
-0
21 additions, 0 deletions
tests/values.cpp
with
38 additions
and
0 deletions
libwccl/values/tset.cpp
+
7
−
0
View file @
631c5f87
#include
<libwccl/values/tset.h>
#include
<libpwrutils/foreach.h>
#include
<libpwrutils/bitset.h>
#include
<sstream>
namespace
Wccl
{
...
...
@@ -34,6 +35,12 @@ int TSet::categories_count(const Corpus2::Tagset& tagset) const
return
cats
;
}
/**
 * Counts how many symbols of the supplied tag are matched by this symbol set.
 *
 * The tag is first restricted via tag_.get_masked(tag); the result is the
 * number of bits set in the part-of-speech mask of that restricted tag plus
 * the number of bits set in its values mask.
 *
 * NOTE(review): the result equals the number of matching categories only if
 * the supplied tag carries at most one value per category -- see the warning
 * on the declaration in tset.h.
 */
int TSet::matching_categories(const Corpus2::Tag& tag) const
{
    const Corpus2::Tag& overlap = tag_.get_masked(tag);
    // Each set bit in the masked tag is one symbol shared with this set.
    const int pos_hits = PwrNlp::count_bits_set(overlap.get_pos());
    const int value_hits = PwrNlp::count_bits_set(overlap.get_values());
    return pos_hits + value_hits;
}
void
TSet
::
insert_symbol
(
const
Corpus2
::
Tagset
&
tagset
,
const
std
::
string
&
s
)
{
tag_
.
combine_with
(
tagset
.
parse_symbol
(
s
));
...
...
This diff is collapsed.
Click to expand it.
libwccl/values/tset.h
+
10
−
0
View file @
631c5f87
...
...
@@ -89,6 +89,16 @@ public:
*/
int
categories_count
(
const
Corpus2
::
Tagset
&
tagset
)
const
;
/**
* @return The number of categories present in the supplied tag that match
* this symbol set.
* @warning The underlying assumption is that the supplied tag has at most
* 1 value per category. Otherwise the value will be incorrect.
* @note The symbol set may have partially defined categories. Only values
* present in this symbol set count when matching values in the tag.
*/
int
matching_categories
(
const
Corpus2
::
Tag
&
tag
)
const
;
/**
 * Combines the supplied tag into this symbol set by forwarding it to
 * combine_with() on the underlying tag_ member.
 *
 * @param other the tag to combine into this set
 */
void
combine_with
(
const
Corpus2
::
Tag
&
other
)
{
tag_
.
combine_with
(
other
);
}
...
...
This diff is collapsed.
Click to expand it.
tests/values.cpp
+
21
−
0
View file @
631c5f87
...
...
@@ -68,40 +68,61 @@ BOOST_AUTO_TEST_CASE(tset_ops)
{
    // Exercises TSet equality, subset, intersection, categories_count() and
    // matching_categories() while symbols are inserted one at a time into
    // two initially empty sets. After every insertion the same triple is
    // verified: categories_count, matching_categories(subst_tag),
    // matching_categories(adj_tag).
    TSet s1, s2;
    const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
    Corpus2::Tag subst_tag = tagset.parse_tag("subst:sg:nom:f", false)[0];
    Corpus2::Tag adj_tag = tagset.parse_tag("adj:pl:acc:m3:pos", false)[0];

    // Both sets empty.
    BOOST_CHECK(s1.equals(s2));
    BOOST_CHECK(s1.is_subset_of(s2));
    BOOST_CHECK(s2.is_subset_of(s1));
    BOOST_CHECK(!s1.intersects(s2));
    BOOST_CHECK_EQUAL(0, s1.categories_count(tagset));
    BOOST_CHECK_EQUAL(0, s1.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(0, s1.matching_categories(adj_tag));

    // s1 = {subst}, s2 = {}
    s1.insert_symbol(tagset, "subst");
    BOOST_CHECK_EQUAL(1, s1.categories_count(tagset));
    BOOST_CHECK_EQUAL(1, s1.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(0, s1.matching_categories(adj_tag));
    BOOST_CHECK(!s1.equals(s2));
    BOOST_CHECK(!s1.is_subset_of(s2));
    BOOST_CHECK(s2.is_subset_of(s1));
    BOOST_CHECK(!s1.intersects(s2));

    // s1 = {subst}, s2 = {pl}
    s2.insert_symbol(tagset, "pl");
    BOOST_CHECK_EQUAL(1, s2.categories_count(tagset));
    BOOST_CHECK_EQUAL(0, s2.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(1, s2.matching_categories(adj_tag));
    BOOST_CHECK(!s1.equals(s2));
    BOOST_CHECK(!s1.is_subset_of(s2));
    BOOST_CHECK(!s2.is_subset_of(s1));
    BOOST_CHECK(!s1.intersects(s2));

    // s1 = {subst}, s2 = {pl, subst}
    s2.insert_symbol(tagset, "subst");
    BOOST_CHECK_EQUAL(2, s2.categories_count(tagset));
    BOOST_CHECK_EQUAL(1, s2.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(1, s2.matching_categories(adj_tag));
    BOOST_CHECK(!s1.equals(s2));
    BOOST_CHECK(s1.is_subset_of(s2));
    BOOST_CHECK(!s2.is_subset_of(s1));
    BOOST_CHECK(s1.intersects(s2));

    // s1 = {subst, pl}, s2 = {pl, subst}
    s1.insert_symbol(tagset, "pl");
    BOOST_CHECK_EQUAL(2, s1.categories_count(tagset));
    BOOST_CHECK_EQUAL(1, s1.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(1, s1.matching_categories(adj_tag));
    BOOST_CHECK(s1.equals(s2));
    BOOST_CHECK(s1.is_subset_of(s2));
    BOOST_CHECK(s2.is_subset_of(s1));
    BOOST_CHECK(s1.intersects(s2));

    // s1 = {subst, pl, sg}: "sg" adds a value but, per the expected count
    // below, no new category.
    s1.insert_symbol(tagset, "sg");
    BOOST_CHECK_EQUAL(2, s1.categories_count(tagset));
    BOOST_CHECK_EQUAL(2, s1.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(1, s1.matching_categories(adj_tag));

    // s1 = {subst, pl, sg, f}
    s1.insert_symbol(tagset, "f");
    BOOST_CHECK_EQUAL(3, s1.categories_count(tagset));
    BOOST_CHECK_EQUAL(3, s1.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(1, s1.matching_categories(adj_tag));

    // s1 = {subst, pl, sg, f, adj}
    s1.insert_symbol(tagset, "adj");
    BOOST_CHECK_EQUAL(3, s1.categories_count(tagset));
    // Previously a duplicated categories_count check; subst_tag still
    // matches on subst, sg and f, so the expected value stays at 3.
    BOOST_CHECK_EQUAL(3, s1.matching_categories(subst_tag));
    BOOST_CHECK_EQUAL(2, s1.matching_categories(adj_tag));
}
BOOST_AUTO_TEST_CASE
(
position_ops
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment