Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
W
WCCL
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
WCCL
Commits
f6a79879
Commit
f6a79879
authored
14 years ago
by
Adam Wardyński
Browse files
Options
Downloads
Patches
Plain Diff
Affix operator (returning prefixes or suffixes of given length)
parent
201e00b0
Branches
Branches containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
libwccl/CMakeLists.txt
+2
-1
2 additions, 1 deletion
libwccl/CMakeLists.txt
libwccl/ops/affix.cpp
+44
-0
44 additions, 0 deletions
libwccl/ops/affix.cpp
libwccl/ops/affix.h
+59
-0
59 additions, 0 deletions
libwccl/ops/affix.h
tests/strsetfunctions.cpp
+60
-0
60 additions, 0 deletions
tests/strsetfunctions.cpp
with
165 additions
and
1 deletion
libwccl/CMakeLists.txt
+
2
−
1
View file @
f6a79879
...
...
@@ -16,9 +16,10 @@ set(LIBS ${LIBS} ${Boost_LIBRARIES})
SET
(
libwccl_STAT_SRC
exception.cpp
ops/and.cpp
ops/affix.cpp
ops/formatters.cpp
ops/logicalpredicate.cpp
ops/nor.cpp
ops/nor.cpp
ops/or.cpp
ops/predicate.cpp
ops/tolower.cpp
...
...
This diff is collapsed.
Click to expand it.
libwccl/ops/affix.cpp
0 → 100644
+
44
−
0
View file @
f6a79879
#include
<libwccl/ops/affix.h>
#include
<sstream>
#include
<boost/foreach.hpp>
#define foreach BOOST_FOREACH
namespace
Wccl
{
/**
 * Builds the textual form of the operator:
 * "<operator name>(<argument expression>, <affix length>)".
 * The operator name and the argument expression are both rendered
 * with the help of the given tagset.
 */
std::string Affix::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	out << operator_name(tagset) << "(";
	out << strset_expr_->to_string(tagset);
	out << ", " << affix_length_ << ")";
	return out.str();
}
/**
 * Builds the tagset-independent textual form of the operator:
 * "<raw operator name>(<raw argument expression>, <affix length>)".
 */
std::string Affix::to_raw_string() const
{
	std::ostringstream out;
	out << raw_operator_name() << "(";
	out << strset_expr_->to_raw_string();
	out << ", " << affix_length_ << ")";
	return out.str();
}
Affix
::
BaseRetValPtr
Affix
::
apply_internal
(
const
SentenceContext
&
context
)
const
{
if
(
affix_length_
==
0
)
{
return
strset_expr_
->
apply
(
context
);
}
const
boost
::
shared_ptr
<
StrSet
>&
set
=
strset_expr_
->
apply
(
context
);
boost
::
shared_ptr
<
StrSet
>
a_set
=
boost
::
shared_ptr
<
StrSet
>
(
new
StrSet
());
if
(
affix_length_
<
0
)
{
foreach
(
const
UnicodeString
&
s
,
set
->
contents
())
{
a_set
->
insert
(
UnicodeString
(
s
).
remove
(
0
,
s
.
length
()
+
affix_length_
));
}
}
else
{
foreach
(
const
UnicodeString
&
s
,
set
->
contents
())
{
UnicodeString
prefixed
(
s
);
prefixed
.
truncate
(
affix_length_
);
a_set
->
insert
(
prefixed
);
}
}
return
a_set
;
}
}
/* end ns Wccl */
This diff is collapsed.
Click to expand it.
libwccl/ops/affix.h
0 → 100644
+
59
−
0
View file @
f6a79879
#ifndef LIBWCCL_OPS_AFFIX_H
#define LIBWCCL_OPS_AFFIX_H
#include
<boost/shared_ptr.hpp>
#include
<libwccl/values/strset.h>
#include
<libwccl/ops/functions.h>
namespace
Wccl
{
/**
 * Operator over a set of strings that produces a new set in which every
 * string has been replaced by its prefix or suffix of a given length.
 * A positive length selects prefixes, a negative length selects suffixes
 * and a length of zero leaves the argument's value untouched.
 */
class Affix : public Function<StrSet>
{
public:
	typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;

	/**
	 * @param strset_expr  expression providing the set of strings;
	 *                     asserted to be non-null
	 * @param affix_length length of the affix; the sign chooses between
	 *                     prefix (positive) and suffix (negative)
	 */
	Affix(const StrSetFunctionPtr& strset_expr, int affix_length)
		: strset_expr_(strset_expr),
		  affix_length_(affix_length)
	{
		BOOST_ASSERT(strset_expr_);
	}

	/**
	 * String representation of the operator in form of:
	 * "affix(strset_expr_string, affix_length)"
	 */
	virtual std::string to_string(const Corpus2::Tagset& tagset) const;

	/**
	 * String representation of the operator in form of:
	 * "affix(strset_expr_raw_string, affix_length)"
	 * This version does not require a tagset, but may be incomplete
	 * and/or contain internal info.
	 */
	virtual std::string to_raw_string() const;

	/**
	 * Name of the operator that does not depend on any tagset.
	 */
	virtual const std::string raw_operator_name() const
	{
		return "affix";
	}

protected:
	typedef FunctionBase::BaseRetValPtr BaseRetValPtr;

	/** Expression evaluated to obtain the input set of strings. */
	const StrSetFunctionPtr strset_expr_;
	/** Requested affix length; the sign selects prefix or suffix. */
	const int affix_length_;

	/**
	 * Get a string set from the argument expression and return a set
	 * with all strings converted into prefixes or suffixes of the given
	 * length. For a length of zero the argument's result is returned as is.
	 */
	virtual BaseRetValPtr apply_internal(const SentenceContext& context) const;
};
}
/* end ns Wccl */
#endif // LIBWCCL_OPS_AFFIX_H
This diff is collapsed.
Click to expand it.
tests/strsetfunctions.cpp
+
60
−
0
View file @
f6a79879
...
...
@@ -8,6 +8,7 @@
#include
<libwccl/sentencecontext.h>
#include
<libwccl/ops/tolower.h>
#include
<libwccl/ops/toupper.h>
#include
<libwccl/ops/affix.h>
#include
<libwccl/ops/constant.h>
using
namespace
Wccl
;
...
...
@@ -29,6 +30,7 @@ struct StrSetFix
strset
.
insert
(
"some1325numbers"
);
strset
.
insert
(
"ALLUPPER"
);
strset
.
insert
(
"kIdSpEeChLoL"
);
strset
.
insert
(
"short"
);
strset_expr
.
reset
(
new
Constant
<
StrSet
>
(
strset
));
}
...
...
@@ -49,12 +51,70 @@ BOOST_FIXTURE_TEST_CASE(lower, StrSetFix)
lowerset
.
insert
(
"some1325numbers"
);
lowerset
.
insert
(
"allupper"
);
lowerset
.
insert
(
"kidspeechlol"
);
lowerset
.
insert
(
"short"
);
ToLower
to_lower
(
strset_expr
);
BOOST_CHECK
(
lowerset
.
equals
(
*
to_lower
.
apply
(
sc
)));
}
// ToUpper applied to the fixture's expression should yield exactly the
// upper-cased strings listed below.
BOOST_FIXTURE_TEST_CASE(upper, StrSetFix)
{
	static const char* const expected[] = {
		"ALLLOWER",
		"FIRSTCAPITAL",
		"PASCALCASE",
		"CAMELCASE",
		"SOME1325NUMBERS",
		"ALLUPPER",
		"KIDSPEECHLOL",
		"SHORT"
	};
	const char* const* const expected_end =
		expected + sizeof(expected) / sizeof(expected[0]);

	StrSet upperset;
	for (const char* const* it = expected; it != expected_end; ++it) {
		upperset.insert(*it);
	}

	ToUpper to_upper(strset_expr);
	BOOST_CHECK(upperset.equals(*to_upper.apply(sc)));
}
// Affix with a positive length of 7 should yield 7-character prefixes;
// "short" has fewer characters and is expected to stay whole.
BOOST_FIXTURE_TEST_CASE(prefix, StrSetFix)
{
	static const char* const expected[] = {
		"alllowe",
		"Firstca",
		"PascalC",
		"camelCa",
		"some132",
		"ALLUPPE",
		"kIdSpEe",
		"short"
	};
	const char* const* const expected_end =
		expected + sizeof(expected) / sizeof(expected[0]);

	StrSet prefixset;
	for (const char* const* it = expected; it != expected_end; ++it) {
		prefixset.insert(*it);
	}

	Affix prefix(strset_expr, 7);
	BOOST_CHECK(prefixset.equals(*prefix.apply(sc)));
}
// Affix with a negative length of -7 should yield 7-character suffixes;
// "short" has fewer characters and is expected to stay whole.
BOOST_FIXTURE_TEST_CASE(suffix, StrSetFix)
{
	static const char* const expected[] = {
		"lllower",
		"capital",
		"calCase",
		"melCase",
		"numbers",
		"LLUPPER",
		"EeChLoL",
		"short"
	};
	const char* const* const expected_end =
		expected + sizeof(expected) / sizeof(expected[0]);

	StrSet suffixset;
	for (const char* const* it = expected; it != expected_end; ++it) {
		suffixset.insert(*it);
	}

	Affix suffix(strset_expr, -7);
	BOOST_CHECK(suffixset.equals(*suffix.apply(sc)));
}
// An affix length of zero is expected to hand back the fixture's
// string set unchanged.
BOOST_FIXTURE_TEST_CASE(affix_0, StrSetFix)
{
	Affix affix_0(strset_expr, 0);
	BOOST_CHECK(strset.equals(*affix_0.apply(sc)));
}
BOOST_FIXTURE_TEST_CASE
(
lower_locale
,
StrSetFix
)
{
//I'm not sure if I can guarantee this test will pass
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment