Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
37fa00cb
Commit
37fa00cb
authored
14 years ago
by
ilor
Browse files
Options
Downloads
Patches
Plain Diff
simple io test to make sure slight xces writer refactoring does not break things
parent
3722ddc3
Branches
Branches containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
libcorpus2/io/xcescommon.cpp
+17
-4
17 additions, 4 deletions
libcorpus2/io/xcescommon.cpp
libcorpus2/io/xcescommon.h
+6
-0
6 additions, 0 deletions
libcorpus2/io/xcescommon.h
tests/CMakeLists.txt
+1
-0
1 addition, 0 deletions
tests/CMakeLists.txt
tests/io.cpp
+74
-0
74 additions, 0 deletions
tests/io.cpp
with
98 additions
and
4 deletions
libcorpus2/io/xcescommon.cpp
+
17
−
4
View file @
37fa00cb
...
...
@@ -46,9 +46,8 @@ namespace {
}
}
void
token_as_xces_xml
(
std
::
ostream
&
os
,
const
Tagset
&
tagset
,
const
Token
&
t
,
int
indent
,
bool
output_disamb
/* = false */
,
bool
sort
/* = false */
,
bool
whitespace_info
/* false */
)
void
token_as_xces_xml_head
(
std
::
ostream
&
os
,
const
Token
&
t
,
int
indent
,
bool
whitespace_info
/* false */
)
{
if
(
t
.
wa
()
==
PwrNlp
::
Whitespace
::
None
)
{
osi
(
os
,
indent
)
<<
"<ns/>
\n
"
;
...
...
@@ -59,7 +58,12 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
osi
(
os
,
indent
)
<<
"<tok ws=
\"
"
<<
PwrNlp
::
Whitespace
::
to_string
(
t
.
wa
())
<<
"
\"
>
\n
"
;
}
++
indent
;
}
void
token_as_xces_xml_body
(
std
::
ostream
&
os
,
const
Tagset
&
tagset
,
const
Token
&
t
,
int
indent
,
bool
output_disamb
/* = false */
,
bool
sort
/* = false */
)
{
osi
(
os
,
indent
)
<<
"<orth>"
;
encode_xml_entities_into
(
os
,
t
.
orth_utf8
());
os
<<
"</orth>
\n
"
;
...
...
@@ -80,6 +84,15 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
os
<<
s
;
}
}
}
/**
 * Write one complete token in XCES form: the opening markup, the token
 * contents and the closing tag.
 *
 * The work is delegated to token_as_xces_xml_head (opening markup) and
 * token_as_xces_xml_body (contents); this function only adds the final
 * "</tok>" line.
 *
 * @param os              stream the markup is written to
 * @param tagset          tagset forwarded to the body writer
 * @param t               token to output
 * @param indent          indentation level of the token's opening/closing tags
 * @param output_disamb   forwarded to the body writer
 * @param sort            forwarded to the body writer
 * @param whitespace_info forwarded to the head writer
 */
void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
		const Token& t, int indent, bool output_disamb /* = false */,
		bool sort /* = false */, bool whitespace_info /* false */)
{
	// Opening markup is emitted at the caller's indentation level.
	token_as_xces_xml_head(os, t, indent, whitespace_info);

	// Token contents are nested one level deeper than the enclosing tag.
	const int inner_indent = indent + 1;
	token_as_xces_xml_body(os, tagset, t, inner_indent, output_disamb, sort);

	// The closing tag lines up with the opening one.
	osi(os, indent) << "</tok>\n";
}
...
...
This diff is collapsed.
Click to expand it.
libcorpus2/io/xcescommon.h
+
6
−
0
View file @
37fa00cb
...
...
@@ -29,6 +29,12 @@ void token_as_xces_xml(std::ostream& os, const Tagset& tagset,
const
Token
&
t
,
int
indent
,
bool
output_disamb
=
false
,
bool
sort
=
false
,
bool
whitespace_info
=
false
);
/**
 * Output the opening part of a token's XCES representation to the stream.
 *
 * token_as_xces_xml calls this first, then token_as_xces_xml_body with the
 * indent increased by one, and finally writes the closing "</tok>" itself;
 * a caller using this function directly is responsible for the same.
 *
 * @param os              stream to write to
 * @param t               token whose opening markup is written
 * @param indent          indentation level for the emitted lines
 * @param whitespace_info NOTE(review): despite the trailing comment below, no
 *                        default value is declared here, so this argument
 *                        must always be passed explicitly
 */
void
token_as_xces_xml_head
(
std
::
ostream
&
os
,
const
Token
&
t
,
int
indent
,
bool
whitespace_info
/* false */
);
/**
 * Output the contents of a token (what goes between the opening and the
 * closing "tok" tags) in XCES form; the output starts with the token's
 * orth wrapped in an "orth" element with XML entities encoded.
 *
 * token_as_xces_xml calls this with its own indent increased by one, after
 * token_as_xces_xml_head and before writing the closing "</tok>".
 *
 * @param os            stream to write to
 * @param tagset        tagset passed through from token_as_xces_xml
 * @param t             token whose contents are written
 * @param indent        indentation level for the emitted lines
 * @param output_disamb NOTE(review): the commented-out "= false" is not an
 *                      actual default in this declaration; the argument must
 *                      be passed explicitly
 * @param sort          same remark as for output_disamb: no default declared
 */
void
token_as_xces_xml_body
(
std
::
ostream
&
os
,
const
Tagset
&
tagset
,
const
Token
&
t
,
int
indent
,
bool
output_disamb
/* = false */
,
bool
sort
/* = false */
);
/**
* Output a xml-encoded version of the given string into the given ostream.
* The default XML entity substitutions are made: less than, greater than,
...
...
This diff is collapsed.
Click to expand it.
tests/CMakeLists.txt
+
1
−
0
View file @
37fa00cb
...
...
@@ -8,6 +8,7 @@ add_executable( tests
main.cpp
ann_basic.cpp
basic.cpp
io.cpp
tag_split.cpp
tagset_parse.cpp
)
...
...
This diff is collapsed.
Click to expand it.
tests/io.cpp
0 → 100644
+
74
−
0
View file @
37fa00cb
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include
<boost/test/unit_test.hpp>
#include
<set>
#include
<libpwrutils/foreach.h>
#include
<libpwrutils/bitset.h>
#include
<libcorpus2/tagsetmanager.h>
#include
<libcorpus2/io/xcesreader.h>
#include
<libcorpus2/io/writer.h>
// File-local test fixture: a small XCES (cesAna) document with one chunk
// holding one sentence chunk of four tokens. The iobase test case feeds this
// text to an XcesReader and then expects the "xces,flat" writer to reproduce
// it exactly, so any change to this literal changes what the test demands
// of the writer output.
// NOTE(review): the literal contains non-ASCII characters and declares
// encoding="UTF-8"; presumably this source file must be saved as UTF-8.
namespace
{
static
char
swiatopoglad
[]
=
"<?xml version=
\"
1.0
\"
encoding=
\"
UTF-8
\"
?>
\n
"
"<!DOCTYPE cesAna SYSTEM
\"
xcesAnaIPI.dtd
\"
>
\n
"
"<cesAna xmlns:xlink=
\"
http://www.w3.org/1999/xlink
\"
version=
\"
1.0
\"
type=
\"
lex disamb
\"
>
\n
"
"<chunkList>
\n
"
"<chunk id=
\"
ch51
\"
type=
\"
tok
\"
>
\n
"
"<chunk type=
\"
s
\"
>
\n
"
"<tok>
\n
"
"<orth>Uważam</orth>
\n
"
"<lex disamb=
\"
1
\"
><base>uważać</base><ctag>fin:sg:pri:imperf</ctag></lex>
\n
"
"</tok>
\n
"
"<ns/>
\n
"
"<tok>
\n
"
"<orth>,</orth>
\n
"
"<lex disamb=
\"
1
\"
><base>,</base><ctag>interp</ctag></lex>
\n
"
"</tok>
\n
"
"<tok>
\n
"
"<orth>że</orth>
\n
"
"<lex disamb=
\"
1
\"
><base>że</base><ctag>conj</ctag></lex>
\n
"
"</tok>
\n
"
"<tok>
\n
"
"<orth>światopogląd</orth>
\n
"
"<lex><base>światopogląd</base><ctag>subst:sg:acc:m3</ctag></lex>
\n
"
"<lex disamb=
\"
1
\"
><base>światopogląd</base><ctag>subst:sg:nom:m3</ctag></lex>
\n
"
"</tok>
\n
"
"</chunk>
\n
"
"</chunk>
\n
"
"</chunkList>
\n
"
"</cesAna>
\n
"
;
}
BOOST_AUTO_TEST_SUITE
(
io
)
// Round-trip test for XCES I/O: parse the reference document with an
// XcesReader, write the resulting chunk back with the "xces,flat" writer
// and require the output text to equal the input text.
BOOST_AUTO_TEST_CASE( iobase )
{
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");

	// Feed the reference document to the reader through an in-memory stream.
	std::stringstream ssin;
	ssin << swiatopoglad;
	Corpus2::XcesReader xr(tagset, ssin);
	boost::shared_ptr<Corpus2::Chunk> chunk = xr.get_next_chunk();

	// The chunk is dereferenced below; if the reader produced nothing, stop
	// this test case with a reported failure instead of dereferencing an
	// empty pointer and taking the whole test binary down.
	BOOST_REQUIRE(chunk.get() != NULL);

	// Write the chunk back out and compare with the original text.
	std::stringstream ss;
	boost::shared_ptr<Corpus2::TokenWriter> w(
		Corpus2::TokenWriter::create("xces,flat", ss, tagset));
	w->write_chunk(*chunk);
	// finish() is called before the comparison so that everything the
	// writer emits at the end of its output is included in ss.
	w->finish();
	BOOST_CHECK_EQUAL(ss.str(), swiatopoglad);
}
BOOST_AUTO_TEST_SUITE_END
();
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment