Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
9d901818
Commit
9d901818
authored
14 years ago
by
ilor
Browse files
Options
Downloads
Patches
Plain Diff
add a simple premorph writer io class
parent
07d6f1b8
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
libcorpus2/CMakeLists.txt
+1
-0
1 addition, 0 deletions
libcorpus2/CMakeLists.txt
libcorpus2/io/premorphwriter.cpp
+107
-0
107 additions, 0 deletions
libcorpus2/io/premorphwriter.cpp
libcorpus2/io/premorphwriter.h
+56
-0
56 additions, 0 deletions
libcorpus2/io/premorphwriter.h
with
164 additions
and
0 deletions
libcorpus2/CMakeLists.txt
+
1
−
0
View file @
9d901818
...
...
@@ -52,6 +52,7 @@ SET(libcorpus2_STAT_SRC
token.cpp
io/orthwriter.cpp
io/plainwriter.cpp
io/premorphwriter.cpp
io/reader.cpp
io/rft.cpp
io/sax.cpp
...
...
This diff is collapsed.
Click to expand it.
libcorpus2/io/premorphwriter.cpp
0 → 100644
+
107
−
0
View file @
9d901818
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#include
<libcorpus2/io/premorphwriter.h>
#include
<libpwrutils/foreach.h>
namespace
Corpus2
{
// Registers PremorphWriter with TokenWriter under the class id "premorph".
// The second argument ("chunk") is forwarded to register_writer as-is;
// presumably it lists the parameters this writer accepts -- confirm against
// TokenWriter::register_writer.
bool PremorphWriter::registered(
	TokenWriter::register_writer<PremorphWriter>("premorph", "chunk"));
/**
 * Create a premorph writer and immediately emit the document header.
 *
 * All three arguments are forwarded to the TokenWriter base. The chunk id
 * counter starts at 0 and chunk forcing is off unless one of the params is
 * exactly "chunk".
 *
 * @param os      output stream, passed to TokenWriter
 * @param tagset  tagset, passed to TokenWriter
 * @param params  writer parameters; only "chunk" is examined here
 */
PremorphWriter::PremorphWriter(std::ostream& os, const Tagset& tagset,
		const string_range_vector& params)
	: TokenWriter(os, tagset, params)
	, cid_(0)
	, force_chunk_(false)
{
	foreach (const string_range& param, params) {
		// Any other parameter value is ignored by this loop.
		if (boost::copy_range<std::string>(param) == "chunk") {
			force_chunk_ = true;
		}
	}
	// The header depends on force_chunk_, so it is written only after
	// all parameters have been scanned.
	do_header();
}
/**
 * Destructor: calls finish() before the object goes away.
 * finish() is not defined in this file; presumably it comes from TokenWriter
 * and triggers do_footer() -- confirm there.
 */
PremorphWriter::~PremorphWriter()
{
	this->finish();
}
namespace {

/**
 * Return a copy of @p raw that is safe to emit as XML character data:
 * '&', '<' and '>' are replaced by the predefined entities. The input is
 * processed byte by byte, which is safe for UTF-8 because these characters
 * never occur inside a multi-byte sequence.
 */
std::string premorph_escape_text(const std::string& raw)
{
	std::string escaped;
	escaped.reserve(raw.size());
	for (std::string::size_type i = 0; i < raw.size(); ++i) {
		switch (raw[i]) {
		case '&':
			escaped += "&amp;";
			break;
		case '<':
			escaped += "&lt;";
			break;
		case '>':
			escaped += "&gt;";
			break;
		default:
			escaped += raw[i];
			break;
		}
	}
	return escaped;
}

} /* end anonymous ns */

/**
 * Write a single token as plain text: the whitespace string corresponding
 * to the token's preceding-whitespace amount (t.wa()), followed by its
 * orth with XML special characters escaped.
 *
 * @param t  token to write
 */
void PremorphWriter::write_token(const Token& t)
{
	os() << PwrNlp::Whitespace::to_whitespace(t.wa())
		<< premorph_escape_text(t.orth_utf8());
}

/**
 * Write a sentence as a <chunk type="s"> element holding the sentence text.
 *
 * The first token is written as its orth only (its preceding whitespace is
 * not emitted); each following token goes through write_token(), which
 * prefixes the token's whitespace. All orths are XML-escaped so that tokens
 * containing '&', '<' or '>' do not break the document.
 *
 * @param s  sentence to write; an empty sentence yields an empty chunk
 */
void PremorphWriter::write_sentence(const Sentence& s)
{
	os() << "<chunk type=\"s\">";
	if (!s.tokens().empty()) {
		os() << premorph_escape_text(s[0]->orth_utf8());
	}
	for (size_t i = 1; i < s.tokens().size(); ++i) {
		write_token(*s[i]);
	}
	os() << "</chunk>\n";
}
/**
 * Write a whole chunk: an opening <chunk> tag built from the chunk's
 * attributes (see paragraph_head(const Chunk&)), then every sentence of the
 * chunk via write_sentence(), then the closing </chunk> tag.
 *
 * @param c  chunk to write
 */
void PremorphWriter::write_chunk(const Chunk& c)
{
	paragraph_head(c);
	foreach (const Sentence* sentence, c.sentences()) {
		write_sentence(*sentence);
	}
	os() << "</chunk>\n";
}
/**
 * Emit the document prologue: XML declaration, DOCTYPE, the opening
 * <cesAna> element (type "premorph") and the opening <chunkList>.
 * When chunk forcing is enabled, a paragraph chunk is opened right away
 * via paragraph_head().
 */
void PremorphWriter::do_header()
{
	os() << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
		<< "<!DOCTYPE cesAna SYSTEM \"xcesAnaIPI.dtd\">\n"
		<< "<cesAna"
		<< " xmlns:xlink=\"http://www.w3.org/1999/xlink\""
		<< " version=\"1.0\" type=\"premorph\">\n"
		<< "<chunkList>\n";
	if (force_chunk_) {
		paragraph_head();
	}
}
/**
 * Emit the document epilogue: closes the forced paragraph chunk if chunk
 * forcing is enabled, then closes <chunkList> and <cesAna>.
 */
void PremorphWriter::do_footer()
{
	if (force_chunk_) {
		// Matches the chunk opened by do_header() when forcing is on.
		os() << "</chunk>\n";
	}
	os() << "</chunkList>\n"
		<< "</cesAna>\n";
}
/**
 * Open a paragraph chunk with an automatically generated id: the chunk
 * counter is incremented first, so ids run "ch1", "ch2", ...
 */
void PremorphWriter::paragraph_head()
{
	++cid_;
	os() << "<chunk id=\"ch" << cid_ << "\" type=\"p\">\n";
}
namespace {

/**
 * Return a copy of @p raw that is safe to emit inside a double-quoted XML
 * attribute value: '&', '<', '>' and '"' are replaced by the predefined
 * entities. Byte-wise processing is safe for UTF-8 input because these
 * characters never occur inside a multi-byte sequence.
 */
std::string premorph_escape_attr_value(const std::string& raw)
{
	std::string escaped;
	escaped.reserve(raw.size());
	for (std::string::size_type i = 0; i < raw.size(); ++i) {
		switch (raw[i]) {
		case '&':
			escaped += "&amp;";
			break;
		case '<':
			escaped += "&lt;";
			break;
		case '>':
			escaped += "&gt;";
			break;
		case '"':
			escaped += "&quot;";
			break;
		default:
			escaped += raw[i];
			break;
		}
	}
	return escaped;
}

} /* end anonymous ns */

/**
 * Open a chunk element carrying every attribute of the given chunk, in the
 * iteration order of c.attributes(). Attribute values are XML-escaped so a
 * value containing a quote, '&' or '<' cannot produce malformed output.
 * Attribute names are written unchanged.
 *
 * @param c  chunk whose attributes are copied onto the opening tag
 */
void PremorphWriter::paragraph_head(const Chunk& c)
{
	os() << "<chunk";
	foreach (const Chunk::attr_map_t::value_type& v, c.attributes()) {
		os() << " " << v.first << "=\""
			<< premorph_escape_attr_value(v.second) << "\"";
	}
	os() << ">\n";
}
}
/* end ns Corpus2 */
This diff is collapsed.
Click to expand it.
libcorpus2/io/premorphwriter.h
0 → 100644
+
56
−
0
View file @
9d901818
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#ifndef LIBCORPUS2_IO_PREMORPHWRITER_H
#define LIBCORPUS2_IO_PREMORPHWRITER_H
#include
<libcorpus2/io/writer.h>
namespace
Corpus2
{
/**
 * Token writer producing "premorph" XML: a cesAna document whose chunkList
 * contains paragraph chunks, with sentences written as plain text inside
 * <chunk type="s"> elements.
 */
class PremorphWriter : public TokenWriter
{
public:
	/**
	 * Create the writer and emit the document header.
	 * The "chunk" parameter enables forcing of an enclosing paragraph chunk.
	 */
	PremorphWriter(std::ostream& os, const Tagset& tagset,
			const string_range_vector& params);

	/// Calls finish() on destruction.
	~PremorphWriter();

	/// Write one token: its preceding whitespace followed by its orth.
	void write_token(const Token& t);

	/// Write one sentence as a <chunk type="s"> element.
	void write_sentence(const Sentence& s);

	/// Write one chunk: opening tag, all its sentences, closing tag.
	void write_chunk(const Chunk& c);

protected:
	/// Emit the XML prologue and opening cesAna / chunkList tags.
	void do_header();

	/// Emit the closing tags matching do_header().
	void do_footer();

	/// Open a paragraph chunk with an auto-generated "chN" id.
	void paragraph_head();

	/// Open a chunk tag carrying the attributes of the given chunk.
	void paragraph_head(const Chunk& c);

	/// Counter used to generate ids for auto-opened paragraph chunks.
	int cid_;

	/// True when the "chunk" parameter was passed to the constructor.
	bool force_chunk_;

	/// Result of registering this class with TokenWriter.
	static bool registered;
};
}
/* end ns Corpus2 */
#endif // LIBCORPUS2_IO_PREMORPHWRITER_H
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment