Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
a7ae417f
Commit
a7ae417f
authored
11 years ago
by
Adam Radziszewski
Browse files
Options
Downloads
Patches
Plain Diff
new writer: line (simple chunk line drawings)
parent
dce1ad83
Branches
Branches containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
CMakeLists.txt
+1
-1
1 addition, 1 deletion
CMakeLists.txt
libcorpus2/CMakeLists.txt
+1
-0
1 addition, 0 deletions
libcorpus2/CMakeLists.txt
libcorpus2/io/linewriter.cpp
+118
-0
118 additions, 0 deletions
libcorpus2/io/linewriter.cpp
libcorpus2/io/linewriter.h
+44
-0
44 additions, 0 deletions
libcorpus2/io/linewriter.h
with
164 additions
and
1 deletion
CMakeLists.txt
+
1
−
1
View file @
a7ae417f
...
@@ -2,7 +2,7 @@ PROJECT(Corpus2Library)
...
@@ -2,7 +2,7 @@ PROJECT(Corpus2Library)
set
(
corpus2_ver_major
"1"
)
set
(
corpus2_ver_major
"1"
)
set
(
corpus2_ver_minor
"3"
)
set
(
corpus2_ver_minor
"3"
)
set
(
corpus2_ver_patch
"
3
"
)
set
(
corpus2_ver_patch
"
4
"
)
cmake_minimum_required
(
VERSION 2.8.0
)
cmake_minimum_required
(
VERSION 2.8.0
)
...
...
This diff is collapsed.
Click to expand it.
libcorpus2/CMakeLists.txt
+
1
−
0
View file @
a7ae417f
...
@@ -58,6 +58,7 @@ SET(libcorpus2_STAT_SRC
...
@@ -58,6 +58,7 @@ SET(libcorpus2_STAT_SRC
io/helpers.cpp
io/helpers.cpp
io/fastxces.cpp
io/fastxces.cpp
io/iob-chan.cpp
io/iob-chan.cpp
io/linewriter.cpp
io/nonewriter.cpp
io/nonewriter.cpp
io/orthwriter.cpp
io/orthwriter.cpp
io/pathwriter.cpp
io/pathwriter.cpp
...
...
This diff is collapsed.
Click to expand it.
libcorpus2/io/linewriter.cpp
0 → 100644
+
118
−
0
View file @
a7ae417f
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
*/
#include
<libcorpus2/io/linewriter.h>
#include
<libcorpus2/io/linewriter.h>
#include
<libcorpus2/ann/annotatedsentence.h>
#include
<iomanip>
#include
<boost/foreach.hpp>
namespace
Corpus2
{
// Registers this writer with TokenWriter under the format name "line" during
// static initialisation; the value returned by the registration call is kept here.
bool LineWriter::registered(TokenWriter::register_writer<LineWriter>("line"));
/**
 * Constructs a line writer. All three arguments are forwarded unchanged to
 * the TokenWriter base class; the constructor does nothing else.
 *
 * @param os      output stream handed to the base writer
 * @param tagset  tagset handed to the base writer
 * @param params  writer parameters handed to the base writer
 */
LineWriter::LineWriter(
		std::ostream& os,
		const Tagset& tagset,
		const string_range_vector& params)
	: TokenWriter(os, tagset, params)
{
	// nothing to set up beyond the base class
}
/**
 * Writes a single token: only its UTF-8 orthographic form is sent to the
 * output stream, with no separator or newline.
 *
 * @param t  token to write
 */
void LineWriter::write_token(const Token& t)
{
	os() << t.orth_utf8();
}
/**
 * Writes a sentence as a simple line drawing.
 *
 * The first output line is the label "Tokens" followed by the orths of all
 * tokens, each preceded by a single space. If the sentence is an
 * AnnotatedSentence, one further line is written per annotation channel:
 * the channel name (left-aligned, padded to the longest label) followed by
 * a run of line characters under every token that belongs to a chunk
 * (a distinct character marks chunk heads) and spaces under tokens outside
 * any chunk. A blank line terminates the sentence.
 *
 * Note: the channels of an AnnotatedSentence are modified, because
 * make_iob_from_segments() is called on each of them.
 *
 * @param s  sentence to write
 */
void LineWriter::write_sentence(const Sentence& s)
{
	const std::string tok_name("Tokens");
	const std::string empty_char(" ");
	const std::string chunk_char("\xe2\x94\x80"); // hardcoded utf-8
	//const std::string head_char("\xe2\x94\x81"); // hardcoded utf-8
	const std::string head_char("\xe2\x95\x90"); // hardcoded utf-8

	const AnnotatedSentence* as = dynamic_cast<const AnnotatedSentence*>(&s);

	// get longest channel name for padding
	int name_padding = tok_name.length();
	if (as) {
		BOOST_FOREACH (const AnnotatedSentence::chan_map_t::value_type& vt,
				as->all_channels()) {
			const int that_len = vt.first.length();
			if (that_len > name_padding) {
				name_padding = that_len;
			}
		}
	}

	// dump token orths and remember orth lengths
	std::vector<int> orth_lens;
	os() << std::left << std::setw(name_padding) << tok_name;
	BOOST_FOREACH (const Token* t, s.tokens()) {
		os() << empty_char << t->orth_utf8();
		orth_lens.push_back(t->orth().length());
	}
	os() << "\n";

	// dump channel line representations
	if (as) {
		// make_iob_from_segments() is non-const, hence the cast
		AnnotatedSentence* hax = const_cast<AnnotatedSentence*>(as); // sorry
		const int tok_count = static_cast<int>(orth_lens.size());
		BOOST_FOREACH (const AnnotatedSentence::chan_map_t::value_type& vt,
				hax->all_channels()) {
			os() << std::left << std::setw(name_padding) << vt.first;
			// use IOB2 representation internally
			AnnotationChannel& chan = hax->get_channel(vt.first);
			chan.make_iob_from_segments();
			IOB::Enum last_tag = IOB::O;
			// write line representation; never index past the recorded
			// orth lengths, even if the channel reports more positions
			for (int idx = 0; idx < chan.size() && idx < tok_count; ++idx) {
				const IOB::Enum this_tag = chan.get_iob_at(idx);
				// The gap before this token is drawn as a line only when
				// the token continues the chunk of the previous token.
				// A token that begins a new chunk gets a blank gap, so two
				// directly adjacent chunks are not rendered as one.
				const bool continues_chunk =
					(last_tag != IOB::O) && (this_tag == IOB::I);
				os() << (continues_chunk ? chunk_char : empty_char);

				// character repeated under the token itself
				const std::string& now =
					(this_tag == IOB::O)
						? empty_char
						: (chan.is_head_at(idx) ? head_char : chunk_char);
				for (int line_pos = orth_lens[idx]; line_pos > 0; --line_pos) {
					os() << now;
				}
				last_tag = this_tag;
			}
			os() << "\n";
		}
	}
	os() << "\n";
}
/**
 * Writes a chunk: every sentence of the chunk is written with
 * write_sentence(), in order, and a single newline follows the last one.
 *
 * @param c  chunk whose sentences are written
 */
void LineWriter::write_chunk(const Chunk& c)
{
	// bind by const reference so the pointer object is not copied
	// on every iteration
	BOOST_FOREACH (const Sentence::Ptr& s, c.sentences()) {
		write_sentence(*s);
	}
	os() << "\n";
}
}
/* end ns Corpus2 */
This diff is collapsed.
Click to expand it.
libcorpus2/io/linewriter.h
0 → 100644
+
44
−
0
View file @
a7ae417f
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
*/
#ifndef LIBCORPUS2_IO_LINEWRITER_H
#define LIBCORPUS2_IO_LINEWRITER_H
#include
<libcorpus2/io/writer.h>
namespace
Corpus2
{
/**
 * A writer that produces simple text (UTF-8) line drawing representation
 * of syntactic annotation in channels.
 */
class LineWriter : public TokenWriter
{
public:
	/**
	 * Creates the writer; all arguments are passed on to TokenWriter.
	 *
	 * @param os      stream the output is written to
	 * @param tagset  tagset passed to the base writer
	 * @param params  writer parameters passed to the base writer
	 */
	LineWriter(std::ostream& os, const Tagset& tagset,
			const string_range_vector& params);

	/// Writes a single token.
	void write_token(const Token& t);

	/// Writes a single sentence.
	void write_sentence(const Sentence& s);

	/// Writes all sentences of a chunk.
	void write_chunk(const Chunk& c);

	/// Holds the result of registering this writer class with TokenWriter.
	static bool registered;
};
}
/* end ns Corpus2 */
#endif // LIBCORPUS2_IO_LINEWRITER_H
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment