Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
corpus2
Manage
Activity
Members
Labels
Plan
Issues
4
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Analysers
corpus2
Commits
b138dbb5
Commit
b138dbb5
authored
13 years ago
by
ilor
Browse files
Options
Downloads
Patches
Plain Diff
revamp plaintex writer: plain-er format
parent
62e5839d
Branches
Branches containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libcorpus2/io/plainwriter.cpp
+34
-9
34 additions, 9 deletions
libcorpus2/io/plainwriter.cpp
libcorpus2/io/plainwriter.h
+7
-0
7 additions, 0 deletions
libcorpus2/io/plainwriter.h
with
41 additions
and
9 deletions
libcorpus2/io/plainwriter.cpp
+
34
−
9
View file @
b138dbb5
...
@@ -20,38 +20,63 @@ or FITNESS FOR A PARTICULAR PURPOSE.
...
@@ -20,38 +20,63 @@ or FITNESS FOR A PARTICULAR PURPOSE.
namespace
Corpus2
{
namespace
Corpus2
{
bool
PlainWriter
::
registered
=
TokenWriter
::
register_writer
<
PlainWriter
>
(
bool
PlainWriter
::
registered
=
TokenWriter
::
register_writer
<
PlainWriter
>
(
"plain"
);
"plain"
,
"nows,no_disamb_info,disamb_only,ds"
);
PlainWriter
::
PlainWriter
(
std
::
ostream
&
os
,
const
Tagset
&
tagset
,
PlainWriter
::
PlainWriter
(
std
::
ostream
&
os
,
const
Tagset
&
tagset
,
const
string_range_vector
&
params
)
const
string_range_vector
&
params
)
:
TokenWriter
(
os
,
tagset
,
params
)
:
TokenWriter
(
os
,
tagset
,
params
),
ws_
(
true
),
disamb_
(
true
)
,
disamb_only_
(
false
)
{
{
foreach
(
const
string_range
&
param
,
params
)
{
std
::
string
p
=
boost
::
copy_range
<
std
::
string
>
(
param
);
if
(
p
==
"nows"
)
{
ws_
=
false
;
}
else
if
(
p
==
"no_disamb_info"
)
{
disamb_
=
false
;
}
else
if
(
p
==
"disamb_only"
)
{
disamb_only_
=
true
;
}
else
if
(
p
==
"ds"
)
{
disamb_
=
false
;
disamb_only_
=
true
;
}
}
}
}
void
PlainWriter
::
write_token
(
const
Token
&
t
)
void
PlainWriter
::
write_token
(
const
Token
&
t
)
{
{
os
()
<<
t
.
orth_utf8
()
<<
"
\n
"
;
os
()
<<
t
.
orth_utf8
();
if
(
ws_
)
{
os
()
<<
"
\t
"
<<
PwrNlp
::
Whitespace
::
to_string
(
t
.
wa
());
}
os
()
<<
"
\n
"
;
foreach
(
const
Lexeme
&
lex
,
t
.
lexemes
())
{
foreach
(
const
Lexeme
&
lex
,
t
.
lexemes
())
{
os
()
<<
"
\t
"
<<
lex
.
lemma_utf8
()
<<
"
\t
"
if
(
!
disamb_only_
||
lex
.
is_disamb
())
{
<<
tagset
().
tag_to_string
(
lex
.
tag
())
<<
"
\n
"
;
os
()
<<
"
\t
"
<<
lex
.
lemma_utf8
()
<<
"
\t
"
<<
tagset
().
tag_to_string
(
lex
.
tag
());
if
(
disamb_
)
{
if
(
lex
.
is_disamb
())
{
os
()
<<
"
\t
"
;
os
()
<<
"disamb"
;
}
}
os
()
<<
"
\n
"
;
}
}
}
}
}
void
PlainWriter
::
write_sentence
(
const
Sentence
&
s
)
void
PlainWriter
::
write_sentence
(
const
Sentence
&
s
)
{
{
os
()
<<
"[[[
\n
"
;
foreach
(
const
Token
*
t
,
s
.
tokens
())
{
foreach
(
const
Token
*
t
,
s
.
tokens
())
{
write_token
(
*
t
);
write_token
(
*
t
);
}
}
os
()
<<
"
]]]
\n
"
;
os
()
<<
"
\n
"
;
}
}
void
PlainWriter
::
write_chunk
(
const
Chunk
&
c
)
void
PlainWriter
::
write_chunk
(
const
Chunk
&
c
)
{
{
os
()
<<
"[[[<<<
\n\n
"
;
foreach
(
const
boost
::
shared_ptr
<
Sentence
>&
s
,
c
.
sentences
())
{
foreach
(
const
boost
::
shared_ptr
<
Sentence
>&
s
,
c
.
sentences
())
{
write_sentence
(
*
s
);
write_sentence
(
*
s
);
}
}
os
()
<<
"
>>>]]]
\n
\n
"
;
os
()
<<
"
\n
"
;
}
}
}
/* end ns Corpus2 */
}
/* end ns Corpus2 */
This diff is collapsed.
Click to expand it.
libcorpus2/io/plainwriter.h
+
7
−
0
View file @
b138dbb5
...
@@ -34,6 +34,13 @@ public:
...
@@ -34,6 +34,13 @@ public:
void
write_chunk
(
const
Chunk
&
c
);
void
write_chunk
(
const
Chunk
&
c
);
static
bool
registered
;
static
bool
registered
;
private:
bool
ws_
;
bool
disamb_
;
bool
disamb_only_
;
};
};
}
/* end ns Corpus2 */
}
/* end ns Corpus2 */
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment