Commit 5082e27d authored Jun 1, 2011 by ilor
simple corpus2 pqlib wrapper and test executable, wip
parent 6aed7643
Showing 4 changed files with 295 additions and 6 deletions:

  poliqarp/CMakeLists.txt   +7    −1
  poliqarp/c2pqtest.cpp     +21   −0
  poliqarp/pqclient.cpp     +215  −0
  poliqarp/pqclient.h       +52   −5
poliqarp/CMakeLists.txt  +7 −1
PROJECT(Corpus2Poliqarp)
cmake_minimum_required(VERSION 2.8.0)

include_directories("/usr/local/include/sakura/")
include_directories(${PoliqarpLibrary_SOURCE_DIR}/sakura)
include_directories(${PoliqarpLibrary_SOURCE_DIR})
include_directories(${PoliqarpLibrary_BINARY_DIR}/sakura)
include_directories(${PoliqarpLibrary_BINARY_DIR})

add_library(corpus2poliqarp SHARED pqclient.cpp)
add_executable(c2pqtest c2pqtest.cpp)

target_link_libraries(corpus2poliqarp libpoliqarp corpus2)
target_link_libraries(c2pqtest libpoliqarp corpus2poliqarp corpus2 pwrutils)
poliqarp/c2pqtest.cpp  0 → 100644  +21 −0
#include "pqclient.h"
#include <libcorpus2/tagsetmanager.h>
#include <iostream>
#include <libcorpus2/io/writer.h>

int main(int argc, char** argv)
{
	std::cerr << "C2PQTEST\n";
	if (argc < 3) return 2;
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
	std::cerr << argv[1] << "\n";
	std::cerr << argv[2] << "\n";
	Corpus2::PoliqarpClient pqc(tagset, argv[1]);
	pqc.compile_query(argv[2]);
	pqc.execute_query();
	boost::shared_ptr<Corpus2::TokenWriter> writer;
	writer = Corpus2::TokenWriter::create_stream_writer("plain", std::cout, tagset);
	while (Corpus2::Token* t = pqc.get_next_focus_token()) {
		writer->write_token_dispose(t);
	}
}
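
For illustration only (not part of this commit): a minimal counting variant of the test program, built from the same PoliqarpClient calls introduced here. The "kipi" tagset name mirrors c2pqtest.cpp; the corpus path and query are whatever the caller supplies.

#include "pqclient.h"
#include <libcorpus2/tagsetmanager.h>
#include <iostream>

int main(int argc, char** argv)
{
	if (argc < 3) return 2;
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
	// Open the corpus given as argv[1] and count all matches of argv[2]
	// instead of printing the focus tokens.
	Corpus2::PoliqarpClient pqc(tagset, argv[1]);
	pqc.compile_query(argv[2]);
	std::cout << pqc.only_count_results() << " matches in a corpus of "
	          << pqc.get_corpus_size() << " segments\n";
}

This sketch relies only on compile_query(), only_count_results() and get_corpus_size(), all declared in pqclient.h below.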
poliqarp/pqclient.cpp  +215 −0
#include "pqclient.h"
#include <boost/make_shared.hpp>

extern "C" {
	void async_notify_new_results(void* session)
	{
	}
}

namespace Corpus2 {

PoliqarpClient::PoliqarpClient(const Tagset& tagset, const std::string path)
	: tagset_(tagset)
{
	query_compiled_ = false;
	poliqarp_error error = poliqarp_error_none;
	if (poliqarp_create("", &error) != 0) {
		throw Corpus2Error(poliqarp_error_message_get(&error));
	}
	progress_init(&progress_);
	count_so_far_ = 0;
	err_ = 0;
	if (poliqarp_open_corpus(&corpus_, path.c_str(), &progress_, &error) == -1) {
		throw Corpus2Error(poliqarp_error_message_get(&error));
	} else {
		poliqarp_create_match_buffer(&buffer_, 1000);
	}
	poliqarp_corpus_info cinfo;
	poliqarp_get_corpus_info(&corpus_, &cinfo);
	corpus_size_ = cinfo.num_segments;
	curr_chunk_doc_id_ = 0;
}

PoliqarpClient::~PoliqarpClient()
{
	poliqarp_close_corpus(&corpus_);
	poliqarp_destroy_match_buffer(&buffer_);
	if (query_compiled_) {
		poliqarp_destroy_query(&query_);
		query_compiled_ = false;
	}
	poliqarp_destroy();
};

void PoliqarpClient::compile_query(const std::string& q)
{
	count_so_far_ = 0;
	last_query_ = q;
	if (query_compiled_) {
		poliqarp_destroy_query(&query_);
		query_compiled_ = false;
	}
	poliqarp_error error = poliqarp_error_none;
	if (q.empty()) {
		throw Corpus2Error("EmptyQuery");
	} else if (poliqarp_create_query(&query_, q.c_str(), &corpus_, 0, NULL, NULL, &error) == -1) {
		throw Corpus2Error(std::string("QueryFailed: ") + poliqarp_error_message_get(&error));
	} else {
		query_compiled_ = true;
	}
}

void PoliqarpClient::reset_query()
{
	compile_query(last_query_);
}

void PoliqarpClient::execute_query()
{
	if (query_compiled_) {
		poliqarp_forget(&buffer_);
		if (poliqarp_produce(&buffer_, 1000, &query_, &progress_, NULL, 0, 1000)) {
			throw Corpus2Error("query execution error");
		}
		if (poliqarp_get_match_buffer_info(&buffer_, &info_)) {
			throw Corpus2Error("buffer read error");
		}
		count_so_far_ += buffer_.used;
		buffer_pos_ = 0;
	} else {
		throw Corpus2Error("Query not compiled");
	}
}

bool PoliqarpClient::next_match(poliqarp_match& match)
{
	if (info_.used > 0) {
		if (buffer_pos_ < info_.used) {
			poliqarp_get_match(&buffer_, &match, buffer_pos_++);
			return true;
		} else if (info_.used == buffer_.capacity) {
			poliqarp_forget(&buffer_);
			execute_query();
			if (info_.used > 0) {
				poliqarp_get_match(&buffer_, &match, buffer_pos_++);
				return true;
			}
		}
	}
	return false;
}

Token* PoliqarpClient::get_next_focus_token()
{
	poliqarp_match match;
	if (next_match(match)) {
		return get_token(match.focus);
	} else {
		return NULL;
	}
};

Sentence::Ptr PoliqarpClient::get_next_match_sequence()
{
	poliqarp_match match;
	if (next_match(match)) {
		return get_token_range(match.start, match.end);
	} else {
		return Sentence::Ptr();
	}
}

Token* PoliqarpClient::get_token(size_t pos)
{
	poliqarp_segment segment;
	poliqarp_segment_info info;
	poliqarp_interpretation_set set;
	poliqarp_interpretation_set_info sinfo;
	poliqarp_get_segment(&segment, &corpus_, pos);
	poliqarp_get_segment_info(&segment, &info);
	poliqarp_get_disambiguated_interpretations(&segment, &set);
	poliqarp_get_interpretation_set_info(&set, &sinfo);
	std::auto_ptr<Token> res(new Token());
	if (!info.space_before) {
		res->set_wa(PwrNlp::Whitespace::Space);
	}
	res->set_orth_utf8(info.text);
	for (size_t i = 0; i < sinfo.size; i++) {
		poliqarp_interpretation interp;
		poliqarp_interpretation_info iinfo;
		poliqarp_get_interpretation(&set, &interp, i);
		poliqarp_get_interpretation_info(&interp, &iinfo);
		Tag tag = tagset_.parse_simple_tag(iinfo.tag);
		res->add_lexeme(Lexeme(UnicodeString::fromUTF8(iinfo.base), tag));
	}
	return res.release();
}

Sentence::Ptr PoliqarpClient::get_next_sequence(bool whole_sentence)
{
	Sentence::Ptr sentence;
	if (info_.used > 0) {
		if (buffer_pos_ < info_.used) {
			struct poliqarp_match poli_match;
			poliqarp_get_match(&buffer_, &poli_match, buffer_pos_++);
			curr_chunk_doc_id_ = poli_match.document;
			if (whole_sentence) {
				//sentence = get_token_range(poli_match.withinStart, poli_match.withinEnd);
			} else {
				sentence = get_token_range(poli_match.start, poli_match.end);
			}
		} else {
			execute_query();
			sentence = get_next_sequence(whole_sentence);
		}
	}
	return sentence;
}

Sentence::Ptr PoliqarpClient::get_token_range(size_t from, size_t to)
{
	Sentence::Ptr s = boost::make_shared<Sentence>();
	for (size_t j = from; j < to; j++) {
		s->append(get_token(j));
	}
	return s;
}

size_t PoliqarpClient::get_count_of_matches_so_far()
{
	return count_so_far_;
}

size_t PoliqarpClient::only_count_results()
{
	//countSoFar = 0;
	if (query_compiled_) {
		while (poliqarp_produce(&buffer_, 1000, &query_, &progress_, NULL, 0, 1000)
				&& poliqarp_get_match_buffer_info(&buffer_, &info_) == 0
				&& info_.used > 0) {
			count_so_far_ += info_.used;
			poliqarp_forget(&buffer_);
		}
	}
	return count_so_far_;
}

size_t PoliqarpClient::get_corpus_size() const
{
	return corpus_size_;
};

size_t PoliqarpClient::get_corpus_pos() const
{
	if (query_compiled_) {
		return query_.last_context.index;
	} else {
		return 0;
	}
};

}
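
For illustration only (not part of this commit): a sketch of driving the lower-level next_match() loop directly and catching the Corpus2Error exceptions thrown above. It assumes Corpus2Error derives from std::exception (so that what() is available), and the "[pos=subst]" query string is a made-up example.

#include "pqclient.h"
#include <libcorpus2/tagsetmanager.h>
#include <iostream>

void dump_match_positions(const std::string& corpus_path)
{
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
	try {
		Corpus2::PoliqarpClient pqc(tagset, corpus_path);
		pqc.compile_query("[pos=subst]");   // hypothetical query
		pqc.execute_query();                // fills the first 1000-match buffer
		poliqarp_match m;
		// next_match() transparently refills the buffer via execute_query()
		// once a full buffer has been consumed.
		while (pqc.next_match(m)) {
			std::cout << m.start << "-" << m.end
			          << " (focus " << m.focus << ")\n";
		}
	} catch (Corpus2::Corpus2Error& e) {
		std::cerr << "error: " << e.what() << "\n";
	}
}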
poliqarp/pqclient.h  +52 −5

@@ -3,22 +3,69 @@
#include <boost/utility.hpp>

extern "C" {
	#define this this_
	#include <poliqarp.h>
	#undef this
	void async_notify_new_results(void* session);
}

#include <libcorpus2/chunk.h>

namespace Corpus2 {

class PoliqarpClient : boost::noncopyable
{
public:
	PoliqarpClient(const std::string path);
	PoliqarpClient(const Tagset& tagset, const std::string path);
	~PoliqarpClient();

	void reload_corpus(const std::string& path);
	void restart();
	int execute_query();
	int compile_query(const std::string& q);
	void compile_query(const std::string& q);
	void reset_query();
	void execute_query();
	int has_error();
	bool next_match(poliqarp_match& match);
	Token* get_next_focus_token();
	Sentence::Ptr get_next_match_sequence();
	Token* get_token(size_t pos);
	Sentence::Ptr get_token_range(size_t from, size_t to);
	Sentence::Ptr get_next_sequence(bool whole_sentence);
	size_t get_count_of_matches_so_far();
	size_t only_count_results();
	size_t get_corpus_size() const;
	size_t get_corpus_pos() const;

private:
	const Tagset& tagset_;
	boost::shared_ptr<Sentence> match_;
	boost::shared_ptr<Chunk> document_;
	size_t buffer_pos_;
	size_t count_so_far_;
	int err_;
	std::string last_error_;
	std::string last_query_;
	bool quiet_;
	char* corpusname_;
	char* querytext_;
	bool tags_context_;
	bool tags_match_;
	bool query_compiled_;
	size_t corpus_size_;
	size_t curr_chunk_doc_id_;
	struct poliqarp_corpus corpus_;
	struct poliqarp_query query_;
	struct poliqarp_match_buffer buffer_;
	struct poliqarp_match_buffer_info info_;
	progress_t progress_;
	void* exception_data_;
};

}
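
For illustration only (not part of this commit): a sketch of consuming the Sentence-level accessors declared above. It assumes Corpus2::TokenWriter offers a write_sentence() method alongside the write_token_dispose() used in c2pqtest.cpp; the "kipi" and "plain" names mirror that test program.

#include "pqclient.h"
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/io/writer.h>
#include <iostream>

// Print every match of `query` as a full token sequence rather than just its
// focus token.
void print_match_sequences(const std::string& corpus_path, const std::string& query)
{
	const Corpus2::Tagset& tagset = Corpus2::get_named_tagset("kipi");
	Corpus2::PoliqarpClient pqc(tagset, corpus_path);
	pqc.compile_query(query);
	pqc.execute_query();
	boost::shared_ptr<Corpus2::TokenWriter> writer =
		Corpus2::TokenWriter::create_stream_writer("plain", std::cout, tagset);
	// get_next_match_sequence() wraps each buffered match in a new Sentence;
	// an empty Sentence::Ptr signals that the result set is exhausted.
	while (Corpus2::Sentence::Ptr s = pqc.get_next_match_sequence()) {
		writer->write_sentence(*s);
	}
}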