Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
combo
Manage
Activity
Members
Labels
Plan
Issues
20
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
2
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
combo
Commits
888e0f11
Commit
888e0f11
authored
4 years ago
by
Mateusz Klimaszewski
Browse files
Options
Downloads
Patches
Plain Diff
Fix bug with double predictions.
parent
4386185d
Branches
Branches containing commit
Tags
Tags containing commit
2 merge requests
!11
Fix bug with double predictions - develop to master merge.
,
!10
Double prediction fix
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
combo/main.py
+1
-2
1 addition, 2 deletions
combo/main.py
combo/predict.py
+21
-28
21 additions, 28 deletions
combo/predict.py
with
22 additions
and
30 deletions
combo/main.py
+
1
−
2
View file @
888e0f11
...
...
@@ -151,8 +151,7 @@ def run(_):
if
FLAGS
.
input_file
==
"
-
"
:
use_dataset_reader
=
False
predictor
.
without_sentence_embedding
=
True
if
use_dataset_reader
:
predictor
.
line_to_conllu
=
True
predictor
.
line_to_conllu
=
False
if
FLAGS
.
silent
:
logging
.
getLogger
(
"
allennlp.common.params
"
).
disabled
=
True
manager
=
allen_predict
.
_PredictManager
(
...
...
This diff is collapsed.
Click to expand it.
combo/predict.py
+
21
−
28
View file @
888e0f11
import
logging
import
os
from
typing
import
List
,
Union
,
Tuple
from
typing
import
List
,
Union
,
Dict
,
Any
import
conllu
import
numpy
as
np
from
allennlp
import
data
as
allen_data
,
common
,
models
from
allennlp.common
import
util
...
...
@@ -26,7 +25,7 @@ class COMBO(predictor.Predictor):
dataset_reader
:
allen_data
.
DatasetReader
,
tokenizer
:
allen_data
.
Tokenizer
=
tokenizers
.
WhitespaceTokenizer
(),
batch_size
:
int
=
32
,
line_to_conllu
:
bool
=
Fals
e
)
->
None
:
line_to_conllu
:
bool
=
Tru
e
)
->
None
:
super
().
__init__
(
model
,
dataset_reader
)
self
.
batch_size
=
batch_size
self
.
vocab
=
model
.
vocab
...
...
@@ -57,18 +56,21 @@ class COMBO(predictor.Predictor):
if
len
(
sentence
)
==
0
:
return
[]
example
=
sentence
[
0
]
sentences
=
sentence
if
isinstance
(
example
,
str
)
or
isinstance
(
example
,
list
):
sentences
=
[]
for
sentences_batch
in
util
.
lazy_groups_of
(
sentence
,
self
.
batch_size
):
sentences_batch
=
self
.
predict_batch_json
([
self
.
_to_input_json
(
s
)
for
s
in
sentences_batch
])
sentences
.
extend
(
sentences_batch
)
return
sentences
result
=
[]
sentences
=
[
self
.
_to_input_json
(
s
)
for
s
in
sentences
]
for
sentences_batch
in
util
.
lazy_groups_of
(
sentences
,
self
.
batch_size
):
sentences_batch
=
self
.
predict_batch_json
(
sentences_batch
)
result
.
extend
(
sentences_batch
)
return
result
elif
isinstance
(
example
,
data
.
Sentence
):
sentences
=
[]
for
sentences_batch
in
util
.
lazy_groups_of
(
sentence
,
self
.
batch_size
):
sentences_batch
=
self
.
predict_batch_instance
([
self
.
_to_input_instance
(
s
)
for
s
in
sentences_batch
])
sentences
.
extend
(
sentences_batch
)
return
sentences
result
=
[]
sentences
=
[
self
.
_to_input_instance
(
s
)
for
s
in
sentences
]
for
sentences_batch
in
util
.
lazy_groups_of
(
sentences
,
self
.
batch_size
):
sentences_batch
=
self
.
predict_batch_instance
(
sentences_batch
)
result
.
extend
(
sentences_batch
)
return
result
else
:
raise
ValueError
(
"
List must have either sentences as str, List[str] or Sentence object.
"
)
else
:
...
...
@@ -79,36 +81,27 @@ class COMBO(predictor.Predictor):
sentences
=
[]
predictions
=
super
().
predict_batch_instance
(
instances
)
for
prediction
,
instance
in
zip
(
predictions
,
instances
):
tree
,
sentence_embedding
=
self
.
predict
_instance_as_tree
(
instance
)
tree
,
sentence_embedding
=
self
.
_
predict
ions_as_tree
(
prediction
,
instance
)
sentence
=
conllu2sentence
(
tree
,
sentence_embedding
)
sentences
.
append
(
sentence
)
return
sentences
@overrides
def
predict_batch_json
(
self
,
inputs
:
List
[
common
.
JsonDict
])
->
List
[
data
.
Sentence
]:
sentences
=
[]
instances
=
self
.
_batch_json_to_instances
(
inputs
)
predictions
=
self
.
predict_batch_instance
(
instances
)
for
prediction
,
instance
in
zip
(
predictions
,
instances
):
tree
,
sentence_embedding
=
self
.
predict_instance_as_tree
(
instance
)
sentence
=
conllu2sentence
(
tree
,
sentence_embedding
)
sentences
.
append
(
sentence
)
sentences
=
self
.
predict_batch_instance
(
instances
)
return
sentences
@overrides
def
predict_instance
(
self
,
instance
:
allen_data
.
Instance
,
serialize
:
bool
=
True
)
->
data
.
Sentence
:
tree
,
sentence_embedding
=
self
.
predict_instance_as_tree
(
instance
)
predictions
=
super
().
predict_instance
(
instance
)
tree
,
sentence_embedding
=
self
.
_predictions_as_tree
(
predictions
,
instance
)
return
conllu2sentence
(
tree
,
sentence_embedding
)
@overrides
def
predict_json
(
self
,
inputs
:
common
.
JsonDict
)
->
data
.
Sentence
:
instance
=
self
.
_json_to_instance
(
inputs
)
tree
,
sentence_embedding
=
self
.
predict_instance_as_tree
(
instance
)
return
conllu2sentence
(
tree
,
sentence_embedding
)
def
predict_instance_as_tree
(
self
,
instance
:
allen_data
.
Instance
)
->
Tuple
[
conllu
.
TokenList
,
List
[
float
]]:
predictions
=
super
().
predict_instance
(
instance
)
return
self
.
_predictions_as_tree
(
predictions
,
instance
)
return
self
.
predict_instance
(
instance
)
@overrides
def
_json_to_instance
(
self
,
json_dict
:
common
.
JsonDict
)
->
allen_data
.
Instance
:
...
...
@@ -143,7 +136,7 @@ class COMBO(predictor.Predictor):
def
_to_input_instance
(
self
,
sentence
:
data
.
Sentence
)
->
allen_data
.
Instance
:
return
self
.
_dataset_reader
.
text_to_instance
(
sentence2conllu
(
sentence
))
def
_predictions_as_tree
(
self
,
predictions
,
i
nstance
):
def
_predictions_as_tree
(
self
,
predictions
:
Dict
[
str
,
Any
],
instance
:
allen_data
.
I
nstance
):
tree
=
instance
.
fields
[
"
metadata
"
][
"
input
"
]
field_names
=
instance
.
fields
[
"
metadata
"
][
"
field_names
"
]
tree_tokens
=
[
t
for
t
in
tree
if
isinstance
(
t
[
"
id
"
],
int
)]
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment