Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
combo
Manage
Activity
Members
Labels
Plan
Issues
20
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
2
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
combo
Commits
1abcab10
Commit
1abcab10
authored
3 years ago
by
martynawiacek
Browse files
Options
Downloads
Patches
Plain Diff
moved try/except to combo predictor
parent
e060a26d
1 merge request
!41
Add try/except clause for sentences with a large number of wordpieces.
Pipeline
#4333
passed with stage
in 6 minutes and 51 seconds
Changes
2
Pipelines
3
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
+17
-25
17 additions, 25 deletions
...ken_indexers/pretrained_transformer_mismatched_indexer.py
combo/predict.py
+7
-1
7 additions, 1 deletion
combo/predict.py
with
24 additions
and
26 deletions
combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
+
17
−
25
View file @
1abcab10
import
logging
import
sys
from
typing
import
Optional
,
Dict
,
Any
,
List
,
Tuple
from
allennlp
import
data
from
allennlp.data
import
token_indexers
,
tokenizers
,
IndexedTokenList
,
vocabulary
from
overrides
import
overrides
logger
=
logging
.
getLogger
(
__name__
)
@data.TokenIndexer.register
(
"
pretrained_transformer_mismatched_fixed
"
)
class
PretrainedTransformerMismatchedIndexer
(
token_indexers
.
PretrainedTransformerMismatchedIndexer
):
...
...
@@ -35,33 +32,28 @@ class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransforme
Method is overridden in order to raise an error while the number of tokens needed to embed a sentence exceeds the
maximal input of a model.
"""
try
:
self
.
_matched_indexer
.
_add_encoding_to_vocabulary_if_needed
(
vocabulary
)
self
.
_matched_indexer
.
_add_encoding_to_vocabulary_if_needed
(
vocabulary
)
wordpieces
,
offsets
=
self
.
_allennlp_tokenizer
.
intra_word_tokenize
(
[
t
.
ensure_text
()
for
t
in
tokens
])
wordpieces
,
offsets
=
self
.
_allennlp_tokenizer
.
intra_word_tokenize
(
[
t
.
ensure_text
()
for
t
in
tokens
])
if
len
(
wordpieces
)
>
self
.
_tokenizer
.
max_len_single_sentence
:
raise
ValueError
(
"
Following sentence consists of more wordpiece tokens than the model can process:
\n
"
+
\
"
"
.
join
([
str
(
x
)
for
x
in
tokens
[:
10
]])
+
"
...
\n
"
+
\
f
"
Maximal input:
{
self
.
_tokenizer
.
max_len_single_sentence
}
\n
"
+
\
f
"
Current input:
{
len
(
wordpieces
)
}
"
)
if
len
(
wordpieces
)
>
self
.
_tokenizer
.
max_len_single_sentence
:
raise
ValueError
(
"
Following sentence consists of more wordpiece tokens than the model can process:
\n
"
+
\
"
"
.
join
([
str
(
x
)
for
x
in
tokens
[:
10
]])
+
"
...
\n
"
+
\
f
"
Maximal input:
{
self
.
_tokenizer
.
max_len_single_sentence
}
\n
"
+
\
f
"
Current input:
{
len
(
wordpieces
)
}
"
)
offsets
=
[
x
if
x
is
not
None
else
(
-
1
,
-
1
)
for
x
in
offsets
]
offsets
=
[
x
if
x
is
not
None
else
(
-
1
,
-
1
)
for
x
in
offsets
]
output
:
IndexedTokenList
=
{
"
token_ids
"
:
[
t
.
text_id
for
t
in
wordpieces
],
"
mask
"
:
[
True
]
*
len
(
tokens
),
# for original tokens (i.e. word-level)
"
type_ids
"
:
[
t
.
type_id
for
t
in
wordpieces
],
"
offsets
"
:
offsets
,
"
wordpiece_mask
"
:
[
True
]
*
len
(
wordpieces
),
# for wordpieces (i.e. subword-level)
}
output
:
IndexedTokenList
=
{
"
token_ids
"
:
[
t
.
text_id
for
t
in
wordpieces
],
"
mask
"
:
[
True
]
*
len
(
tokens
),
# for original tokens (i.e. word-level)
"
type_ids
"
:
[
t
.
type_id
for
t
in
wordpieces
],
"
offsets
"
:
offsets
,
"
wordpiece_mask
"
:
[
True
]
*
len
(
wordpieces
),
# for wordpieces (i.e. subword-level)
}
return
self
.
_matched_indexer
.
_postprocess_output
(
output
)
except
ValueError
as
value_error
:
logger
.
error
(
value_error
)
sys
.
exit
(
1
)
return
self
.
_matched_indexer
.
_postprocess_output
(
output
)
class
PretrainedTransformerIndexer
(
token_indexers
.
PretrainedTransformerIndexer
):
...
...
This diff is collapsed.
Click to expand it.
combo/predict.py
+
7
−
1
View file @
1abcab10
import
logging
import
os
import
sys
from
typing
import
List
,
Union
,
Dict
,
Any
import
numpy
as
np
...
...
@@ -48,7 +49,12 @@ class COMBO(predictor.Predictor):
:param sentence: sentence(s) representation
:return: Sentence or List[Sentence] depending on the input
"""
return
self
.
predict
(
sentence
)
try
:
return
self
.
predict
(
sentence
)
except
Exception
as
e
:
logger
.
error
(
e
)
logger
.
error
(
'
Exiting.
'
)
sys
.
exit
(
1
)
def
predict
(
self
,
sentence
:
Union
[
str
,
List
[
str
],
List
[
List
[
str
]],
List
[
data
.
Sentence
]]):
if
isinstance
(
sentence
,
str
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment