Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
combo
Manage
Activity
Members
Labels
Plan
Issues
20
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
2
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
combo
Commits
ebc411b0
Commit
ebc411b0
authored
2 years ago
by
Piotr
Committed by
Lukasz Pszenny
1 year ago
Browse files
Options
Downloads
Patches
Plain Diff
LAMBO Tokenization skeleton.
parent
21b65381
Branches
Branches containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
combo/main.py
+2
-2
2 additions, 2 deletions
combo/main.py
combo/predict.py
+7
-2
7 additions, 2 deletions
combo/predict.py
combo/utils/lambo.py
+13
-0
13 additions, 0 deletions
combo/utils/lambo.py
with
22 additions
and
4 deletions
combo/main.py
+
2
−
2
View file @
ebc411b0
...
...
@@ -86,8 +86,8 @@ flags.DEFINE_integer(name="batch_size", default=1,
flags
.
DEFINE_boolean
(
name
=
"
silent
"
,
default
=
True
,
help
=
"
Silent prediction to file (without printing to console).
"
)
flags
.
DEFINE_enum
(
name
=
"
predictor_name
"
,
default
=
"
combo-spacy
"
,
enum_values
=
[
"
combo
"
,
"
combo-spacy
"
,
"
lambo
"
],
help
=
"
Use predictor with whitespace
or
spacy tokenizer.
"
)
enum_values
=
[
"
combo
"
,
"
combo-spacy
"
,
"
combo-
lambo
"
],
help
=
"
Use predictor with whitespace
,
spacy
or LAMBO
tokenizer.
"
)
def
run
(
_
):
...
...
This diff is collapsed.
Click to expand it.
combo/predict.py
+
7
−
2
View file @
ebc411b0
...
...
@@ -10,15 +10,15 @@ from allennlp.data import tokenizers
from
allennlp.predictors
import
predictor
from
overrides
import
overrides
from
combo
import
data
from
combo.data
import
sentence2conllu
,
tokens2conllu
,
conllu2sentence
from
combo.utils
import
download
,
graph
from
combo.utils
import
download
,
graph
,
lambo
logger
=
logging
.
getLogger
(
__name__
)
@predictor.Predictor.register
(
"
combo
"
)
@predictor.Predictor.register
(
"
combo-spacy
"
,
constructor
=
"
with_spacy_tokenizer
"
)
@predictor.Predictor.register
(
"
combo-lambo
"
,
constructor
=
"
with_lambo_tokenizer
"
)
class
COMBO
(
predictor
.
Predictor
):
def
__init__
(
self
,
...
...
@@ -230,6 +230,11 @@ class COMBO(predictor.Predictor):
def with_spacy_tokenizer(cls, model: models.Model,
                         dataset_reader: allen_data.DatasetReader):
    """Build the predictor with a default-constructed ``SpacyTokenizer``.

    ``model`` and ``dataset_reader`` are forwarded unchanged to the class
    constructor; only the tokenizer argument is chosen here.
    """
    spacy_tokenizer = tokenizers.SpacyTokenizer()
    return cls(model, dataset_reader, spacy_tokenizer)
@classmethod
def with_lambo_tokenizer(cls, model: models.Model,
                         dataset_reader: allen_data.DatasetReader):
    """Build the predictor with a default-constructed ``LamboTokenizer``.

    ``model`` and ``dataset_reader`` are forwarded unchanged to the class
    constructor; only the tokenizer argument is chosen here.
    """
    lambo_tokenizer = lambo.LamboTokenizer()
    return cls(model, dataset_reader, lambo_tokenizer)
@classmethod
def
from_pretrained
(
cls
,
path
:
str
,
tokenizer
=
tokenizers
.
SpacyTokenizer
(),
...
...
This diff is collapsed.
Click to expand it.
combo/utils/lambo.py
0 → 100644
+
13
−
0
View file @
ebc411b0
from
typing
import
List
from
allennlp.data.tokenizers.tokenizer
import
Tokenizer
from
allennlp.data.tokenizers.token_class
import
Token
class LamboTokenizer(Tokenizer):
    """Skeleton of a LAMBO-backed tokenizer.

    Only the configuration (the language) is stored so far; the actual
    tokenization is not implemented yet.
    """

    def __init__(self, language: str = "??") -> None:
        """Remember the language this tokenizer is configured for.

        Args:
            language: Language identifier stored on the instance. The
                ``"??"`` default looks like a placeholder -- TODO confirm
                the expected format once tokenization is implemented.
        """
        self.language = language

    def tokenize(self, text: str) -> List[Token]:
        """Split ``text`` into tokens.

        Args:
            text: Raw text to tokenize.

        Returns:
            The tokens of ``text`` (once implemented).

        Raises:
            NotImplementedError: Always, until tokenization is implemented.
        """
        # TODO: implement the actual tokenization.
        # Fail loudly instead of returning None: the declared return type is
        # List[Token], and a None result would only surface later as an
        # unrelated TypeError in whichever caller iterates over the tokens.
        raise NotImplementedError(
            "LamboTokenizer.tokenize is not implemented yet."
        )
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment