Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
combo
Manage
Activity
Members
Labels
Plan
Issues
20
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
2
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
combo
Commits
05aab344
Commit
05aab344
authored
1 year ago
by
Maja Jablonska
Browse files
Options
Downloads
Patches
Plain Diff
Minor fixes
parent
b28ead01
1 merge request
!46
Merge COMBO 3.0 into master
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
combo/models/__init__.py
+1
-0
1 addition, 0 deletions
combo/models/__init__.py
combo/predict.py
+19
-23
19 additions, 23 deletions
combo/predict.py
requirements.txt
+0
-1
0 additions, 1 deletion
requirements.txt
tests/data/data_readers/test_conll.py
+0
-6
0 additions, 6 deletions
tests/data/data_readers/test_conll.py
with
20 additions
and
30 deletions
combo/models/__init__.py
+
1
−
0
View file @
05aab344
...
...
@@ -8,3 +8,4 @@ from .lemma import LemmatizerModel
from
.combo_model
import
ComboModel
from
.morpho
import
MorphologicalFeatures
from
.model
import
Model
from
.archival
import
*
\ No newline at end of file
This diff is collapsed.
Click to expand it.
combo/predict.py
+
19
−
23
View file @
05aab344
...
...
@@ -230,26 +230,22 @@ class COMBO(Predictor):
dataset_reader
:
DatasetReader
):
return
cls
(
model
,
dataset_reader
,
tokenizers
.
SpacyTokenizer
())
# @classmethod
# def from_pretrained(cls, path: str, tokenizer=tokenizers.SpacyTokenizer(),
# batch_size: int = 1024,
# cuda_device: int = -1):
# util.import_module_and_submodules("combo.commands")
# util.import_module_and_submodules("combo.models")
# util.import_module_and_submodules("combo.training")
#
# if os.path.exists(path):
# model_path = path
# else:
# try:
# logger.debug("Downloading model.")
# model_path = download.download_file(path)
# except Exception as e:
# logger.error(e)
# raise e
#
# archive = models.load_archive(model_path, cuda_device=cuda_device)
# model = archive.model
# dataset_reader = DatasetReader.from_params(
# archive.config["dataset_reader"])
# return cls(model, dataset_reader, tokenizer, batch_size)
@classmethod
def
from_pretrained
(
cls
,
path
:
str
,
tokenizer
=
tokenizers
.
SpacyTokenizer
(),
batch_size
:
int
=
1024
,
cuda_device
:
int
=
-
1
):
if
os
.
path
.
exists
(
path
):
model_path
=
path
else
:
try
:
logger
.
debug
(
"
Downloading model.
"
)
model_path
=
download
.
download_file
(
path
)
except
Exception
as
e
:
logger
.
error
(
e
)
raise
e
archive
=
models
.
load_archive
(
model_path
,
cuda_device
=
cuda_device
)
model
=
archive
.
model
dataset_reader
=
DatasetReader
.
from_params
(
archive
.
config
[
"
dataset_reader
"
])
return
cls
(
model
,
dataset_reader
,
tokenizer
,
batch_size
)
This diff is collapsed.
Click to expand it.
requirements.txt
+
0
−
1
View file @
05aab344
...
...
@@ -9,7 +9,6 @@ importlib-resources~=5.12.0
overrides
~=7.3.1
torch
~=2.0.0
torchtext
~=0.15.1
lambo
~=2.0.0
numpy
~=1.24.1
pytorch-lightning
~=2.0.01
requests
~=2.28.2
...
...
This diff is collapsed.
Click to expand it.
tests/data/data_readers/test_conll.py
+
0
−
6
View file @
05aab344
import
unittest
from
combo.data
import
ConllDatasetReader
from
torch.utils.data
import
DataLoader
class
ConllDatasetReaderTest
(
unittest
.
TestCase
):
...
...
@@ -10,11 +9,6 @@ class ConllDatasetReaderTest(unittest.TestCase):
tokens
=
[
token
for
token
in
reader
(
'
conll_test_file.txt
'
)]
self
.
assertEqual
(
len
(
tokens
),
6
)
def
test_read_all_tokens_data_loader
(
self
):
reader
=
ConllDatasetReader
(
coding_scheme
=
'
IOB2
'
)
loader
=
DataLoader
(
reader
(
'
conll_test_file.txt
'
),
batch_size
=
16
)
print
(
next
(
iter
(
loader
)))
def
test_tokenize_correct_tokens
(
self
):
reader
=
ConllDatasetReader
(
coding_scheme
=
'
IOB2
'
)
token
=
next
(
iter
(
reader
(
'
conll_test_file.txt
'
)))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment