Commit 9d339859 authored by Mateusz Klimaszewski

Add herberta configuration.

parent ac65eb69
Part of 2 merge requests: !4 Documentation, !3 Herbert configuration and AllenNLP 1.2.0 update.
 ## Installation
+### HERBERTA notes:
+Install the herberta transformers package **before** running the command below.
 Clone this repository and run:
 ```bash
 python setup.py develop
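The note above does not say where the herberta package comes from or how it is installed. Assuming it ships a standard transformers-compatible checkpoint, a quick smoke test after installation might look like the sketch below; the checkpoint path and sample sentence are placeholders, not anything this commit defines.

```python
# Hypothetical smoke test: check that a herberta checkpoint loads through the
# standard transformers Auto classes (transformers 3.0.x API).
from transformers import AutoModel, AutoTokenizer

MODEL_PATH = "path/to/herberta-checkpoint"  # placeholder, not defined by this commit

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModel.from_pretrained(MODEL_PATH)

encoded = tokenizer("Zażółć gęślą jaźń.", return_tensors="pt")
outputs = model(**encoded)
print(outputs[0].shape)  # (batch, sequence_length, hidden_size)
```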
@@ -54,12 +54,12 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
                  batch_callbacks: List[training.BatchCallback] = None,
                  epoch_callbacks: List[training.EpochCallback] = None, distributed: bool = False, local_rank: int = 0,
                  world_size: int = 1, num_gradient_accumulation_steps: int = 1,
-                 opt_level: Optional[str] = None) -> None:
+                 use_amp: bool = False) -> None:
         super().__init__(model, optimizer, data_loader, patience, validation_metric, validation_data_loader, num_epochs,
                          serialization_dir, checkpointer, cuda_device, grad_norm, grad_clipping,
                          learning_rate_scheduler, momentum_scheduler, tensorboard_writer, moving_average,
                          batch_callbacks, epoch_callbacks, distributed, local_rank, world_size,
-                         num_gradient_accumulation_steps, opt_level)
+                         num_gradient_accumulation_steps, use_amp)
         # TODO extract param to constructor (+ constructor method?)
         self.validate_every_n = 5
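This hunk swaps the Apex-style `opt_level` argument for AllenNLP 1.1's `use_amp` flag, which uses PyTorch's native automatic mixed precision introduced in torch 1.6. As background only, not code from this repository, a minimal sketch of what that flag enables under the hood:

```python
# Minimal sketch of native AMP (torch.cuda.amp), the mechanism behind use_amp=True.
# Standalone illustration, requires a CUDA device; not code from this repository.
import torch
from torch.cuda.amp import GradScaler, autocast

model = torch.nn.Linear(16, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = GradScaler()

for step in range(10):
    inputs = torch.randn(8, 16, device="cuda")
    targets = torch.randint(0, 2, (8,), device="cuda")
    optimizer.zero_grad()
    with autocast():  # run the forward pass in mixed precision
        loss = torch.nn.functional.cross_entropy(model(inputs), targets)
    scaler.scale(loss).backward()  # scale the loss to avoid fp16 underflow
    scaler.step(optimizer)         # unscale gradients, then optimizer.step()
    scaler.update()                # adjust the scale factor for the next step
```

AllenNLP wires the equivalent autocast/GradScaler calls into its own training loop when `use_amp=True`, so this subclass only has to forward the flag.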
@@ -125,7 +125,8 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
                 self.model,
                 val_loss,
                 val_reg_loss,
-                num_batches,
+                num_batches=num_batches,
+                batch_loss=None,
                 reset=True,
                 world_size=self._world_size,
                 cuda_device=self.cuda_device,
@@ -231,7 +232,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
            world_size: int = 1,
            num_gradient_accumulation_steps: int = 1,
            opt_level: Optional[str] = None,
-           no_grad: List[str] = None,
+           use_amp: bool = False,
            optimizer: common.Lazy[optimizers.Optimizer] = None,
            learning_rate_scheduler: common.Lazy[learning_rate_schedulers.LearningRateScheduler] = None,
            momentum_scheduler: common.Lazy[momentum_schedulers.MomentumScheduler] = None,
@@ -258,8 +259,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
             distributed=distributed,
             world_size=world_size,
             num_gradient_accumulation_steps=num_gradient_accumulation_steps,
-            opt_level=opt_level,
-            no_grad=no_grad,
+            use_amp=use_amp,
             optimizer=optimizer,
             learning_rate_scheduler=learning_rate_scheduler,
             momentum_scheduler=momentum_scheduler,
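With `from_partial_objects` now forwarding `use_amp` rather than `opt_level`/`no_grad`, a training configuration would presumably toggle mixed precision through a `use_amp` key in its trainer section. A hypothetical fragment, written as a Python dict purely for illustration; only the `use_amp` key comes from this diff, every other entry is made up:

```python
# Hypothetical shape of a trainer section after this change; only "use_amp"
# is taken from the diff above, the remaining keys/values are illustrative.
trainer_config = {
    "num_epochs": 10,                            # illustrative value
    "optimizer": {"type": "adam", "lr": 0.001},  # illustrative value
    "use_amp": True,                             # replaces the Apex-era "opt_level"
}
```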
@@ -3,17 +3,17 @@ from setuptools import find_packages, setup
 REQUIREMENTS = [
     'absl-py==0.9.0',
-    'allennlp==1.0.0',
+    'allennlp==1.1.0',
     'conllu==2.3.2',
     'dataclasses-json==0.5.2',
     'joblib==0.14.1',
     'jsonnet==0.15.0',
     'requests==2.23.0',
-    'overrides==3.0.0',
+    'overrides==3.1.0',
     'tensorboard==2.1.0',
-    'torch>=1.5.0,<1.6.0',
+    'torch==1.6.0',
     'tqdm==4.43.0',
-    'transformers==2.9.1',
+    'transformers>=3.0.0,<3.1.0',
     'urllib3==1.24.2',
 ]
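The updated pins appear to track the AllenNLP 1.1 generation of dependencies (native-AMP-capable torch 1.6 and a transformers 3.0.x release). After reinstalling, the resolved versions can be double-checked with a short script such as:

```python
# Quick check that the environment actually picked up the updated pins.
import pkg_resources

for name in ("allennlp", "torch", "transformers", "overrides"):
    print(f"{name:12s} {pkg_resources.get_distribution(name).version}")
```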