Commit 9d339859 authored by Mateusz Klimaszewski

Add herberta configuration.

parent ac65eb69
Part of 2 merge requests: !4 Documentation, !3 Herbert configuration and AllenNLP 1.2.0 update.
## Installation
### HERBERTA notes:
Install the herberta transformers package **before** running the command below.
Clone this repository and run:
```bash
python setup.py develop
```
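The source of the herberta package is not given here; as a purely illustrative check (not an official install step), you can confirm a compatible transformers build is importable before installing this repository:

```python
# Illustrative sanity check only -- the expected version range follows setup.py,
# not any official herberta install procedure.
import transformers

print(transformers.__version__)  # setup.py expects >=3.0.0,<3.1.0
```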
```diff
@@ -54,12 +54,12 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
                  batch_callbacks: List[training.BatchCallback] = None,
                  epoch_callbacks: List[training.EpochCallback] = None, distributed: bool = False, local_rank: int = 0,
                  world_size: int = 1, num_gradient_accumulation_steps: int = 1,
-                 opt_level: Optional[str] = None) -> None:
+                 use_amp: bool = False) -> None:
         super().__init__(model, optimizer, data_loader, patience, validation_metric, validation_data_loader, num_epochs,
                          serialization_dir, checkpointer, cuda_device, grad_norm, grad_clipping,
                          learning_rate_scheduler, momentum_scheduler, tensorboard_writer, moving_average,
                          batch_callbacks, epoch_callbacks, distributed, local_rank, world_size,
-                         num_gradient_accumulation_steps, opt_level)
+                         num_gradient_accumulation_steps, use_amp)
         # TODO extract param to constructor (+ constructor method?)
         self.validate_every_n = 5
@@ -125,7 +125,8 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
             self.model,
             val_loss,
             val_reg_loss,
-            num_batches,
+            num_batches=num_batches,
+            batch_loss=None,
             reset=True,
             world_size=self._world_size,
             cuda_device=self.cuda_device,
@@ -231,7 +232,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
               world_size: int = 1,
               num_gradient_accumulation_steps: int = 1,
-              opt_level: Optional[str] = None,
               no_grad: List[str] = None,
+              use_amp: bool = False,
               optimizer: common.Lazy[optimizers.Optimizer] = None,
               learning_rate_scheduler: common.Lazy[learning_rate_schedulers.LearningRateScheduler] = None,
               momentum_scheduler: common.Lazy[momentum_schedulers.MomentumScheduler] = None,
@@ -258,8 +259,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
             distributed=distributed,
             world_size=world_size,
             num_gradient_accumulation_steps=num_gradient_accumulation_steps,
-            opt_level=opt_level,
-            no_grad=no_grad,
+            use_amp=use_amp,
             optimizer=optimizer,
             learning_rate_scheduler=learning_rate_scheduler,
             momentum_scheduler=momentum_scheduler,
```
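The rename from `opt_level` to `use_amp` matches AllenNLP's move (around release 1.1.0) from Apex-based mixed precision to PyTorch's native `torch.cuda.amp`. The sketch below is not the repository's code; it only illustrates, under that assumption, the kind of loop the `use_amp` flag enables inside the trainer:

```python
# Minimal sketch of native mixed-precision training, as available in torch 1.6.
# Illustrative only -- this is not the project's trainer implementation.
import torch

scaler = torch.cuda.amp.GradScaler()

def train_step(model, optimizer, batch):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        output = model(**batch)          # forward pass runs in mixed precision
        loss = output["loss"]
    scaler.scale(loss).backward()        # scale the loss to avoid fp16 underflow
    scaler.step(optimizer)               # unscale gradients, then take the step
    scaler.update()                      # adjust the scale factor for the next step
    return loss.detach()
```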
```diff
@@ -3,17 +3,17 @@ from setuptools import find_packages, setup
 REQUIREMENTS = [
     'absl-py==0.9.0',
-    'allennlp==1.0.0',
+    'allennlp==1.1.0',
     'conllu==2.3.2',
     'dataclasses-json==0.5.2',
     'joblib==0.14.1',
     'jsonnet==0.15.0',
     'requests==2.23.0',
-    'overrides==3.0.0',
+    'overrides==3.1.0',
     'tensorboard==2.1.0',
-    'torch>=1.5.0,<1.6.0',
+    'torch==1.6.0',
     'tqdm==4.43.0',
-    'transformers==2.9.1',
+    'transformers>=3.0.0,<3.1.0',
     'urllib3==1.24.2',
 ]
```
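A quick, purely illustrative way to verify that an environment matches the updated pins (not part of the repository):

```python
# Illustrative check of the updated pins from setup.py using setuptools' pkg_resources.
import pkg_resources

for pkg in ("allennlp", "torch", "transformers", "overrides"):
    print(pkg, pkg_resources.get_distribution(pkg).version)
```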