Fix crash when running inference on a GPU.
Fix CPU/GPU tensor mismatch.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/nlp_ws/_service.py", line 383, in __call__
    self._process_task(result, out_file, props, body)
  File "/usr/local/lib/python3.6/dist-packages/nlp_ws/_service.py", line 448, in _process_task
    result['response'] = self._wrk.process(in_file, opts, out_file)
  File "/home/worker/src/worker.py", line 51, in process
    process_ccl(input_file, output_file, tagset, self.ner)
  File "/home/worker/src/ccl_processing.py", line 16, in process_ccl
    for name in ner_model.process_text(sent_str):
  File "/usr/local/lib/python3.6/dist-packages/poldeepner2/models.py", line 176, in process_text
    predictions = self.process(sentences)
  File "/usr/local/lib/python3.6/dist-packages/poldeepner2/models.py", line 139, in process
    logits = self.model(input_ids, labels=None, labels_mask=None, valid_mask=valid_ids)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/poldeepner2/model/xlmr_for_token_classification.py", line 42, in forward
    transformer_out, _ = self.model(inputs_ids, features_only=True)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/fairseq/models/roberta/model.py", line 104, in forward
    x, extra = self.decoder(src_tokens, features_only, return_all_hiddens, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/fairseq/models/roberta/model.py", line 351, in forward
    x, extra = self.extract_features(src_tokens, return_all_hiddens=return_all_hiddens)
  File "/usr/local/lib/python3.6/dist-packages/fairseq/models/roberta/model.py", line 359, in extract_features
    last_state_only=not return_all_hiddens,
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/fairseq/modules/transformer_sentence_encoder.py", line 190, in forward
    x = self.embed_tokens(tokens)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/sparse.py", line 147, in forward
    self.norm_type, self.scale_grad_by_freq, self.sparse)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 1913, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Input, output and indices must be on the current device
To be completely honest, I have no idea why the input tensors still need to be moved to the GPU explicitly after XLMRForTokenClassification and its state dict have already been initialized on the GPU.
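For reference, a self-contained sketch of the failure mode and the fix. The toy nn.Embedding stands in for the XLM-R token embedding table; in the worker the same move is applied to the input tensors (input_ids, valid_ids) before the self.model(...) call shown in the traceback.

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Toy embedding standing in for the XLM-R token embeddings,
# placed on the GPU just like the model's state dict.
embed = nn.Embedding(num_embeddings=10, embedding_dim=4).to(device)

input_ids = torch.tensor([[1, 2, 3]])  # tensors are created on the CPU by default

# On a GPU, embed(input_ids) raises:
#   RuntimeError: Input, output and indices must be on the current device
# Moving the indices to the model's device first resolves the mismatch:
out = embed(input_ids.to(device))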