diff --git a/.idea/combolightning.iml b/.idea/combolightning.iml index 3578d353db0efcadc8984eedebbe1b853bb604f9..d633123868d23bb98bad31e53c7a5e392146ecad 100644 --- a/.idea/combolightning.iml +++ b/.idea/combolightning.iml @@ -4,7 +4,7 @@ <content url="file://$MODULE_DIR$"> <sourceFolder url="file://$MODULE_DIR$/combo" isTestSource="false" /> </content> - <orderEntry type="jdk" jdkName="combo" jdkType="Python SDK" /> + <orderEntry type="jdk" jdkName="combo-newest" jdkType="Python SDK" /> <orderEntry type="sourceFolder" forTests="false" /> </component> <component name="TestRunnerService"> diff --git a/.idea/misc.xml b/.idea/misc.xml index d1d59e67aa31cc624fcf2a8623e0114ab43ee103..51ebff076c8521ef53256c78a18fb678083da3d4 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ <?xml version="1.0" encoding="UTF-8"?> <project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="combo" project-jdk-type="Python SDK" /> + <component name="ProjectRootManager" version="2" project-jdk-name="combo-newest" project-jdk-type="Python SDK" /> </project> \ No newline at end of file diff --git a/README.md b/README.md index c7055dc58bb4df08a32fe985fdc2d5fdb91ae829..63bc2afaea01db7edd555f64e81126ae824c37d1 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,27 @@ combo --mode train --training_data_path <training conllu> --validation_data_path You can find more examples in ```docs/Training.md``` +## Perform Named Entity Recognition with COMBO + +COMBO has a NER module. Currently, three languages are supported: English (base and large version), Polish (base and large version) and Spanish (base version). However, it is possible to train your own NER models. Refer to documentation for more information [here](docs/Ner_docs.md). 
+ +Performance of the pretrained models: + +| name | dataset | language | F1 score <br/> on devset | F1 score <br/> on testset | Precision <br/> on devset | Precision <br/> on testset | Recall <br/> on devset | Recall <br/> on testset | +|-----------|-----------------------------------------------------------------------------------|----------|--------------------------|---------------------------|---------------------------|----------------------------|------------------------|-------------------------| +| pl_base | [kpwe-n82](https://github.com/CLARIN-PL/PolDeepNer/tree/master/poldeepner/data ) | polish | 73,58 | 72,39 | 73,58 | 72,84 | 73,58 | 71,94 | +| pl_large | [kpwe-n82](https://github.com/CLARIN-PL/PolDeepNer/tree/master/poldeepner/data ) | polish | 74,97 | 74,34 | 74,70 | 74,59 | 75,24 | 74,089 | +| eng_base | [ConLL03](https://github.com/ZihanWangKi/CrossWeigh/tree/master/data ) | english | 95,25 | 92,06 | 95,20 | 92,42 | 95,30 | 91,69 | +| eng_large | [ConLL03](https://github.com/ZihanWangKi/CrossWeigh/tree/master/data ) | english | 95,17 | 92,11 | 95,04 | 92,49 | 95,30 | 91,73 | +| es_base | [ConLL02](https://www.clips.uantwerpen.be/conll2002/ner/ ) | spanish | 85,48 | 87,38 | 85,89 | 87,01 | 85,08 | 87,74 | + +More granular data about performance can be found [here](https://docs.google.com/spreadsheets/d/1RbcID6Yq-cNXrUmN4Ogujt_6_t6XKlIVjaPXEzBkVvU/edit?usp=sharing) + +There are two example notebooks that show how to use the NER module: +- Training a NER model [here](notebooks/NER_training.ipynb) +- Using a pretrained NER model [here](notebooks/NER_inference.ipynb) + + ## COMBO tutorial We encourage you to use the [beginner's tutorial](https://colab.research.google.com/drive/1-yYwOb9uOTygGhHdaJK_LKedHf_RnvYa) (colab notebook). 
diff --git a/combo/data/tokenizers/token.py b/combo/data/tokenizers/token.py index 319a5bd873bba1140d26f899d292d42f524b4317..e0726e417bee7db18a4edbfb33131d645d026e42 100644 --- a/combo/data/tokenizers/token.py +++ b/combo/data/tokenizers/token.py @@ -33,7 +33,8 @@ class Token: "semrel", "embeddings", "text_id", - "type_id" + "type_id", + "ner_tag" ] text: Optional[str] @@ -53,6 +54,7 @@ class Token: embeddings: Dict[str, List[float]] text_id: Optional[int] type_id: Optional[int] + ner_tag: Optional[str] def __init__(self, text: str = None, @@ -71,7 +73,9 @@ class Token: semrel: str = None, embeddings: Dict[str, List[float]] = None, text_id: int = None, - type_id: int = None,) -> None: + type_id: int = None, + ner_tag: str = None) -> None: + _assert_none_or_type(text, str) self.text = text @@ -88,6 +92,7 @@ class Token: self.subwords = subwords if subwords else [] self.multiword = multiword self.semrel = semrel + self.ner_tag = ner_tag if embeddings is None: # what? @@ -154,6 +159,7 @@ class Token: f"(embeddings: {self.embeddings}) " f"(text_id: {self.text_id}) " f"(type_id: {self.type_id}) " + f"(ner_tag: {self.ner_tag}) " ) @classmethod diff --git a/combo/ner_modules/NerDataModule.py b/combo/ner_modules/NerDataModule.py new file mode 100644 index 0000000000000000000000000000000000000000..f8ccc9545dcb763006ceb8c8bcbc11ebdbbf251b --- /dev/null +++ b/combo/ner_modules/NerDataModule.py @@ -0,0 +1,100 @@ +import pytorch_lightning as pl +from torch.utils.data import DataLoader +from .data.NerDataset import NerDataset +from pathlib import Path +from .data.NerTokenizer import NerTokenizer + + +class NerDataModule(pl.LightningDataModule): + """ + NerDataModule is a PyTorch Lightning DataModule designed for Named Entity Recognition (NER) tasks. + It handles data loading and preprocessing for training, validation, and testing datasets. + + Attributes: + ---------- + path_data : Path + The path to the directory containing NER dataset files. 
+ tokenizer : NerTokenizer + An instance of the NerTokenizer class used for tokenizing the text data. + batch_size : int, optional + The batch size for data loading. Default is 32. + encoding : str, optional + The character encoding to use when reading dataset files. Default is 'utf-8'. + num_workers : int, optional + The number of CPU processes to use for data loading. Default is 1. + + """ + def __init__(self, + path_data: Path, + tokenizer: NerTokenizer, + batch_size: int = 32, + encoding: str = 'utf-8', + num_workers: int = 1): + super().__init__() + + # Dataloader variables + self.num_workers = num_workers + self.batch_size = batch_size + + # Dataset variables + self.path_data = path_data + self.tokenizer = tokenizer + self.encoding = encoding + + # Data + self.train_dataset = None + self.test_dataset = None + self.dev_dataset = None + + def setup(self, + stage=None) -> None: + """ + Set up datasets for training, validation, and testing. + """ + # Checking whether there is train data inside directory + file_path = self.path_data / "train.txt" + if file_path.is_file(): + self.train_dataset = NerDataset(tokenizer=self.tokenizer, + file_path=file_path, + encoding=self.encoding) + + # Checking whether there is dev data inside directory + file_path = self.path_data / "dev.txt" + if file_path.is_file(): + self.dev_dataset = NerDataset(tokenizer=self.tokenizer, + file_path=file_path, + encoding=self.encoding) + + # Checking whether there is test data inside directory + file_path = self.path_data / "test.txt" + if file_path.is_file(): + self.test_dataset = NerDataset(tokenizer=self.tokenizer, + file_path=file_path, + encoding=self.encoding) + + def train_dataloader(self) -> DataLoader: + """ + Returns a PyTorch DataLoader for the training dataset. 
+ """ + return DataLoader(self.train_dataset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers) + + def val_dataloader(self) -> DataLoader: + """ + Returns a PyTorch DataLoader for the validation dataset. + """ + return DataLoader(self.dev_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers) + + def test_dataloader(self) -> DataLoader: + """ + Returns a PyTorch DataLoader for the testing dataset. + """ + return DataLoader(self.test_dataset, + batch_size=self.batch_size, + shuffle=False, + num_workers=self.num_workers) diff --git a/combo/ner_modules/NerModel.py b/combo/ner_modules/NerModel.py new file mode 100644 index 0000000000000000000000000000000000000000..6b35c50058f905a8b09fa30934174f4bf31d54d0 --- /dev/null +++ b/combo/ner_modules/NerModel.py @@ -0,0 +1,303 @@ +import pytorch_lightning as pl +from .metrics.MetricHolder import MetricHolder +import torch.nn as nn +import torch +from .data.SpecialTokens import SpecialTokens +from .embedders.BertEmbedder import BertEmbedder +from .embedders.ComboCharEmbedder import ComboCharEmbedder +from .classifiers.VanillaClassifier import VanillaClassifier +from .classifiers.CrfClassifier import CrfClassifier +from .utils.download import download_file +from .utils.constructors import construct_loss_from_config +from pathlib import Path +import json +from transformers import logging +# Fixing the issue with transformers BERT some weights being not used and causing warnings +logging.set_verbosity_error() + +CHAR_PAD_TOKEN, CHAR_START_TOKEN, CHAR_END_TOKEN, CHAR_UNKNOWN_TOKEN, PAD_TOKEN, START_TOKEN, END_TOKEN = \ + SpecialTokens.CHAR_PAD_TOKEN, SpecialTokens.CHAR_START_TOKEN, SpecialTokens.CHAR_END_TOKEN, \ + SpecialTokens.CHAR_UNKNOWN_TOKEN, SpecialTokens.PAD_TOKEN, SpecialTokens.START_TOKEN, SpecialTokens.END_TOKEN + + +class NerModel(pl.LightningModule): + def __init__(self, + loss_fn, + char_to_id_map: dict = None, + label_to_id_map: dict = None, + config: dict 
= None): + super().__init__() + + ### Define architecture ### + # Define Bert Embedder + bert_embedder_params = config["model"]["bert_embedder"] + self.bert_embedder = BertEmbedder(use_start_end_token=config["data"]["use_start_end_token"], + **bert_embedder_params) + + # Define Character Embedder + if config["data"]["use_char_level_embeddings"]: + if config["model"]["char_embedder"]["type"] == "combo": + char_embedding_dim = config["model"]["char_embedder"]["char_embedding_dim"] + self.char_embedder = ComboCharEmbedder(vocab_size=len(char_to_id_map), + char_embedding_dim=char_embedding_dim, + padding_idx=char_to_id_map[CHAR_PAD_TOKEN]) + else: + self.char_embedder = None + + # Define Classifier + bert_output_dim = self.bert_embedder.output_dimension + char_output_dim = self.char_embedder.output_dimension if self.char_embedder is not None else 0 + input_dim = bert_output_dim + char_output_dim + classifier_params = config["model"]["classifier"] + if config["model"]["classifier"]["type"] == "vanilla": + self.classifier = VanillaClassifier(label_to_id_map=label_to_id_map, + input_dim=input_dim, + to_tag_space=classifier_params["to_tag_space"]) + elif config["model"]["classifier"]["type"] == "crf": + self.classifier = CrfClassifier(label_to_id_map=label_to_id_map, + input_dim=input_dim, + to_tag_space=classifier_params["to_tag_space"]) + if "dropout" in config["model"]: + self.dropout = nn.Dropout(p=config["model"]["dropout"]) + else: + self.dropout = None + ### End of architecture definition ### + + # Define metric holder + self.train_metrics_holder = MetricHolder(label_to_idx=label_to_id_map) + self.val_metrics_holder = MetricHolder(label_to_idx=label_to_id_map) + self.test_metrics_holder = MetricHolder(label_to_idx=label_to_id_map) + + # Define optimization parameters + self.learning_rate = config["learning_rate"] + self.loss_fn = "crf" if config["model"]["classifier"]["type"] == "crf" else loss_fn + + # Save label to id map + self.label_to_idx_map = label_to_id_map 
+ + # save config + self.config = config + + def forward(self, batch): + input_ids = batch["input_ids"] + attention_mask = batch["attention_mask"] + word_ids = batch["word_ids"] if "word_ids" in batch else None + + # get BERT embeddings + outputs = self.bert_embedder(input_ids=input_ids, + attention_mask=attention_mask, + word_ids=word_ids) + # apply dropout + if self.dropout: + outputs = self.dropout(outputs) + + # adding character embeddings to the output of BERT + if self.char_embedder: + char_ids = batch["char_ids"] + char_embeddings = self.char_embedder(char_ids) + outputs = torch.cat((outputs, char_embeddings), dim=2) + + # Apply the classifier layer to obtain logits + if self.loss_fn == "crf": + _, logits = self.classifier(encoder_outputs=outputs) + else: + logits = self.classifier(outputs) + + return logits, outputs + + def training_step(self, batch, batch_idx): + loss, preds, labels = self._common_step(batch) + + self.train_metrics_holder.update(predictions=preds, + targets=labels) + + self.log(f"train_loss", loss, on_step=True, on_epoch=False, prog_bar=True, logger=True) + return loss + + def on_train_epoch_end(self): + # compute metrics with bio tags + metrics_dict = self.train_metrics_holder.get_metrics(stage="train") + metrics_dict["epoch"] = float(self.current_epoch + 1) + + # log metrics + self.log_dict(metrics_dict, on_step=False, on_epoch=True, prog_bar=True, logger=True) + + # reset all metrics + self.train_metrics_holder.reset() + + def validation_step(self, batch, batch_idx): + loss, preds, labels = self._common_step(batch) + + self.val_metrics_holder.update(predictions=preds, + targets=labels) + + self.log(f"validation_loss", loss, on_step=True, on_epoch=False, prog_bar=True, logger=True) + return loss + + def on_validation_epoch_end(self): + # compute metrics with bio tags + metrics_dict = self.val_metrics_holder.get_metrics(stage="validation") + metrics_dict["epoch"] = float(self.current_epoch + 1) + + # log metrics + 
self.log_dict(metrics_dict, on_step=False, on_epoch=True, prog_bar=True, logger=True) + + # reset all metrics + self.val_metrics_holder.reset() + + def test_step(self, batch, batch_idx): + loss, preds, labels = self._common_step(batch) + + self.test_metrics_holder.update(predictions=preds, + targets=labels) + return loss + + def on_test_epoch_end(self): + # compute metrics with bio tags + metrics_dict = self.test_metrics_holder.get_metrics(stage="test") + metrics_dict["epoch"] = float(self.current_epoch + 1) + + # log metrics + self.log_dict(metrics_dict, on_step=False, on_epoch=True, prog_bar=True, logger=True) + + print(self.test_metrics_holder.get_classification_report()) + + # reset all metrics + self.test_metrics_holder.reset() + + def configure_optimizers(self): + optimizer = torch.optim.Adam(self.parameters(), + lr=self.learning_rate) + + scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, + mode="max", + patience=3, + factor=0.1, + threshold_mode="abs", + threshold=0.002) + return {"optimizer": optimizer, + "lr_scheduler": scheduler, + "monitor": "validation_f1"} + + def _common_step(self, batch): + labels = batch["labels_ids"] + prediction_mask = batch["prediction_mask"] + + # Forward pass + logits, encoder_outputs = self.forward(batch) + + # if CRF is used, we don't need to calculate the loss + if self.loss_fn != "crf": + # Get predictions + preds = torch.argmax(logits, dim=2) + # Permute the logits to conform loss requirements (batch_size, numb_classes, sequence_length) + logits = logits.permute(0, 2, 1) + # Change labels of PAD START and END tokens to -100 + labels = self.mask_labels(labels,prediction_mask) + # Calculate the loss + loss = self.loss_fn(logits, labels) + else: + loss = self.classifier.neg_log_likelihood(encoder_outputs, labels) + # Change labels of PAD START and END tokens to -100 + labels = self.mask_labels(labels, prediction_mask) + preds = logits + + return loss, preds, labels + + @staticmethod + def mask_labels(labels, + 
prediction_mask): + # for torch gradient computation we need to do it on copy + label_copy = labels.clone() + label_copy[prediction_mask == 0] = -100 + return label_copy + + def predict(self, batch): + # Forward pass + logits, _ = self.forward(batch) + prediction_mask = batch["prediction_mask"] + + if self.loss_fn != "crf": + # Get predictions + preds = torch.argmax(logits, dim=2) + else: + preds = logits + + batch_size, sequence_length = preds.shape + result = [[preds[i, j].item() for j in range(sequence_length) if prediction_mask[i, j] == 1] for i in range(batch_size)] + return result + + @classmethod + def get(cls, model_name): + """ + Downloads and loads pretrained model from MinIO + """ + # Downloading files and loading jsons + PATH_CONFIG = Path(download_file(model_name, "config.json")) + config = json.load(open(PATH_CONFIG)) + + try: + PATH_CHAR_TO_ID = Path(download_file(model_name, "char_to_id.json")) + char_to_id = json.load(open(PATH_CHAR_TO_ID)) + except: + char_to_id = {} + + PATH_LABEL_TO_ID = Path(download_file(model_name, "label_to_id.json")) + label_to_id = json.load(open(PATH_LABEL_TO_ID)) + + PATH_MODEL = Path(download_file(model_name, "best_model.ckpt")) + # define model + loss_fn = construct_loss_from_config(config=config, + label_to_id=label_to_id) + + if torch.cuda.is_available(): + device = "cuda" + else: + device = "cpu" + + # load model + model = NerModel.load_from_checkpoint(PATH_MODEL, + loss_fn=loss_fn, + char_to_id_map=char_to_id, + map_location=torch.device(device), + label_to_id_map=label_to_id, + config=config) + return model + + @classmethod + def load_from_disc(cls, + folder_path: Path): + """ + Loads pretrained model from disc + """ + # Loading jsons + PATH_CONFIG = folder_path / "config.json" + config = json.load(open(PATH_CONFIG)) + + try: + PATH_CHAR_TO_ID = folder_path / "char_to_id.json" + char_to_id = json.load(open(PATH_CHAR_TO_ID)) + except: + char_to_id = {} + + PATH_LABEL_TO_ID = folder_path / "label_to_id.json" + 
label_to_id = json.load(open(PATH_LABEL_TO_ID)) + + PATH_MODEL = folder_path / "best_model.ckpt" + # define model + loss_fn = construct_loss_from_config(config=config, + label_to_id=label_to_id) + + if torch.cuda.is_available(): + device = "cuda" + else: + device = "cpu" + + # load model + model = NerModel.load_from_checkpoint(PATH_MODEL, + loss_fn=loss_fn, + char_to_id_map=char_to_id, + map_location=torch.device(device), + label_to_id_map=label_to_id, + config=config) + return model diff --git a/combo/ner_modules/__init__.py b/combo/ner_modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/combo/ner_modules/callbacks/FixedProgressBar.py b/combo/ner_modules/callbacks/FixedProgressBar.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c262d118fd39b1c3c7f2bbf609cce6cda85c45 --- /dev/null +++ b/combo/ner_modules/callbacks/FixedProgressBar.py @@ -0,0 +1,72 @@ +import sys +from pytorch_lightning.callbacks import TQDMProgressBar + + +class FixedProgressBar(TQDMProgressBar): + """ + A custom progress bar for PyTorch Lightning which addresses potential compatibility issues with some IDEs that + do not display the default progress bar properly. + + This progress bar inherits from the default TQDMProgressBar provided by PyTorch Lightning and modifies the + initialization of the progress bars for validation, prediction, and testing. + + Example Usage + -------------- + ```python + + callbacks = [] + callbacks.append(FixedProgressBar()) + + # add any other callbacks if needed + + trainer = pl.Trainer(callbacks=callbacks, + **params) + -------------- + """ + def init_validation_tqdm(self): + """ + Initialize the progress bar for validation phase. + + Returns + ---------- + bar : tqdm.tqdm + The tqdm progress bar object for validation. 
+ """ + bar = super().init_validation_tqdm() + + # Disable the progress bar if stdout is not a TTY (i.e., it's not an interactive terminal) + if not sys.stdout.isatty(): + bar.disable = True + return bar + + def init_predict_tqdm(self): + """ + Initialize the progress bar for prediction phase. + + Returns + ---------- + bar : tqdm.tqdm + The tqdm progress bar object for predicting. + """ + bar = super().init_predict_tqdm() + + # Disable the progress bar if stdout is not a TTY (i.e., it's not an interactive terminal) + if not sys.stdout.isatty(): + bar.disable = True + return bar + + def init_test_tqdm(self): + """ + Initialize the progress bar for testing phase. + + Returns + ---------- + bar : tqdm.tqdm + TThe tqdm progress bar object for testing. + """ + bar = super().init_test_tqdm() + + # Disable the progress bar if stdout is not a TTY (i.e., it's not an interactive terminal) + if not sys.stdout.isatty(): + bar.disable = True + return bar diff --git a/combo/ner_modules/callbacks/__init__.py b/combo/ner_modules/callbacks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/combo/ner_modules/classifiers/CrfClassifier.py b/combo/ner_modules/classifiers/CrfClassifier.py new file mode 100644 index 0000000000000000000000000000000000000000..87129340d17fe7f3710a97a00c28d40e020c3cc7 --- /dev/null +++ b/combo/ner_modules/classifiers/CrfClassifier.py @@ -0,0 +1,324 @@ +import torch +import torch.nn as nn +from ..data.SpecialTokens import SpecialTokens + +START_TOKEN, END_TOKEN, PAD_TOKEN = SpecialTokens.START_TOKEN, SpecialTokens.END_TOKEN, SpecialTokens.PAD_TOKEN + + +class CrfClassifier(nn.Module): + """ + A class that defines a Conditional Random Field (CRF) classifier layer. + The model is capable of transforming the encoder's output into the tag space, considering different strategies + like "bilstm", "transformer", or "linear". 
It also includes methods for the forward algorithm, Viterbi decoding, + and negative log likelihood calculation for training and prediction. + + Attributes: + ---------- + label_to_id_map : dict + Mapping of labels to their respective unique identifiers. + input_dim : int + Dimension of the output of the encoder. + to_tag_space : str, optional (default: 'linear') + The strategy to map the output of the encoder into the tag space. + Choices: 'linear', 'bilstm', 'transformer'. + start_label_id : int + Identifier for the start token label. + stop_label_id : int + Identifier for the end token label. + num_labels : int + Total number of labels. + hidden2tag : torch.nn.Linear + Linear layer to transform the encoder's output into the tag space. + BiLSTM : torch.nn.LSTM (optional) + Bi-directional LSTM layer, if 'bilstm' is chosen for 'to_tag_space'. + Transformer : torch.nn.TransformerEncoderLayer (optional) + Transformer layer, if 'transformer' is chosen for 'to_tag_space'. + transitions : torch.nn.Parameter + Matrix of transition parameters, representing scores for transitioning from one state to another. + + Methods: + ---------- + init_transition(self): + Initializes transition constraints according to specific rules. + _forward_alg(self, feats): + Forward algorithm to calculate log probabilities. + _score_sentence(self, feats, label_ids): + Gives the score of a provided label sequence. + _viterbi_decode(self, feats): + Performs Viterbi decoding to find the best path, given the features. + neg_log_likelihood(self, encoder_outputs, label_ids): + Computes the negative log likelihood for the given encoder outputs and label sequence. + forward(self, encoder_outputs): + Performs forward computation to find the score and label sequence IDs, used for prediction. + log_sum_exp_batch(log_Tensor, axis=-1): + A static method that computes the log sum exponential operation. 
+ """ + def __init__(self, + label_to_id_map: dict, + input_dim: int, + to_tag_space: str = "linear") -> None: + super(CrfClassifier, self).__init__() + self.input_dim = input_dim # dimension of output of the encoder + + # Tag space + self.label_to_id_map = label_to_id_map + self.start_label_id = label_to_id_map[START_TOKEN] + self.stop_label_id = label_to_id_map[END_TOKEN] + self.num_labels = len(label_to_id_map) + + # Maps the output of the encoder into tag space. + self.to_tag_space = to_tag_space + self.hidden2tag = nn.Linear(input_dim, self.num_labels) + + if to_tag_space == "bilstm": + self.BiLSTM = nn.LSTM(bidirectional=True, + num_layers=2, + input_size=input_dim, + hidden_size=input_dim // 2, + batch_first=True) + elif to_tag_space == "transformer": + self.Transformer = torch.nn.TransformerEncoderLayer(d_model=input_dim, + nhead=16, + batch_first=True) + elif to_tag_space == "bigru": + self.BiLSTM = nn.GRU(bidirectional=True, + num_layers=2, + input_size=input_dim, + hidden_size=input_dim // 2, + batch_first=True) + + # Matrix of transition parameters. Entry i,j is the score of + # transitioning *to* i *from* j. + self.transitions = nn.Parameter( + torch.randn(self.num_labels, self.num_labels)) + + self.init_transition() + + def init_transition(self) -> None: + """ + Initializes the transition constraints for the CRF model according to specific rules regarding start, stop, + padding tokens, and transitions between different entity tags. Enforced rules are: + 1. We never transfer to the start tag. + 2. We never transfer from the stop tag unless to pad tag. + 3. After B-x going to I-y is impossible. + 4. After O going to I-x is impossible. + 5. Going to <PAD> tag from different tag than stop tag or <PAD>. + """ + # create entity name list + entity_list = [entity.split("-")[-1] for entity in self.label_to_id_map.keys() if entity.startswith("B-")] + + # We never transfer to the start tag + self.transitions.data[self.start_label_id, :] = -10000. 
+ # We never transfer from the stop tag unless to pad tag + for entity in self.label_to_id_map.keys(): + if entity != PAD_TOKEN: + self.transitions.data[self.label_to_id_map[entity], self.stop_label_id] = -10000. + + # after B-x going to I-y is impossible + for previous_tag in entity_list: + for next_tag in entity_list: + if "I-"+next_tag not in self.label_to_id_map.keys() or "B-"+previous_tag not in self.label_to_id_map.keys(): + continue + if previous_tag != next_tag: + self.transitions.data[self.label_to_id_map["I-" + next_tag], self.label_to_id_map["B-" + previous_tag]] = -10000. + + # after O going to I-x is impossible + for next_tag in entity_list: + if "I-"+next_tag not in self.label_to_id_map.keys(): + continue + self.transitions.data[self.label_to_id_map["I-" + next_tag], self.label_to_id_map["O"]] = -10000. + + # going to <PAD> tag from different tag than stop tag or <PAD> tag is impossible + for previous_tag in self.label_to_id_map.keys(): + if previous_tag != PAD_TOKEN and previous_tag != END_TOKEN: + self.transitions.data[self.label_to_id_map[PAD_TOKEN], self.label_to_id_map[previous_tag]] = -10000. + + def _forward_alg(self, feats) -> torch.Tensor: + """ + Implements the forward algorithm to calculate log probabilities for a sequence. + + Parameters: + ---------- + feats : torch.Tensor + Features from the previous layer. + + Returns: + ------- + torch.Tensor + Log probabilities for the sequence. + """ + # T = self.max_seq_length + T = feats.shape[1] + batch_size = feats.shape[0] + + # alpha_recursion,forward, alpha(zt)=p(zt,bar_x_1:t) + log_alpha = torch.Tensor(batch_size, 1, self.num_labels).fill_(-10000.) + # normal_alpha_0 : alpha[0]=Ot[0]*self.PIs + # self.start_label has all of the score. 
it is log,0 is p=1 + log_alpha[:, 0, self.start_label_id] = 0 + + # feats: sentences -> word embedding -> lstm -> MLP -> feats + # feats is the probability of emission, feat.shape=(1,tag_size) + for t in range(1, T): + log_alpha = (self.log_sum_exp_batch(self.transitions + log_alpha.to(self.transitions.device), axis=-1) + feats[:, t]).unsqueeze(1) + + # log_prob of all barX + log_prob_all_barX = self.log_sum_exp_batch(log_alpha) + return log_prob_all_barX + + def _score_sentence(self, feats, label_ids) -> torch.Tensor: + """ + Gives the score of a provided label sequence + p(X=w1:t,Zt=tag1:t)=...p(Zt=tag_t|Zt-1=tag_t-1)p(xt|Zt=tag_t)... + Gives the score of a provided label sequence based on the CRF's parameters. + Parameters: + ---------- + feats : torch.Tensor + Features from the previous layer. + label_ids : torch.Tensor + Tensor of label IDs for the sequence. + + Returns: + ------- + torch.Tensor + Score for the provided label sequence. + """ + + T = feats.shape[1] + batch_size = feats.shape[0] + + batch_transitions = self.transitions.expand(batch_size, self.num_labels, self.num_labels) + batch_transitions = batch_transitions.flatten(1) + + score = torch.zeros((feats.shape[0], 1)).to(label_ids.device) + # the 0th node is start_label->start_word,the probability of them=1. so t begin with 1. + # label_ids = torch.where(label_ids == -100, torch.tensor(self.tag_to_ix[PAD_TOKEN]), label_ids) + + for t in range(1, T): + score = score + \ + batch_transitions.gather(-1, (label_ids[:, t] * self.num_labels + label_ids[:, t - 1]).view(-1, 1)) + \ + feats[:, t].gather(-1, label_ids[:, t].view(-1, 1)).view(-1, 1) + return score + + def _viterbi_decode(self, feats): + """ + Viterbi decoding algorithm to find the best path through the CRF's states. + Parameters: + ---------- + feats : torch.Tensor + Features from the previous layer. + + Returns: + ------- + torch.Tensor + Maximum log likelihood for the sequence. + torch.Tensor + The best path through the CRF's states. 
+ """ + + T = feats.shape[1] + batch_size = feats.shape[0] + + log_delta = torch.Tensor(batch_size, 1, self.num_labels).fill_(-10000.).to(feats.device) + log_delta[:, 0, self.start_label_id] = 0 + + # psi is for the vaule of the last latent that make P(this_latent) maximum. + psi = torch.zeros((batch_size, T, self.num_labels), dtype=torch.long).to(feats.device) # psi[0]=0000 useless + for t in range(1, T): + # delta[t][k]=max_z1:t-1( p(x1,x2,...,xt,z1,z2,...,zt-1,zt=k|theta) ) + # delta[t] is the max prob of the path from z_t-1 to z_t[k] + log_delta, psi[:, t] = torch.max(self.transitions + log_delta, -1) + # psi[t][k]=argmax_z1:t-1( p(x1,x2,...,xt,z1,z2,...,zt-1,zt=k|theta) ) + # psi[t][k] is the path chosen from z_t-1 to z_t[k],the value is the z_state(is k) index of z_t-1 + log_delta = (log_delta + feats[:, t]).unsqueeze(1) + + # trace back + path = torch.zeros((batch_size, T), dtype=torch.long).to(feats.device) + + # max p(z1:t,all_x|theta) + max_logLL_allz_allx, path[:, -1] = torch.max(log_delta.squeeze(), -1) + + for t in range(T - 2, -1, -1): + # choose the state of z_t according the state choosed of z_t+1. + path[:, t] = psi[:, t + 1].gather(-1, path[:, t + 1].view(-1, 1)).squeeze() + + return max_logLL_allz_allx, path + + def neg_log_likelihood(self, encoder_outputs, label_ids): + """ + Computes the negative log likelihood for the given encoder outputs and label sequence. + + Parameters: + ---------- + encoder_outputs : torch.Tensor + Outputs from the encoder. + label_ids : torch.Tensor + Tensor of label IDs for the sequence. + + Returns: + ------- + torch.Tensor + Negative log likelihood. 
+ """ + if self.to_tag_space == "transformer": + encoder_outputs = self.Transformer(encoder_outputs) + elif self.to_tag_space == "bilstm": + encoder_outputs, _ = self.BiLSTM(encoder_outputs) + + # transform to tag space + outputs = self.hidden2tag(encoder_outputs) + + forward_score = self._forward_alg(outputs) + # p(X=w1:t,Zt=tag1:t)=...p(Zt=tag_t|Zt-1=tag_t-1)p(xt|Zt=tag_t)... + gold_score = self._score_sentence(outputs, label_ids) + # - log[ p(X=w1:t,Zt=tag1:t)/p(X=w1:t) ] = - log[ p(Zt=tag1:t|X=w1:t) ] + return torch.mean(forward_score - gold_score) + + def forward(self, encoder_outputs): + """ + Forward computation to find the score and label sequence IDs, used for prediction. + + Parameters: + ---------- + encoder_outputs : torch.Tensor + Outputs from the encoder. + + Returns: + ------- + torch.Tensor + Score for the sequence. + torch.Tensor + Label sequence IDs for the sequence. + """ + if self.to_tag_space == "transformer": + encoder_outputs = self.Transformer(encoder_outputs) + elif self.to_tag_space == "bilstm": + encoder_outputs, _ = self.BiLSTM(encoder_outputs) + + # transform to tag space + encoder_outputs = self.hidden2tag(encoder_outputs) + + # Find the best path, given the features. + score, label_seq_ids = self._viterbi_decode(encoder_outputs) + return score, label_seq_ids + + @staticmethod + def log_sum_exp_batch(log_Tensor, axis=-1) -> torch.Tensor: # shape (batch_size,n,m) + """ + A static method that computes the log sum exponential operation for a given tensor along a specific axis. + + Parameters: + ---------- + log_Tensor : torch.Tensor + Input tensor. + axis : int, optional (default: -1) + Axis along which to compute the log sum exponential. + + Returns: + ------- + torch.Tensor + Result of the log sum exponential operation. 
+ """ + return torch.max(log_Tensor, axis)[0] + torch.log( + torch.exp(log_Tensor - torch.max(log_Tensor, axis)[0].view(log_Tensor.shape[0], -1, 1)).sum(axis)) + diff --git a/combo/ner_modules/classifiers/VanillaClassifier.py b/combo/ner_modules/classifiers/VanillaClassifier.py new file mode 100644 index 0000000000000000000000000000000000000000..be762b83d7cdd0ead5d4d4fce8e4b6cb45594053 --- /dev/null +++ b/combo/ner_modules/classifiers/VanillaClassifier.py @@ -0,0 +1,59 @@ +import torch.nn as nn +from ..data.SpecialTokens import SpecialTokens + +PAD_TOKEN, START_TOKEN, END_TOKEN = SpecialTokens.PAD_TOKEN, SpecialTokens.START_TOKEN, SpecialTokens.END_TOKEN + + +class VanillaClassifier(nn.Module): + """ + A customizable classifier that supports Linear, BiLSTM, or Transformer layers to map input features to a fixed + number of labels. + + Attributes: + ---------- + tag_mapper : torch.nn.Module, optional + Layer for mapping to the tag space. Can be LSTM, Transformer, or None (for a linear mapping). + linear : torch.nn.Linear + Linear layer mapping the input (or the output of tag_mapper) to the label space. + + """ + def __init__(self, + label_to_id_map: dict, + input_dim: int, + to_tag_space: str = "linear"): + """ + Parameters: + ---------- + label_to_id_map : dict + Mapping of labels to their respective unique identifiers + input_dim : int + Dimensionality of the input features. + to_tag_space : str, optional (default: "linear") + Specifies the tag space mapping, either "linear", "bilstm", or "transformer". 
+ + """ + super(VanillaClassifier, self).__init__() + num_labels = len(label_to_id_map) - len({START_TOKEN, END_TOKEN, PAD_TOKEN} & set(label_to_id_map.keys())) + + if to_tag_space == "bilstm": + self.tag_mapper = nn.LSTM(input_dim, input_dim, bidirectional=True, batch_first=True) + self.linear = nn.Linear(in_features=input_dim * 2, out_features=num_labels) + elif to_tag_space == "transformer": + self.tag_mapper = nn.Transformer(input_dim, input_dim) + self.linear = nn.Linear(in_features=input_dim, out_features=num_labels) + else: # "linear" + self.tag_mapper = None + self.linear = nn.Linear(in_features=input_dim, out_features=num_labels) + + def forward(self, x): + """ + Parameters: + ---------- + x : torch.Tensor + Input tensor with features which should output of encoder on the word level. + """ + if self.tag_mapper: + x, _ = self.tag_mapper(x) + out = self.linear(x) + return out + diff --git a/combo/ner_modules/classifiers/__init__.py b/combo/ner_modules/classifiers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/combo/ner_modules/data/NerDataset.py b/combo/ner_modules/data/NerDataset.py new file mode 100644 index 0000000000000000000000000000000000000000..83887aa64cec70aa641a94d578a1bb5d5624bbd0 --- /dev/null +++ b/combo/ner_modules/data/NerDataset.py @@ -0,0 +1,130 @@ +from torch.utils.data import Dataset +from pathlib import Path +from .SpecialTokens import SpecialTokens +from .NerTokenizer import NerTokenizer +from typing import Dict + +CHAR_PAD_TOKEN, CHAR_START_TOKEN, CHAR_END_TOKEN, CHAR_UNKNOWN_TOKEN, PAD_TOKEN, START_TOKEN, END_TOKEN = \ + SpecialTokens.CHAR_PAD_TOKEN, SpecialTokens.CHAR_START_TOKEN, SpecialTokens.CHAR_END_TOKEN, \ + SpecialTokens.CHAR_UNKNOWN_TOKEN, SpecialTokens.PAD_TOKEN, SpecialTokens.START_TOKEN, SpecialTokens.END_TOKEN + + +class NerDataset(Dataset): + """ + Named Entity Recognition (NER) Dataset. 
+    A PyTorch dataset class for reading and tokenizing NER data from a file. The file with data should adhere to the
+    following structure:
+        - Each line consists of an <entity> <tag> pair, delimited by a space.
+        - Consecutive lines represent words in the same sentence, in their original order.
+        - The start of a new sentence is indicated by a metadata line: `# sent_id = <n>`, where `<n>` is a unique
+          sentence identifier within the file.
+        - A newline character demarcates the end of each sentence.
+
+    Attributes:
+    ----------
+    tokenizer : NerTokenizer
+        Custom NerTokenizer object for encoding the entities.
+    sentences : List[List]
+        List of lists of entities (strings); each inner list can be understood as a sentence.
+    sentence_id : List[int]
+        List of unique sentence IDs.
+    labels : List[List]
+        List of lists of labels corresponding to each tokenized sentence.
+    max_sentence_len : int
+        Length of the longest sentence in the dataset.
+    max_word_len : int
+        Length of the longest word in the dataset.
+
+    """
+
+    def __init__(self,
+                 tokenizer: NerTokenizer,
+                 file_path: Path,
+                 encoding: str = 'utf-8') -> None:
+        """
+        Initializes the dataset with a tokenizer, reads the data, and processes it. Tokenizer is expected to be a custom
+        NerTokenizer. Other tokenizers were not tested.
+
+        Parameters:
+        ----------
+        tokenizer : NerTokenizer
+            The tokenizer for encoding entities and labels.
+        file_path : Path
+            File path to the data.
+        encoding : str (optional)
+            Encoding of the data file. Defaults to 'utf-8'.
+ """ + # save tokenizer + self.tokenizer = tokenizer + + # data holders + self.sentences = [] + self.sentence_id = [] + self.labels = [] + + # additional information about data + self.max_sentence_len = 0 + self.max_word_len = 0 + + # read data + self._read_file(file_path=file_path, + encoding=encoding) + + def __len__(self) -> int: + return len(self.sentences) + + def __getitem__(self, + index: int) -> Dict: + # retrieve data + words = self.sentences[index] + labels = self.labels[index] + + # use tokenizer to encode it + output = self.tokenizer.encode(input_entities=words, + labels=labels) + return output + + def _read_file(self, + file_path: Path, + encoding: str) -> None: + """ + Reads the data file and processes it, populating sentences, labels, and other metadata. + + Parameters: + ---------- + file_path : Path + Path to the data file. + encoding : str + Encoding of the data file. + """ + # Reading file + with open(file_path, 'r', encoding=encoding) as f: + words = [] + labels = [] + + for line in f: + # at the end of each sequence of entities from the same sentence there should be "\n" + if line == '\n': + # update longest sentence + self.max_sentence_len = max(self.max_sentence_len, len(words)) + + # save labels and words + self.sentences.append(words) + self.labels.append(labels) + words = [] + labels = [] + + # save sentence id at the beginning of every sequence of entities + elif line.startswith("# sent_id ="): + self.sentence_id.append(line.split()[3]) + + # otherwise it should be entity and tag separated by space + else: + parts = line.strip().split() + words.append(parts[0]) + labels.append(parts[-1]) + + # update longest word length + self.max_word_len = max(self.max_word_len, len(parts[0])) + + self.max_word_len += 2 # for <b> and <e> tokens diff --git a/combo/ner_modules/data/NerTokenizer.py b/combo/ner_modules/data/NerTokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..001b7d9504819eb3bc6abf0a50c3d34a1a8aa299 --- 
/dev/null +++ b/combo/ner_modules/data/NerTokenizer.py @@ -0,0 +1,404 @@ +import torch +from transformers import BertTokenizer, AutoTokenizer +from typing import Dict, List, Optional +from .SpecialTokens import SpecialTokens +from ..utils.download import download_file +from lambo.segmenter.lambo import Lambo +from pathlib import Path +import json + +CHAR_PAD_TOKEN, CHAR_START_TOKEN, CHAR_END_TOKEN, CHAR_UNKNOWN_TOKEN, PAD_TOKEN, START_TOKEN, END_TOKEN = \ + SpecialTokens.CHAR_PAD_TOKEN, SpecialTokens.CHAR_START_TOKEN, SpecialTokens.CHAR_END_TOKEN, \ + SpecialTokens.CHAR_UNKNOWN_TOKEN, SpecialTokens.PAD_TOKEN, SpecialTokens.START_TOKEN, SpecialTokens.END_TOKEN + + +class NerTokenizer: + """ + Named Entity Recognition (NER) Tokenizer. + + A tokenizer tailored for tokenizing entities and their corresponding labels. + This tokenizer provides the flexibility to encode characters, entities, and + labels. It can utilize pre-trained tokenizers, such as BERT and AutoTokenizer, + and augment them with custom tokenization techniques. Based on specification passed + in initialization it will return + + Attributes: + ---------- + backbone_tokenizer : transformers.PreTrainedTokenizer + The tokenizer from transformers BertTokenizer or AutoTokenizer. + char_to_id : Dict[str, int] + Mapping of characters to their respective unique identifiers. + label_to_id : Dict[str, int] + Mapping of labels to their respective unique identifiers. + max_word_len : int + Maximum length of word, used for padding. + use_start_end_token : bool + Indicates whether to append special start and end tokens to a sentence. + tokenize_entities : bool + Indicates whether to additionaly tokenize entities. If True, entities like "Doing" will be tokenized to "Do" + and "##ing" to fit BERT model's vocabulary. If False, entities will be left as they are which can result + in entities represented by BERT <unk> token. + use_char_level_embeddings : bool + Indicates whether to generate character-level embeddings. 
+ + Methods: + -------- + encode(input_entities, labels=None) -> Dict: + Tokenizes and encodes input entities and their corresponding labels. + encode_sentence(words: List[str]) -> Tuple: + Encodes a sentence of words. + _encode_labels(labels: List[str], max_word_index: int) -> torch.Tensor: + Encodes labels into a tensor of label IDs. + _encode_characters(words: List[str], max_word_index: int) -> torch.Tensor: + Encodes characters of words into a tensor of character IDs. + _create_prediction_mask(words: List[str], max_word_index: int) -> List[int]: + Creates a prediction mask for the words. + """ + def __init__(self, + pretrained_model_type: str, + pretrained_model_name: str, + char_to_id_map: Dict[str, int] = None, + label_to_id_map: Dict[str, int] = None, + use_char_level_embeddings: bool = False, + use_start_end_token: bool = False, + tokenize_entities: bool = False, + language: str = "pl", + max_word_len: int = 69, + load_lambo_tokenizer: bool = True): + """ + Parameters: + ---------- + pretrained_model_type : str + Type of the pre-trained model to be used for tokenization (e.g., "BertModel", "AutoModel"). + pretrained_model_name : str + Name of the pre-trained model or path. + char_to_id_map : Dict[str, int], optional + Mapping of characters to unique IDs. + label_to_id_map : Dict[str, int], optional + Mapping of labels to unique IDs. + use_char_level_embeddings : bool, default=False + If True, character-level embeddings will be used. + use_start_end_token : bool, default=False + If True, special start and end tokens will be appended. + tokenize_entities : bool, default=False + If True, entities will be tokenized. 
+ """ + + assert label_to_id_map, "Specify label_to_id_map" + assert char_to_id_map or not use_char_level_embeddings, \ + "Specify char_to_id_map if you want to use character embeddings" + + # define backbone tokenizer + if pretrained_model_type == "BertModel": + self.backbone_tokenizer = BertTokenizer.from_pretrained(pretrained_model_name) + elif pretrained_model_type == "AutoModel": + self.backbone_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name) + else: + raise ValueError("Not supported pretrained_model_type") + + # save mapping to ids + self.char_to_id = char_to_id_map + self.label_to_id = label_to_id_map + + # maximum word length + self.max_word_len = max_word_len + + # information about what needs to be returned + self.use_start_end_token = use_start_end_token + self.tokenize_entities = tokenize_entities + self.use_char_level_embeddings = use_char_level_embeddings + + # lambo segmenter + if load_lambo_tokenizer: + self.segmenter = Lambo.get(language, with_splitter=False) + + def encode(self, + input_entities, + labels: Optional[List[str]] = None, + batchify: bool = False): + """ + Tokenizes and encodes input entities and their corresponding labels if provided. If batchify is True returned + dictionary with tensors will be batchified. 
+ """ + if isinstance(input_entities, list): + # If input is a list of entities which happens during training this encoding is done + input_ids, attention_mask, word_ids, max_word_index = self.encode_sentence(words=input_entities) + + output = dict( + input_ids=input_ids, + attention_mask=attention_mask) + + # if labels are provided we encode them + if labels: + label_ids = self._encode_labels(labels=labels, + max_word_index=max_word_index) + output["labels_ids"] = label_ids + + # when entities are not additionaly tokenized word_ids won't be provided + if word_ids is not None: + output["word_ids"] = word_ids + + # if character embeddings are used we encode words + if self.use_char_level_embeddings: + char_ids = self._encode_characters(words=input_entities, max_word_index=max_word_index) + output["char_ids"] = char_ids + + output["prediction_mask"] = self._create_prediction_mask(words=input_entities, + max_word_index=max_word_index) + + # when we use tokenizer for prediction we need to batchify data + if batchify: + output = {key: value.unsqueeze(0) for key, value in output.items()} + + return output + elif isinstance(input_entities, str): + # This is for custom prediction on strings. 
Segmentation using lambo has to be done before prediction + document = self.segmenter.segment(input_entities) + data = [] + entities = [] + for turn in document.turns: + for sentence in turn.sentences: + sentence_tokens = [token.text for token in sentence.tokens] + entities.append(sentence_tokens) + + # encoding + input_ids, attention_mask, word_ids, max_word_index = self.encode_sentence(words=sentence_tokens) + + output = dict( + input_ids=input_ids, + attention_mask=attention_mask) + + if word_ids is not None: + output["word_ids"] = word_ids + + if self.use_char_level_embeddings: + char_ids = self._encode_characters(words=sentence_tokens, max_word_index=max_word_index) + output["char_ids"] = char_ids + + output["prediction_mask"] = self._create_prediction_mask(words=sentence_tokens, + max_word_index=max_word_index) + data.append(output) + + batched_data = {key: torch.stack([d[key] for d in data]) for key in data[0].keys()} + return batched_data, entities + else: + # This is for custom prediction on strings + raise ValueError("Not supported input type") + + def encode_sentence(self, + words: List[str]): + """Transforms list of words/entities into a sequence of token ids, attentin mask for BERT model, mapping from + tokens to words and maximal word index. 
Also pads data and adds special tokens if needed.""" + word_ids = None + if not self.tokenize_entities: + # if entities are not to be tokenized we encode them using tokenizer at once + # this method will only work on BertTokenizer but not AutoTokenizer + encoded_sentence = self.backbone_tokenizer.encode_plus( + words, + add_special_tokens=self.use_start_end_token, + return_token_type_ids=False, + truncation=True, + max_length=self.backbone_tokenizer.model_max_length, + padding="max_length", + return_attention_mask=True, + return_tensors="pt" + ) + + max_word_index = self.backbone_tokenizer.model_max_length - 2 if self.use_start_end_token else \ + self.backbone_tokenizer.model_max_length + else: + # since tokenizers doesn't provide valid mapping from token to entity we need to do it manually + input_ids = [] + word_ids = [] + attention_mask = [] + + # if start and end tokens are used we need to subtract 2 from max length of entities in sentence + max_length = self.backbone_tokenizer.model_max_length - 2 if self.use_start_end_token else \ + self.backbone_tokenizer.model_max_length + + # enumerate words and encode them one by one + for i, word in enumerate(words): + word_tokenized_dict = self.backbone_tokenizer(word, add_special_tokens=False) + if len(input_ids) + len(word_tokenized_dict["input_ids"]) > max_length: + # there is no need to add more tokens because they will be truncated anyway + break + + if len(word_tokenized_dict["input_ids"]) == 0: + # dealing with strange tokenizers that return empty list + input_ids += [self.backbone_tokenizer.unk_token_id] + word_ids += [i] + attention_mask += [1] + else: + input_ids += word_tokenized_dict["input_ids"] + word_ids += [i] * len(word_tokenized_dict["input_ids"]) # mapping token -> word starting from 0 + attention_mask += word_tokenized_dict["attention_mask"] + + if self.use_start_end_token: + # adding start and end tokens + input_ids = [self.backbone_tokenizer.cls_token_id] + input_ids + 
[self.backbone_tokenizer.sep_token_id] + word_ids = [-1] + word_ids + [-1] # they are not words so we use -1 + attention_mask = [1] + attention_mask + [1] # attention should be applied to them + + # padding + input_ids += [self.backbone_tokenizer.pad_token_id] *\ + (self.backbone_tokenizer.model_max_length - len(input_ids)) + word_ids += [-1] * (self.backbone_tokenizer.model_max_length - len(word_ids)) # these are not words so we use -1 + attention_mask += [0] * (self.backbone_tokenizer.model_max_length - len(attention_mask)) + + encoded_sentence = dict( + input_ids=torch.tensor(input_ids), + attention_mask=torch.tensor(attention_mask) + ) + + max_word_index = max(word_ids) + 1 # word ids start from 0 + + if word_ids is not None: + word_ids = torch.tensor(word_ids) + + return encoded_sentence["input_ids"].squeeze(0), encoded_sentence["attention_mask"].squeeze(0), word_ids, \ + max_word_index + + def _encode_labels(self, + labels: List[str], + max_word_index: int): + """ + Encodes labels into a sequence of label ids and pads them + """ + # Encoding labels and truncating if needed + encoded_labels = [self.label_to_id[label] for label in labels[:max_word_index]] + + if self.use_start_end_token: + # adding start and end token if start and end tokens are used + encoded_labels = [self.label_to_id[START_TOKEN]] + encoded_labels + [self.label_to_id[END_TOKEN]] + + # Padding labels + encoded_labels += [self.label_to_id[PAD_TOKEN]] * (self.backbone_tokenizer.model_max_length - len(encoded_labels)) + + return torch.tensor(encoded_labels) + + def _encode_characters(self, + words: List[str], + max_word_index: int) -> torch.Tensor: + """ + Encodes the characters of each word into a sequence of character ids. 
+ """ + # Splitting characters to create character ids + characters = [[self.char_to_id.get(char, self.char_to_id[CHAR_UNKNOWN_TOKEN]) for char in word] for word in + words[:max_word_index]] + + # adding <b> and <e> tokens and truncating if characters exceed max length + characters = [[self.char_to_id[CHAR_START_TOKEN]] + word[:self.max_word_len-2] + + [self.char_to_id[CHAR_END_TOKEN]] for word in characters] + + # Padding character sequence + characters = [word + [self.char_to_id[CHAR_PAD_TOKEN]] * + (self.max_word_len - len(word)) for word in characters] + + # Padding word sequence + # START token is used then add padding at the beginning + if self.use_start_end_token: + characters = [[self.char_to_id[CHAR_PAD_TOKEN]] * self.max_word_len] + characters + + characters += [[self.char_to_id[CHAR_PAD_TOKEN]] * self.max_word_len] * ( + self.backbone_tokenizer.model_max_length - len(characters)) + + return torch.tensor(characters) + + def _create_prediction_mask(self, + words: List[str], + max_word_index: int) -> torch.Tensor: + """ + Creates a prediction mask for the words so when predicting we can ignore the padding and special tokens. + """ + mask = [1]*min(len(words), max_word_index) + + if self.use_start_end_token: + mask = [0] + mask + [0] + + # padding + mask += [0] * (self.backbone_tokenizer.model_max_length - len(mask)) + return torch.tensor(mask) + + def decode(self, ids): + """ + Decode a list of IDs or list of lists of IDs to their corresponding labels. 
+ """ + # if predicted tag is PAD or START or END we return O tag + id_to_label = {v: (k if k not in [PAD_TOKEN, END_TOKEN, START_TOKEN] else "O") + for k, v in self.label_to_id.items()} + + if len(ids) == 0: + return [] + + # Check if it's a list of lists + if isinstance(ids[0], list): + return [[id_to_label[i] for i in inner_list] for inner_list in ids] + else: + return [id_to_label[i] for i in ids] + + @classmethod + def get(cls, + model_name: str, + load_lambo_tokenizer : bool = True): + """ + Downloads and loads pretrained tokenizer from MinIO + """ + # Downloading files and loading jsons + PATH_CONFIG = Path(download_file(model_name, "config.json")) + config = json.load(open(PATH_CONFIG)) + + try: + PATH_CHAR_TO_ID = Path(download_file(model_name, "char_to_id.json")) + char_to_id = json.load(open(PATH_CHAR_TO_ID)) + except: + char_to_id = {} + + PATH_LABEL_TO_ID = Path(download_file(model_name, "label_to_id.json")) + label_to_id = json.load(open(PATH_LABEL_TO_ID)) + + # load tokenizer + tokenizer = NerTokenizer(pretrained_model_type=config["model"]["bert_embedder"]["pretrained_model_type"], + pretrained_model_name=config["model"]["bert_embedder"]["pretrained_model_name"], + char_to_id_map=char_to_id, + label_to_id_map=label_to_id, + use_char_level_embeddings=config["data"]["use_char_level_embeddings"], + use_start_end_token=config["data"]["use_start_end_token"], + tokenize_entities=config["data"]["tokenize_entities"], + language=config["data"].get("language", "pl"), + max_word_len=config["data"].get("max_word_len", 69), + load_lambo_tokenizer=load_lambo_tokenizer) + + return tokenizer + + @classmethod + def load_from_disc(cls, + folder_path: Path, + load_lambo_tokenizer : bool = True): + """ + Loads pretrained tokenizer from disc + """ + # Loading jsons + PATH_CONFIG = folder_path / "config.json" + config = json.load(open(PATH_CONFIG)) + + try: + PATH_CHAR_TO_ID = folder_path / "char_to_id.json" + char_to_id = json.load(open(PATH_CHAR_TO_ID)) + except: + 
char_to_id = {} + + PATH_LABEL_TO_ID = folder_path / "label_to_id.json" + label_to_id = json.load(open(PATH_LABEL_TO_ID)) + + # load tokenizer + tokenizer = NerTokenizer(pretrained_model_type=config["model"]["bert_embedder"]["pretrained_model_type"], + pretrained_model_name=config["model"]["bert_embedder"]["pretrained_model_name"], + char_to_id_map=char_to_id, + label_to_id_map=label_to_id, + use_char_level_embeddings=config["data"]["use_char_level_embeddings"], + use_start_end_token=config["data"]["use_start_end_token"], + tokenize_entities=config["data"]["tokenize_entities"], + language=config["data"].get("language", "pl"), + max_word_len=config["data"]["max_word_len"], + load_lambo_tokenizer=load_lambo_tokenizer) + return tokenizer diff --git a/combo/ner_modules/data/SpecialTokens.py b/combo/ner_modules/data/SpecialTokens.py new file mode 100644 index 0000000000000000000000000000000000000000..0f50e9fae47c6c8f6535e2f79236858fe760de2b --- /dev/null +++ b/combo/ner_modules/data/SpecialTokens.py @@ -0,0 +1,30 @@ +class SpecialTokens: + """ + SpecialTokens stores default token strings for various purposes. The class provides both word-level and + character-level special tokens. This class is rather used internally. + + Attributes + ---------- + START_TOKEN : str + Marks the beginning of a sentence. + END_TOKEN : str + Marks the end of a sentence. + PAD_TOKEN : str + Used for padding sequences to a consistent length. + CHAR_START_TOKEN : str + Marks the beginning of a character sequence. + CHAR_END_TOKEN : str + Marks the end of a character sequence. + CHAR_PAD_TOKEN : str + Used for padding character sequences to a consistent length. + CHAR_UNKNOWN_TOKEN : str + Represents out-of-vocabulary character. 
+ """ + START_TOKEN = "[START]" + END_TOKEN = "[END]" + PAD_TOKEN = "[PAD]" + + CHAR_START_TOKEN = "<b>" + CHAR_END_TOKEN = "<e>" + CHAR_PAD_TOKEN = "<PAD>" + CHAR_UNKNOWN_TOKEN = "<unknown>" diff --git a/combo/ner_modules/data/__init__.py b/combo/ner_modules/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/combo/ner_modules/data/utils.py b/combo/ner_modules/data/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6dc069324b7a4d17b13ac3cdc034a75b2831a8f3 --- /dev/null +++ b/combo/ner_modules/data/utils.py @@ -0,0 +1,134 @@ +from collections import Counter +from pathlib import Path +from typing import Dict +from .SpecialTokens import SpecialTokens + +CHAR_PAD_TOKEN, CHAR_START_TOKEN, CHAR_END_TOKEN, CHAR_UNKNOWN_TOKEN, PAD_TOKEN, START_TOKEN, END_TOKEN = \ + SpecialTokens.CHAR_PAD_TOKEN, SpecialTokens.CHAR_START_TOKEN, SpecialTokens.CHAR_END_TOKEN, \ + SpecialTokens.CHAR_UNKNOWN_TOKEN, SpecialTokens.PAD_TOKEN, SpecialTokens.START_TOKEN, SpecialTokens.END_TOKEN + + +def create_tag2id(file_path: Path, + encoding: str = 'utf-8', + include_special_tokens: bool = False) -> Dict[str, int]: + """ + Generate a unique ID mapping for tags sourced from a specified file. The file should adhere to the following + structure: + 1. Each line consists of an <entity> <tag> pair, delimited by a space. + 2. Consecutive lines represent words in the same sentence, in their original order. + 3. The start of a new sentence is indicated by a metadata line: `# sent_id = <n>`, where `<n>` is a unique + sentence identifier within the file. + 4. A newline character demarcates the end of each sentence. + + Parameters: + ---------- + file_path : Path + Path to the file containing words and tags. + encoding : str, optional + Encoding of the file. Defaults to 'utf-8'. + include_special_tokens : bool, optional + If true, special tokens such as START and END are added to the mapping. 
Defaults to False. + + Returns: + ------- + tag2id : Dict[str, int] + A dictionary mapping tags to unique IDs. + """ + tag2id = set() + + # Reading the file + with open(file_path, 'r', encoding=encoding) as f: + for line in f: + # Omit spaces between sentences and sentence ID annotations + if line == '\n' or line.startswith("# sent_id ="): + continue + # Otherwise, the line should be a word-entity pair separated by space + else: + tag = line.strip().split(" ")[-1] + if tag != "O": + # Add both beginning and inside tags for entities + assert len(tag.split("-")) == 2, "Tags should be in format <entity>-<tag>, too many dashes detected" + tag2id.add("B-"+tag.split("-")[1]) + tag2id.add("I-" + tag.split("-")[1]) + else: + tag2id.add(tag) + + # Convert the set to a dictionary with sorted tags mapped to unique IDs + tag2id = {tag: i for i, tag in enumerate(sorted(tag2id))} + tag2id[PAD_TOKEN] = len(tag2id) + if include_special_tokens: + tag2id[START_TOKEN] = len(tag2id) + tag2id[END_TOKEN] = len(tag2id) + + return tag2id + + +def create_char2id(file_path: Path, + encoding: str = "utf-8", + discard_least_common: float = 0.0005) -> Dict[str, int]: + """ + Create a mapping from characters in the file to unique IDs, potentially discarding the least common characters. File + format is expected to be the same as indicated in create_tag2id() documentation + + Parameters: + ---------- + file_path : Path + Path to the file containing words and tags. + encoding : str, optional + Encoding of the file. Defaults to 'utf-8'. + discard_least_common : float, optional + Proportion of total characters to discard from the mapping based on their frequency. Defaults to 0.0005. + + Returns: + ------- + char2id : Dict[str, int] + A dictionary mapping characters to unique IDs. 
+ """ + + char2id = {CHAR_PAD_TOKEN, CHAR_START_TOKEN, CHAR_END_TOKEN, CHAR_UNKNOWN_TOKEN} + char_counter = Counter() + + # Reading the file + with open(file_path, 'r', encoding=encoding) as f: + for line in f: + # Omit spaces between sentences and sentence ID annotations + if line == '\n' or line.startswith("# sent_id ="): + continue + # Otherwise, the line should be a word-entity pair separated by space + else: + word = line.strip().split()[0] + char_counter.update([char for char in word]) + + total_chars = sum(char_counter.values()) + cutoff = total_chars * discard_least_common # Calculate the cutoff frequency + + sorted_chars = sorted(char_counter.items(), key=lambda x: x[1]) # Sort characters by frequency + + # Discard the least common characters based on the cutoff + current_count = 0 + for char, count in sorted_chars: + current_count += count + if current_count > cutoff: + char2id.add(char) + + # Convert the set to a dictionary mapped to unique IDs + char2id = {char: i for i, char in enumerate(char2id)} + return char2id + + +def calculate_longest_word(file_path: Path, + encoding: str = "utf-8") -> int: + """Extract maximum character length of a word from a file.""" + max_length = 0 + # Reading the file + with open(file_path, 'r', encoding=encoding) as f: + for line in f: + # Omit spaces between sentences and sentence ID annotations + if line == '\n' or line.startswith("# sent_id ="): + continue + # Otherwise, the line should be a word-entity pair separated by space + else: + word = line.strip().split()[0] + max_length = max(max_length, len(word)) + + return max_length diff --git a/combo/ner_modules/embedders/BertEmbedder.py b/combo/ner_modules/embedders/BertEmbedder.py new file mode 100644 index 0000000000000000000000000000000000000000..c33aec1ef3d0626ed2c7c2d52ab3fe2627e78310 --- /dev/null +++ b/combo/ner_modules/embedders/BertEmbedder.py @@ -0,0 +1,111 @@ +from .TokenPooling import TokenPooling +import torch.nn as nn +import torch +from transformers import 
class BertEmbedder(nn.Module):
    """Wrap a Hugging Face BERT-style encoder with optional pooling/projection.

    The encoder is loaded as either an ``AutoModel`` or a ``BertModel``. On top
    of its last hidden state the module can optionally pool tokens into word
    embeddings (``TokenPooling``) and/or project the hidden states to another
    dimension with a linear layer.

    Attributes
    ----------
    bert : transformers model
        The underlying encoder (AutoModel or BertModel instance).
    projection_layer : torch.nn.Linear or None
        Optional projection applied after pooling; None when no
        projection_dimension was requested.
    token_pooling : TokenPooling or None
        Optional token-to-word pooling layer; None when disabled.
    output_dimension : int
        Size of the returned embeddings: projection_dimension when given,
        otherwise the encoder's hidden size.
    """

    def __init__(self,
                 pretrained_model_name: str,
                 pretrained_model_type: str,
                 projection_dimension: int = None,
                 freeze_bert: bool = True,
                 token_pooling: bool = True,
                 use_start_end_token: bool = False,
                 pooling_strategy: str = "max") -> None:
        """
        Parameters
        ----------
        pretrained_model_name : str
            Name of the pre-trained model to load (e.g. 'bert-base-uncased').
        pretrained_model_type : str
            Loader to use: 'AutoModel' or 'BertModel'.
        projection_dimension : int, optional
            If given, a linear layer projects the embeddings to this size.
        freeze_bert : bool, optional
            When True the encoder parameters are frozen. (default: True)
        token_pooling : bool, optional
            When True a TokenPooling layer is created. (default: True)
        use_start_end_token : bool, optional
            Forwarded to TokenPooling; only relevant when token_pooling is True.
            (default: False)
        pooling_strategy : str, optional
            Pooling strategy for TokenPooling. (default: "max")
        """
        assert pretrained_model_type in ['AutoModel', 'BertModel'], \
            'pretrained_model_type must be either BertModel or AutoModel'

        super().__init__()

        # pick the loader class and instantiate the encoder
        loader = AutoModel if pretrained_model_type == 'AutoModel' else BertModel
        self.bert = loader.from_pretrained(pretrained_model_name)

        # optionally disable gradient flow through the encoder
        if freeze_bert:
            for weight in self.bert.parameters():
                weight.requires_grad = False

        # optional projection on top of the encoder hidden states
        if projection_dimension:
            self.projection_layer = nn.Linear(in_features=self.bert.config.hidden_size,
                                              out_features=projection_dimension)
            self.output_dimension = projection_dimension
        else:
            self.projection_layer = None
            self.output_dimension = self.bert.config.hidden_size

        # optional token-to-word pooling
        self.token_pooling = (TokenPooling(pooling_strategy=pooling_strategy,
                                           use_start_end_token=use_start_end_token)
                              if token_pooling else None)

    def forward(self,
                input_ids: torch.Tensor,
                attention_mask: torch.Tensor,
                word_ids: torch.Tensor = None) -> torch.Tensor:
        """Encode the inputs and return embeddings, applying the optional
        token pooling and projection layers in that order."""
        encoded = self.bert(input_ids,
                            attention_mask=attention_mask).last_hidden_state

        if self.token_pooling is not None:
            encoded = self.token_pooling(bert_outputs=encoded,
                                         word_ids=word_ids)

        if self.projection_layer is not None:
            encoded = self.projection_layer(encoded)

        return encoded
+ """ + super(ComboCharEmbedder, self).__init__() + + # character embeddings + self.embeddings = nn.Embedding(num_embeddings=vocab_size, + embedding_dim=char_embedding_dim, + padding_idx=padding_idx) + + # convolutional layers + self.conv1 = nn.Conv1d(in_channels=char_embedding_dim, + out_channels=512, + kernel_size=3, + stride=1, + padding=1, + dilation=1) + self.conv2 = nn.Conv1d(in_channels=512, + out_channels=256, + kernel_size=3, + stride=1, + padding=2, + dilation=2) + self.conv3 = nn.Conv1d(in_channels=256, + out_channels=char_embedding_dim, + kernel_size=3, + stride=1, + padding=4, + dilation=4) + + # save output dimension + self.output_dimension = char_embedding_dim + + def forward(self, batch_char_ids): + """ + Perform a forward pass through the model. After convolutions max pooling across character embeddings is + performed. + + Parameters: + ---------- + batch_char_ids : torch.Tensor + A tensor of character IDs for a batch of sequences (words). Input shape: batch_size x words x characters. + + Returns: + ------- + torch.Tensor + A tensor of embeddings for each sequence in the batch. 
+ """ + # embed characters + x = self.embeddings(batch_char_ids) + stacked_embeddings = [] # Empty list to store stacked word embeddings + + # Iterate over sentences + for i in range(x.shape[0]): + # sentence representation + # transforms to words x embedding_dim x characters_length + sentence = x[i].permute(0, 2, 1) + + # passing through convolutions + sentence = F.relu(self.conv1(sentence)) + sentence = F.relu(self.conv2(sentence)) + sentence = self.conv3(sentence) + + # max pooling across dimensions for whole sequence + # output: words_number x char_embedding_dim + word_embeddings = torch.max(sentence, dim=2)[0] + stacked_embeddings.append(word_embeddings) # Append word embeddings to the list + + # combining results + # output batch_size x words x char_embedding_dim + stacked_embeddings = torch.stack(stacked_embeddings, dim=0) + + return stacked_embeddings diff --git a/combo/ner_modules/embedders/TokenPooling.py b/combo/ner_modules/embedders/TokenPooling.py new file mode 100644 index 0000000000000000000000000000000000000000..67cae6cf89a615e63aa4bda23c34d2305ab9e5ca --- /dev/null +++ b/combo/ner_modules/embedders/TokenPooling.py @@ -0,0 +1,90 @@ +import torch +from torch import nn + + +class TokenPooling(nn.Module): + """ + TokenPooling Module. + + This module is used for pooling the token embeddings obtained from BERT models. + It provides two pooling strategies: max and mean. It is designed to transform token + embeddings into word embeddings by applying the selected pooling strategy over the + tokens of each word. The class also supports the option to account for special start + and end tokens. + + Attributes: + ----------- + pooling_strategy : str, optional + The strategy to pool token embeddings. Must be one of ['max', 'mean']. Default is 'max'. + use_start_end_token : bool, optional + Whether to consider special start and end tokens in pooling. Default is False. 
class TokenPooling(nn.Module):
    """Pool BERT token embeddings into word embeddings.

    Tokens that map to the same word index (per ``word_ids``) are reduced with
    either a max or a mean, producing one vector per word. The output is then
    zero-padded back to the input sequence length, so positions past the last
    word (START/END/PAD slots) hold zero vectors.

    Attributes:
    -----------
    pooling_strategy : str
        One of ['max', 'mean']. Default is 'max'.
    use_start_end_token : bool
        Whether position 0 is occupied by a special start token. Default is False.
    """

    def __init__(self,
                 pooling_strategy: str = 'max',
                 use_start_end_token: bool = False):
        super().__init__()

        assert pooling_strategy in ['max', 'mean'], "Pooling strategy must be one of ['max', 'mean']"

        self.pooling_strategy = pooling_strategy
        self.use_start_end_token = use_start_end_token

    def forward(self,
                bert_outputs: torch.Tensor,
                word_ids: torch.Tensor) -> torch.Tensor:
        """Pool token embeddings into word embeddings.

        Parameters
        ----------
        bert_outputs: torch.Tensor
            Token embeddings, shape (batch_size, max_seq_len, hidden_size).
        word_ids: torch.Tensor
            Word index of each token, shape (batch_size, max_seq_len).

        Returns
        ----------
        torch.Tensor
            Word embeddings, shape (batch_size, max_seq_len, hidden_size).
        """
        # When a [CLS]-style start token occupies position 0, shift every word
        # index by one so word 0 does not collide with the start slot.
        if self.use_start_end_token:
            word_ids = word_ids + 1

        # Broadcast each token's word index across the hidden dimension and
        # make it a valid int64 scatter index.
        index = word_ids.unsqueeze(2).expand_as(bert_outputs).type(torch.int64)

        # One bucket per word index (0 .. max word id).
        num_words = index.max() + 1
        buckets = torch.zeros(bert_outputs.size(0), num_words, bert_outputs.size(2)).to(bert_outputs.device)

        # include_self=False: the initial zeros do not take part in the reduction
        reduce_op = "amax" if self.pooling_strategy == "max" else "mean"
        buckets = buckets.scatter_reduce(1, index, bert_outputs, reduce=reduce_op, include_self=False)

        if self.use_start_end_token:
            # the slot reserved for the start token carries no word content
            buckets[:, 0, :] = 0
            # maybe adding cls and sep token embeddings are needed here

        # Zero-pad back to the original sequence length so shapes are preserved
        # (these trailing slots correspond to START/END/PAD tokens).
        missing = bert_outputs.shape[1] - buckets.shape[1]
        padding = torch.zeros(bert_outputs.shape[0], missing, bert_outputs.shape[2]).to(bert_outputs.device)

        return torch.cat((buckets, padding), dim=1)
class FocalLoss(nn.Module):
    """ Focal Loss, as described in https://arxiv.org/abs/1708.02002.

    It is essentially an enhancement to cross entropy loss and is
    useful for classification tasks when there is a large class imbalance.
    x is expected to contain raw, unnormalized scores for each class.
    y is expected to contain class labels.

    Shape:
        - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0.
        - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0.

    Attributes:
    ----------
    alpha : Tensor, optional
        Weights for each class. Default is None.
    gamma : float
        Focusing constant, as described in the referenced paper. Default is 2.
    ignore_index : int
        Class label to ignore. Default is -100.
    reduction : str
        Specifies the reduction to apply to the output: 'mean', 'sum', or 'none'.
    nll_loss : torch.nn.modules.loss.NLLLoss
        Negative log likelihood loss (carries alpha as per-class weights).
    """

    def __init__(self,
                 alpha: Optional[Tensor] = None,
                 gamma: float = 2.,
                 reduction: str = 'mean',
                 ignore_index: int = -100):
        """Constructor.

        Parameters:
        ----------
        alpha : Tensor, optional
            Weights for each class. Default is None.
        gamma : float, optional
            Focusing constant, as described in the referenced paper. Default is 2.
        reduction : str, optional
            Specifies the reduction to apply to the output: 'mean', 'sum', or
            'none'. Default is 'mean'.
        ignore_index : int, optional
            Class label to ignore. Default is -100.
        """
        if reduction not in ('mean', 'sum', 'none'):
            raise ValueError(
                'Reduction must be one of: "mean", "sum", "none".')

        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.reduction = reduction

        self.nll_loss = nn.NLLLoss(
            weight=alpha, reduction='none', ignore_index=ignore_index)

    def __repr__(self):
        arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction']
        arg_vals = [self.__dict__[k] for k in arg_keys]
        arg_strs = [f'{k}={v!r}' for k, v in zip(arg_keys, arg_vals)]
        arg_str = ', '.join(arg_strs)
        return f'{type(self).__name__}({arg_str})'

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Compute the focal loss for logits ``x`` and labels ``y``."""
        if x.ndim > 2:
            # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C)
            c = x.shape[1]
            x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c)
            # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,)
            y = y.view(-1)

        unignored_mask = y != self.ignore_index
        y = y[unignored_mask]
        if len(y) == 0:
            # all targets ignored: return a zero that keeps the device, dtype
            # and autograd graph of the logits (a bare torch.tensor(0.) would
            # live on the CPU and break .backward()/device arithmetic on CUDA)
            return x.sum() * 0.
        x = x[unignored_mask]

        # compute weighted cross entropy term: -alpha * log(pt)
        # (alpha is already part of self.nll_loss)
        log_p = F.log_softmax(x, dim=-1)
        ce = self.nll_loss(log_p, y)

        # get true class column from each row; build the index on the same
        # device as the logits so CUDA inputs work
        all_rows = torch.arange(len(x), device=x.device)
        log_pt = log_p[all_rows, y]

        # compute focal term: (1 - pt)^gamma
        pt = log_pt.exp()
        focal_term = (1 - pt)**self.gamma

        # the full loss: -alpha * ((1 - pt)^gamma) * log(pt)
        loss = focal_term * ce

        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            loss = loss.sum()

        return loss


def focal_loss(alpha: Optional[Sequence] = None,
               gamma: float = 2.,
               reduction: str = 'mean',
               size: int = 10,
               ignore_index: int = -100,
               device='cpu',
               dtype=torch.float32) -> FocalLoss:
    """Factory function for FocalLoss.

    Parameters:
    ----------
    alpha : Sequence or scalar, optional
        Weights for each class. A scalar is broadcast to all ``size`` classes;
        a sequence is converted element-wise to a Tensor. Default is None.
    gamma : float, optional
        Focusing constant, as described in the referenced paper. Default is 2.
    reduction : str, optional
        Specifies the reduction to apply to the output: 'mean', 'sum', or
        'none'. Default is 'mean'.
    size : int, optional
        Number of classes (used only to broadcast a scalar alpha).
    ignore_index : int, optional
        Class label to ignore. Default is -100.
    device : str, optional
        Device to move the alpha parameter to. Default is 'cpu'.
    dtype : torch.dtype, optional
        Data type to cast the alpha parameter to. Default is torch.float32.

    Returns:
    -------
    FocalLoss
        A configured FocalLoss object.
    """
    if alpha is not None:
        if not isinstance(alpha, Tensor):
            if isinstance(alpha, (int, float)):
                # scalar weight: broadcast to every class
                alpha = torch.full((size,), float(alpha))
            else:
                # per-class sequence (torch.full would reject a non-scalar)
                alpha = torch.tensor(alpha)
        alpha = alpha.to(device=device, dtype=dtype)

    fl = FocalLoss(
        alpha=alpha,
        gamma=gamma,
        reduction=reduction,
        ignore_index=ignore_index)
    return fl
class MetricHolder:
    """
    Accumulates targets/predictions for a sequence-labeling task and exposes
    seqeval-based precision, recall, F1 and a classification report over
    everything gathered so far.

    Attributes:
    ----------
    targets : list
        True label sequences collected so far.
    predictions : list
        Predicted label sequences collected so far.
    label_to_idx : dict
        Maps label names to integer indices.
    idx_to_label : dict
        Maps integer indices to label names.
    ignore_idx : int
        Index ignored during metric calculation (start/end/padding positions).
        Default is -100.
    """
    def __init__(self,
                 label_to_idx: dict = None,
                 idx_to_label: dict = None,
                 ignore_idx: int = -100):
        """
        Parameters:
        ----------
        label_to_idx : dict
            Maps label names to integer indices. Either this or idx_to_label
            must be given; the other mapping is derived by inversion.
        idx_to_label : dict
            Maps integer indices to label names.
        ignore_idx : int
            Index ignored during metric calculation (start/end/padding tokens).
            Default is -100.
        """
        self.targets = []
        self.predictions = []

        assert label_to_idx or idx_to_label, "Specify label_to_idx or idx_to_label"
        if label_to_idx:
            self.label_to_idx = label_to_idx
            self.idx_to_label = {index: name for name, index in label_to_idx.items()}
        else:
            self.idx_to_label = idx_to_label
            self.label_to_idx = {name: index for index, name in idx_to_label.items()}

        self.ignore_idx = ignore_idx

    def update(self, predictions, targets) -> None:
        """
        Accumulate a new batch of (predicted, true) label-id tensors.

        Parameters:
        ----------
        predictions (list): A list of predicted labels for sequences.
        targets (list): A list of true labels for sequences.
        """
        raw_predictions = predictions.tolist()
        raw_targets = targets.tolist()

        # Drop positions whose *target* is the ignore index (padding etc.),
        # keeping predictions aligned with their targets.
        kept_predictions = [
            [y_hat for y, y_hat in zip(target_seq, pred_seq) if y != self.ignore_idx]
            for target_seq, pred_seq in zip(raw_targets, raw_predictions)
        ]
        kept_targets = [
            [y for y in target_seq if y != self.ignore_idx]
            for target_seq in raw_targets
        ]

        # Translate ids back to label strings; a model occasionally predicts a
        # special tag (start/end/pad) — map those to the neutral "O" tag.
        label_predictions = [
            [self.idx_to_label[y_hat] for y_hat in pred_seq]
            for pred_seq in kept_predictions
        ]
        label_predictions = [
            [tag if tag not in (END_TOKEN, PAD_TOKEN, START_TOKEN) else "O" for tag in pred_seq]
            for pred_seq in label_predictions
        ]
        label_targets = [
            [self.idx_to_label[y] for y in target_seq]
            for target_seq in kept_targets
        ]

        self.targets += label_targets
        self.predictions += label_predictions

    def reset(self) -> None:
        """
        Clear everything accumulated so far.
        """
        self.targets = []
        self.predictions = []

    def get_classification_report(self) -> str:
        """
        Retrieve a detailed classification report for the current targets and
        predictions.

        Returns:
        ----------
        str: The classification report in tabular format in string.
        """
        return classification_report(self.targets,
                                     self.predictions,
                                     zero_division=1)

    def get_metrics(self,
                    stage: str = None) -> dict:
        """
        Calculate micro-averaged precision, recall and F1 for the current state.

        Parameters:
        ----------
        stage : str
            Name of the current stage (e.g., 'train', 'val', 'test'), used as a
            prefix for the metric keys.

        Returns:
        ----------
        dict : A dictionary containing the precision, recall, and F1-score.
        """
        assert stage is not None, "Specify stage"

        scores = {}
        for metric_name, metric_fn in (("precision", precision_score),
                                       ("recall", recall_score),
                                       ("f1", f1_score)):
            scores[f'{stage}_{metric_name}'] = metric_fn(
                self.targets, self.predictions, average="micro", zero_division=1)
        return scores
def construct_tokenizer_from_config(config: Dict,
                                    char_to_id_map: Dict,
                                    label_to_id_map: Dict) -> NerTokenizer:
    """
    Constructs a Named Entity Recognition tokenizer from a configuration
    dictionary.

    Parameters:
    -----------
    config : Dict
        Configuration dictionary containing all necessary parameters to
        initialize the tokenizer.
    char_to_id_map : Dict
        Mapping of characters to their respective unique identifiers.
    label_to_id_map : Dict
        Mapping of labels to their respective unique identifiers.

    Returns:
    --------
    NerTokenizer
        Initialized NER tokenizer object.
    """
    tokenizer = NerTokenizer(pretrained_model_type=config["model"]["bert_embedder"]["pretrained_model_type"],
                             pretrained_model_name=config["model"]["bert_embedder"]["pretrained_model_name"],
                             char_to_id_map=char_to_id_map,
                             label_to_id_map=label_to_id_map,
                             use_char_level_embeddings=config["data"]["use_char_level_embeddings"],
                             use_start_end_token=config["data"]["use_start_end_token"],
                             tokenize_entities=config["data"]["tokenize_entities"],
                             language=config["data"].get("language", "pl"),
                             max_word_len=config["data"].get("max_word_len", 69))
    return tokenizer


def construct_data_module_from_config(config: Dict,
                                      tokenizer: NerTokenizer) -> NerDataModule:
    """
    Constructs a Named Entity Recognition data module for PyTorch Lightning
    usage from a configuration dictionary.

    Parameters:
    -----------
    config : Dict
        Configuration dictionary containing all necessary parameters to
        initialize the data module.
    tokenizer : NerTokenizer
        NER tokenizer to be used for tokenization. It should be the custom
        tokenizer from this repository.

    Returns:
    --------
    NerDataModule
        Initialized NER data module object.
    """
    datamodule = NerDataModule(path_data=Path(config["data"]["path_data"]),
                               tokenizer=tokenizer,
                               batch_size=config["data"]["batch_size"],
                               encoding=config["data"]["encoding"],
                               num_workers=config["data"]["num_workers"])
    return datamodule


def construct_loss_from_config(config: Dict,
                               label_to_id: Dict):
    """
    Constructs a loss function based on the given configuration dictionary.

    A CRF classifier computes its own loss, so the sentinel string "crf" is
    returned in that case regardless of config["loss"].

    Parameters:
    -----------
    config : Dict
        Configuration dictionary containing the loss configuration.
    label_to_id : Dict
        Mapping of labels to their respective unique identifiers.

    Returns:
    --------
    loss
        Loss function: "crf", CrossEntropyLoss, or a custom focal loss.

    Raises:
    -------
    ValueError
        If config["loss"] names an unsupported loss.
    """
    if config["model"]["classifier"]["type"] == "crf":
        loss = "crf"
    elif config["loss"] == "ce":
        loss = torch.nn.CrossEntropyLoss()
    elif config["loss"] == "focal":
        # focal loss is parameterized by the number of real labels, i.e.
        # excluding whichever special tokens appear in the label map
        size = len(label_to_id) - len({PAD_TOKEN, START_TOKEN, END_TOKEN} & set(label_to_id.keys()))
        loss = focal_loss(size=size)
    else:
        raise ValueError(f"Loss {config['loss']} not supported")

    return loss


def construct_callbacks_from_config(config_callbacks: Dict) -> list:
    """
    Constructs a list of callbacks based on the given configuration dictionary.
    Supports the following callbacks: EarlyStopping, LearningRateMonitor,
    ModelCheckpoint, and FixedProgressBar.

    Parameters:
    -----------
    config_callbacks : Dict
        Configuration dictionary containing the callback configuration.

    Returns:
    --------
    list
        List of initialized callback objects (ModelCheckpoint, if configured,
        is always placed last).

    Raises:
    -------
    Exception
        If an unsupported callback name is encountered.
    """
    callbacks = []
    for callback in config_callbacks:
        if callback == "EarlyStopping":
            callbacks.append(EarlyStopping(**config_callbacks[callback]))
        elif callback == "LearningRateMonitor":
            callbacks.append(LearningRateMonitor(**config_callbacks[callback]))
        elif callback == "ModelCheckpoint":
            # deferred: appended after the loop so it always runs last
            pass
        elif callback == "FixedProgressBar":
            callbacks.append(FixedProgressBar())
        else:
            # report the unsupported callback *name*, not its config value
            raise Exception(f"Callback {callback} not supported")
    # adding checkpoint callback at the end
    if "ModelCheckpoint" in config_callbacks:
        callbacks.append(ModelCheckpoint(**config_callbacks["ModelCheckpoint"]))

    return callbacks
"use_start_end_token": True, + "tokenize_entities": True, + "batch_size": 32, + "encoding": "utf-8", + "num_workers": 1 + }, + + "model": { + "bert_embedder": { + "pretrained_model_name": "allegro/herbert-base-cased", + "pretrained_model_type": "AutoModel", + "projection_dimension": None, + "freeze_bert": True, + "token_pooling": True, + "pooling_strategy": "max" + }, + "char_embedder": {"type" : "combo", + "char_embedding_dim": 64 + }, + "classifier": {"type" : "crf", + "to_tag_space" : "bilstm"}, + "dropout": 0.1 + }, + + "loss": "ce", + "learning_rate": 0.001, + + "callbacks": {"FixedProgressBar": True, + "LearningRateMonitor": {"logging_interval":"epoch"}, + "ModelCheckpoint": {"monitor": "validation_f1", + "mode": "max", + "save_top_k": 1, + "save_weights_only": True, + "filename" : "best_model"}, + "EarlyStopping": {"monitor": "validation_f1", + "mode": "max", + "min_delta": 0.001, + "patience": 6}}, + + "trainer": {"devices": [0], + "max_epochs": 50, + "accelerator": "cuda", + "log_every_n_steps": 10} +} + + +class Config: + default_config = default_config diff --git a/combo/ner_modules/utils/download.py b/combo/ner_modules/utils/download.py new file mode 100644 index 0000000000000000000000000000000000000000..df26816c007d2dc9eb965e0ddb452cdcc66eb0a5 --- /dev/null +++ b/combo/ner_modules/utils/download.py @@ -0,0 +1,73 @@ +import errno +import logging +import os +import requests +import tqdm +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +logger = logging.getLogger(__name__) + +_URL = "http://s3.clarin-pl.eu/dspace/combo/ner/{model_name}/{file_name}" +_HOME_DIR = os.getenv("HOME", os.curdir) +_CACHE_DIR = os.path.join(_HOME_DIR, ".tmp_ner_model") + + +def download_file(model_name, file_name, force=False): + _make_cache_dir(model_name) + url = _URL.format(model_name=model_name, file_name=file_name) + local_filename = url.split("/")[-1] + local_filename = os.path.join(model_name, local_filename) + location = 
import logging
import os

import requests
import tqdm
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

logger = logging.getLogger(__name__)

_URL = "http://s3.clarin-pl.eu/dspace/combo/ner/{model_name}/{file_name}"
_HOME_DIR = os.getenv("HOME", os.curdir)
_CACHE_DIR = os.path.join(_HOME_DIR, ".tmp_ner_model")


def download_file(model_name, file_name, force=False):
    """Download ``file_name`` of ``model_name`` into the local cache.

    Returns the path of the local copy; an existing cached copy is reused
    unless ``force`` is True.

    Raises requests.exceptions.RequestException (incl. HTTPError for non-2xx
    responses) on download failure.
    """
    _make_cache_dir(model_name)
    url = _URL.format(model_name=model_name, file_name=file_name)
    local_filename = url.split("/")[-1]
    local_filename = os.path.join(model_name, local_filename)
    location = os.path.join(_CACHE_DIR, local_filename)

    if os.path.exists(location) and not force:
        logger.debug("Using cached model.")
        return location

    chunk_size = 1024

    logger.info(f"Downloading {file_name} from {url}")

    try:
        with _requests_retry_session(retries=2).get(url, stream=True, timeout=600) as r:
            # fail fast on HTTP errors instead of caching an error page
            # as a model file
            r.raise_for_status()
            # content-length may be absent; tqdm accepts total=None
            # (int(None) would raise TypeError)
            total = r.headers.get("content-length")
            pbar = tqdm.tqdm(unit="B", total=int(total) if total is not None else None,
                             unit_divisor=chunk_size, unit_scale=True)
            with open(location, "wb") as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
                        pbar.update(len(chunk))
            pbar.close()

    except requests.exceptions.RequestException as e:
        logger.error(f"Request error: {e}")
        raise
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        raise

    return location


def _make_cache_dir(model_name):
    """Ensure the per-model cache directory exists."""
    cache_dir_model = os.path.join(_CACHE_DIR, model_name)
    os.makedirs(cache_dir_model, exist_ok=True)
    logger.info(f"Cache directory created at {cache_dir_model}")


def _requests_retry_session(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
):
    """Return a requests.Session that retries transient server errors."""
    session = requests.Session()
    retry = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session
def fix_common_warnings(float_precision="medium"):
    """Silence common warnings that are not relevant to the user."""
    # "You are using a CUDA device (<graphic-card>) that has Tensor Cores.
    # To properly utilize them, you should set ..." hint
    torch.set_float32_matmul_precision(float_precision)

    # DataLoader "number of workers is too small" warning
    warnings.filterwarnings("ignore", ".*does not have many workers.*")

    # tensorboardX deprecation warning
    warnings.filterwarnings("ignore", ".*`tensorboardX` has been removed as a dependency*")


def move_tensors_to_device(tensor_dict, device):
    """Return a dict with every tensor of *tensor_dict* moved to *device*."""
    return {name: value.to(device) for name, value in tensor_dict.items()}


def generate_random_string(length=16):
    """Return a random lowercase ASCII string of the given length."""
    alphabet = string.ascii_lowercase
    return ''.join(random.choice(alphabet) for _ in range(length))


def custom_formatwarning(msg, *args, **kwargs):
    """warnings formatter that keeps only the message itself."""
    # ignore everything except the message
    return "WARNING: " + str(msg) + '\n'
def check_config_constraints(config) -> Dict:
    """Validate a training config against the supported options and fill in
    defaults (from ``Config.default_config``) where possible.

    Mutates and returns *config*. Raises on unrecoverable problems (missing
    model section, unknown loss/classifier/embedder types); emits warnings for
    every default that gets applied.
    """
    POSSIBLE_LOSSES = ["crf", "ce", "focal"]

    warnings.formatwarning = custom_formatwarning

    # loading default config
    default_config = Config.default_config

    data_config = config.get("data", {})
    model_config = config.get("model", {})
    trainer_config = config.get("trainer", {})

    # Check if learning rate is specified
    if "learning_rate" not in config.keys():
        warnings.warn("Learning rate is not specified")
    if "loss" not in config.keys():
        warnings.warn("Loss is not specified")
    else:
        if config["loss"] not in POSSIBLE_LOSSES:
            raise Exception(f"Loss is not in {POSSIBLE_LOSSES}")

    # Check if the model config is valid
    if model_config:
        # --- bert embedder section ---
        bert_embedder_config = model_config.get("bert_embedder", {})
        bert_embedder_config_keys = bert_embedder_config.keys()
        if "pretrained_model_name" not in bert_embedder_config_keys:
            raise Exception("Specify pretrained_model_name in model config")

        if "pretrained_model_type" not in bert_embedder_config_keys:
            raise Exception("Specify pretrained_model_type in model config")

        if "freeze_bert" not in bert_embedder_config_keys:
            default_value = default_config["model"]["bert_embedder"]["freeze_bert"]
            warnings.warn(f"No freeze_bert specified, using default value : {default_value}")
            bert_embedder_config["freeze_bert"] = default_value

        if "token_pooling" not in bert_embedder_config_keys:
            default_value = default_config["model"]["bert_embedder"]["token_pooling"]
            warnings.warn(f"No token_pooling specified, using default value - {default_value}")
            bert_embedder_config["token_pooling"] = default_value

        # token pooling implies word-aligned labels, so it forces
        # data-tokenize_entities to match
        if bert_embedder_config["token_pooling"]:
            warnings.warn("Token pooling is enabled, setting data-tokenize_entities to True")
            data_config["tokenize_entities"] = True
        else:
            warnings.warn("Token pooling is disabled, setting data-tokenize_entities to False")
            data_config["tokenize_entities"] = False

        if "pooling_strategy" not in bert_embedder_config_keys and bert_embedder_config["token_pooling"]:
            default_value = default_config["model"]["bert_embedder"]["pooling_strategy"]
            warnings.warn(f"No pooling_strategy specified, using default value: {default_value}")
            bert_embedder_config["pooling_strategy"] = default_value
        else:
            if bert_embedder_config["token_pooling"] and bert_embedder_config["pooling_strategy"] not in ["max", "mean"]:
                raise Exception("Specify pooling_strategy in model config - possible values [max, mean]")

        config["model"]["bert_embedder"] = bert_embedder_config

        # --- char embedder section ---
        char_embedder_config = model_config.get("char_embedder", {})
        if char_embedder_config:
            if char_embedder_config.get("type", "-") not in ["combo", "contextual"]:
                raise Exception("Specify char_embedder type in model config - possible values [combo, contextual]")

            if "char_embedding_dim" not in char_embedder_config.keys():
                default_value = default_config["model"]["char_embedder"]["char_embedding_dim"]
                warnings.warn(f"No char_embedding_dim specified, using default value {default_value}")
                char_embedder_config["char_embedding_dim"] = default_value

            warnings.warn("Character embedder architecture specified, setting data-use_char_level_embeddings to True")
            data_config["use_char_level_embeddings"] = True
        else:
            warnings.warn("Character embedder architecture not specified, setting data-use_char_level_embeddings to False")
            data_config["use_char_level_embeddings"] = False

        config["model"]["char_embedder"] = char_embedder_config

        # --- classifier section ---
        classifier_config = model_config.get("classifier", {})
        if classifier_config:
            if classifier_config.get("type", "-") not in ["crf", "vanilla"]:
                raise Exception("Specify classifier type in model config - possible values [crf, vanilla]")
            if classifier_config.get("to_tag_space", "-") not in ["linear", "bilstm", "transformer"]:
                raise Exception("Specify transformation to tag space in model config - possible values [linear, bilstm, transformer]")
        else:
            raise Exception("Specify model classifier in model config")

    else:
        raise Exception("Specify model architecture in model config")

    # Check if the data config is valid; every missing key falls back to the
    # default (the six checks were identical, so they are applied in one loop)
    for key in ("use_char_level_embeddings", "use_start_end_token",
                "tokenize_entities", "batch_size", "encoding", "num_workers"):
        if key not in data_config:
            default_value = default_config["data"][key]
            warnings.warn(f"No {key} specified, using default value {default_value}")
            data_config[key] = default_value

    config["data"] = data_config

    # Check if the training config is valid
    if not trainer_config:
        warnings.warn("No trainer config provided, using default trainer config")
        config["trainer"] = default_config["trainer"]

    return config
self.batch_size = batch_size self.vocab = model.vocab @@ -42,6 +50,13 @@ class COMBO(PredictorModule): self.without_sentence_embedding = False self.line_to_conllu = line_to_conllu + # Ner modules + self.ner_model = None + self.ner_tokenizer = None + if ner_model is not None: + self._load_ner_model(ner_model) + + def __call__(self, sentence: Union[str, List[str], List[List[str]], List[data.Sentence]], **kwargs): """Depending on the input uses (or ignores) tokenizer. When model isn't only text-based only List[data.Sentence] is possible input. @@ -101,6 +116,8 @@ class COMBO(PredictorModule): for sentences_batch in util.lazy_groups_of(sentences, self.batch_size): sentences_batch = self.predict_batch_json(sentences_batch) result.extend(sentences_batch) + if self.ner_model is not None: + result = self._predict_ner_tags(result) return result elif isinstance(example, data.Sentence): result = [] @@ -108,6 +125,8 @@ class COMBO(PredictorModule): for sentences_batch in util.lazy_groups_of(sentences, self.batch_size): sentences_batch = self.predict_batch_instance(sentences_batch) result.extend(sentences_batch) + if self.ner_model is not None: + result = self._predict_ner_tags(result) return result else: raise ValueError("List must have either sentences as str, List[str] or Sentence object.") @@ -282,7 +301,9 @@ class COMBO(PredictorModule): def from_pretrained(cls, path: str, batch_size: int = 1024, - cuda_device: int = -1): + cuda_device: int = -1, + ner_model: str = None): + if os.path.exists(path): model_path = path else: @@ -296,4 +317,30 @@ class COMBO(PredictorModule): archive = load_archive(model_path, cuda_device=cuda_device) model = archive.model dataset_reader = archive.dataset_reader or default_ud_dataset_reader(archive.config.get("model_name")) - return cls(model, dataset_reader, batch_size) + return cls(model, dataset_reader, batch_size, ner_model=ner_model) + + def _load_ner_model(self, + ner_model: str = None): + try: + self.ner_model = 
NerModel.get(ner_model).to(self.device) + self.ner_tokenizer = NerTokenizer.get(ner_model, + load_lambo_tokenizer=False) + except: + logger.debug(f"No {ner_model} found in repository. Trying local path.") + self.ner_model = NerModel.load_from_disc(folder_path=Path(ner_model)).to(self.device) + self.ner_tokenizer = NerTokenizer.load_from_disc(folder_path=Path(ner_model), + load_lambo_tokenizer=False) + + + def _predict_ner_tags(self, + result: List[data.Sentence]) -> List[data.Sentence]: + """Enriches predictions with NER tags.""" + for sentence in result: + entities = [token.text for token in sentence.tokens] + input_data = self.ner_tokenizer.encode(entities, batchify=True) + input_data = move_tensors_to_device(input_data, self.ner_model.device) + preds = self.ner_model.predict(input_data) + for token, pred in zip(sentence.tokens, self.ner_tokenizer.decode(preds)[0]): + token.ner_tag = pred + return result + diff --git a/docs/NerBuilder.png b/docs/NerBuilder.png new file mode 100644 index 0000000000000000000000000000000000000000..a3387e44433a53a782ce088c3782a2eaa6a28f98 Binary files /dev/null and b/docs/NerBuilder.png differ diff --git a/docs/Ner_config_files.md b/docs/Ner_config_files.md new file mode 100644 index 0000000000000000000000000000000000000000..ad7f50ec04fa1afeb2e131990b2ded08fa372b52 --- /dev/null +++ b/docs/Ner_config_files.md @@ -0,0 +1,111 @@ +# Config file documentation + +Ideally config file should contain as much information as possible so it is easier to track experiments. In this file description of keys is provided. +Config file format is a `.json` file with nested dictionaries. First level keys are: +- `data` - containes information about data processing +- `model` - nested dictionary with information about model architecture +- `loss` - it is a string that specifies loss function. 
Can be either `ce`, `focal` or `crf` +- `learning_rate` - it is a float that specifies learning rate used in training +- `callbacks` - containes names of callbacks used in training +- `trainer` + +Defalut values are values which will be set if following code is run. +```python +from combo.ner_modules.utils.utils import check_config_constraints +config = check_config_constraints(config) +``` + +--- +`data` + +It is required for this dictionary to contain path_data unless training is done using [train.py](..%2Ftrain.py) and data is passed as an argument. + +Default values: + +```json + "data": { + "path_data": "./data/pl/", + "use_char_level_embeddings": true, + "use_start_end_token": true, + "tokenize_entities": true, + "batch_size": 32, + "encoding": "utf-8", + "num_workers": 1 + } +``` +- `use_start_end_token` [optional bool]- if true, start and end token will be added to each sentence +- `tokenize_entities` [optional bool] - if true, entities will be tokenized by backbone tokenizer. It is advised to set it to true. +- `encoding` [optional str] - encoding of data files + +--- +`model` + +Specifies model architecture. 
It has up to 4 keys: +- `bert_embedder` [required dict] +- `char_embedder` [optional dict] - if not specified, no character level embeddings will be used +- `classifier` [required dict] +- `dropout` [optional float] - dropout probability applied to BERT embedder output + +Example values for `bert_embedder`: +```json +"bert_embedder": { + "projection_dimension": null, + "freeze_bert": true, + "token_pooling": true, + "pooling_strategy": "max" +``` +- `pretrained_model_name` [required] is a string of HuggingFace Bert model +- `pretrained_model_type` [required] is a string of HuggingFace Bert model type should either `AutoModel` or `BertModel` +- `projection_dimension` [optional int] if specified, output of BERT will be projected to this dimension +- `freeze_bert` [optional bool] if true, BERT weights will be frozen during training +- `pooling_strategy` [optional str] if `token_pooling` is true, this key specifies pooling strategy. Can be either `max` or `mean` + +Example values for `char_embedder`: +```json +"char_embedder": {"type" : "combo", + "char_embedding_dim": 64 + } +``` +- `type` [required str] specifies type of character level embeddings. Currently, it supports only `combo`. But `contextual` is planned to be added. +- `char_embedding_dim` [optional int] specifies dimension of character level embeddings. +- `context_window` [optional int] if `type` is `contextual`, this key specifies context window size. To be added in future version. + +Example values for `classifier`: +```json +"classifier": {"type" : "crf", + "to_tag_space" : "bilstm"}, +``` +- `type` [required str] specifies type of classifier. Can be either `crf` or `vanilla` +- `to_tag_space` [required str] specifies type of layer that transforms output of BERT and possibly character embedder to tag space. 
Can be either `bilstm`, `transformer` or `linear` + +--- +`callbacks` + +Each key of callbacks dict is another callback used in training, despite `FixedProgressBar` which is a fix to common problem with progress bar in PyTorch Lightning, each callback is a callback implemented in PyTorch lightning library. Values of other thatn `FixedProgressBar` callbacks are dictionaries with arguments passed to callback. By default there is no callbacks used in training. + +Example values: +```json +"callbacks": {"FixedProgressBar": true, + "LearningRateMonitor": {"logging_interval":"epoch"}, + "ModelCheckpoint": {"monitor": "validation_f1", + "mode": "max", + "save_top_k": 1, + "save_weights_only": true, + "filename" : "best_model"}, + "EarlyStopping": {"monitor": "validation_f1", + "mode": "max", + "min_delta": 0.001, + "patience": 6}} +``` +--- +`trainer` + +Similarly to `callbacks`, each key of `trainer` dict is a parameter of PyTorch Lightning trainer. + +Deafult values are: +```json +"trainer": {"devices": [0], + "max_epochs": 50, + "accelerator": "cuda", + "log_every_n_steps": 10} +``` \ No newline at end of file diff --git a/docs/Ner_docs.md b/docs/Ner_docs.md new file mode 100644 index 0000000000000000000000000000000000000000..58323150e54a7f06d6f98492ac1138e7f6e2eb77 --- /dev/null +++ b/docs/Ner_docs.md @@ -0,0 +1,275 @@ +# Table of Contents + +1. [Data format](#data-format) +2. [Choose architecture](#choose-architecture) +3. [Config files](#config-files) +4. [Scripts](Scripts) + +# Data format + +To train a model or evaluate an existing one, the expected input consists of text files: +- `train.txt` +- `dev.txt` +- `test.txt` + +These files should be located in the same directory and will be fed into the model during training. + +The expected file format is as follows: +- Each line consists of a pair `<token> <tag>`, separated by a space. +- Consecutive lines represent the entities/words to be tagged in the same sentence, in their original order. 
+- The beginning of a new sentence is indicated by a metadata line: `# sent_id = <n>`, where `<n>` is the unique identifier for the sentence in the file. +- A newline character signifies the end of each sentence. +- Currently supported tags follow the [IOB format](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)). + +Example of train.txt file: + +```buildoutcfg +# sent_id = 0 +SOCCER O +- O +JAPAN B-LOC +GET O +LUCKY O +WIN O +, O +CHINA B-LOC +IN O +SURPRISE O +DEFEAT O +. O + +# sent_id = 1 +Nadim B-PER +Ladki I-PER +... +``` +Example training files can be also found in root directory ```/notebooks/example_data``` + +# Choose architecture + +When training new model from scratch user is expected to choose architecture of the model. It is however recommended to use following architecture: +- BertEmbedder = Bert Model + Mean Token Pooling +- CharacterEmbedder = Combo character embedder +- Classifier = CRF + BiLSTM + +It can be created using following part of config: +```python +"data": { + "use_char_level_embeddings": true, + "use_start_end_token": true, + "tokenize_entities": true, + ... # other parameters + } +... +"model": { + "bert_embedder": { + "pretrained_model_name": "allegro/herbert-base-cased", # or any other mode from huggingface + "pretrained_model_type": "AutoModel", + "projection_dimension": None, + "freeze_bert": True, + "token_pooling": True, + "pooling_strategy": "max" + }, + "char_embedder": {"type" : "combo", + "char_embedding_dim": 64 + }, + "classifier": {"type" : "crf", + "to_tag_space" : "bilstm"}, + "dropout": 0.1 + }, +... +``` + + + +The image above provides an overview of possible architectures for NER. Each architecture comprises four main components that can be customized either through configuration files or directly during instantiation: + +`NerTokenizer` Module for preparing data for training. 
When creating an instance, specify: +- the backbone tokenizer corresponding to the BERT model in the BERT embedder +- the mapping between character and id and between tag and id. +- whether start and end tokens should be added to sentence which is required if CRF layer is used. +- whether entities should be tokenised before the back bone BERT model. I.e. whether the entity as input to the BERT model is to be additionally tokenised. If so, the token pooling strategy should be defined for the BERT embbedder. +- whether character level embeddings will be used +- language in the case of string handling. This is the language of the LAMBO model used to segment the input string. + +`Character Embedder` A module for creating entity representations based on letters. Currently supported is Character Embedder such as in the COMBO model. + +`Bert Embedder`A module that creates entity vector representations of a given dimension as the last layer of the BERT model. When additional tokenisation of entities is requested, this module aggregates token vectors to the entity level via max pooling or mean pooling. + +`Classifier` A module that concatenates vector representations from BERT and optionally Character Embedder through the BiLSTM or Transformer layer and transforms them to tag space using the Linear layer or CRF layer. + +# Config files + +For quick prototyping, model creation and training parameter settings can be set using configuration files in JSON format. 
+Template for building config files +``` +{ + "data": { + "path_data": "./data/pl/", + "use_char_level_embeddings": true, + "use_start_end_token": true, + "tokenize_entities": true, + "batch_size": 32, + "encoding": "utf-8", + "num_workers": 1 + }, + + "model": { + "bert_embedder": { + "pretrained_model_name": "allegro/herbert-base-cased", + "pretrained_model_type": "AutoModel", + "projection_dimension": null, + "freeze_bert": true, + "token_pooling": true, + "pooling_strategy": "max" + }, + "char_embedder": {"type" : "combo", + "char_embedding_dim": 64 + }, + "classifier": {"type" : "crf", + "to_tag_space" : "bilstm"}, + "dropout": 0.1 + }, + + "loss": "ce", + "learning_rate": 0.001, + + "callbacks": {"FixedProgressBar": true, + "LearningRateMonitor": {"logging_interval":"epoch"}, + "ModelCheckpoint": {"monitor": "validation_f1", + "mode": "max", + "save_top_k": 1, + "save_weights_only": true, + "filename" : "best_model"}, + "EarlyStopping": {"monitor": "validation_f1", + "mode": "max", + "min_delta": 0.001, + "patience": 6}}, + + "trainer": {"devices": [0], + "max_epochs": 50, + "accelerator": "cuda", + "log_every_n_steps": 10} +} + +``` +The configuration file should contain nested dictionaries detailing the parameters for various modules and hyperparameters. Refer to +[Config Files documentation](Ner_config_files.md) for more information how to build valid config file. + +Most parameters are self-explanatory, but some require additional notes: + +- `"model"-"bert_embedder"-"pretrained_model_name"` specifies the name of the model from the Hugging Face library. +- `"model"-"bert_embedder"-"pretrained_model_type"` is the class used for loading the model. Currently, two types are supported: BertModel or AutoModel. +- `"model"-"bert_embedder"-"projection_dimension"` specifies the desired dimension of the output vectors from the Bert Embedder. +- `"model"-"bert_embedder"-"pooling_strategy"` can take either 'mean' or 'max' values. 
It defines the type of pooling applied to the output vectors from the Bert Embedder to obtain word representations. This requires both token_pooling and tokenize_entities to be set to true. +- `"model"-"char_embedder"-"type`" defines how to obtain word representations based on characters. Currently, it takes either 'combo' or 'contextualized' values. In both cases, `char_embedding_dim` should be specified. For the 'contextualized' option, you'll also need to specify how many characters are considered by adjusting the `context_window` parameter. +- `"model"-"classifier"-"type"` can take one of two values: crf or vanilla, which defines the last layer of the model. Additionally, you'll need to specify `to_tag_space`, which outlines additional layers in the classifier module. It can take values like transformer, bilstm, or linear. +- `"model"-"dropout"` specifies the dropout value applied to the output vectors from the Bert Embedder. +- `loss` takes either 'ce' for Cross Entropy Loss or 'Focal' for Focal Loss. If the classifier type is CRF, this parameter is omitted. +- `callbacks` are callbacks used during training along with their parameters. Supported callbacks are LearningRateMonitor, ModelCheckpoint, EarlyStopping, and FixedProgressBar, which fixes a bug related to the progress bar on some terminals. + +# Scripts + +There is possibility to train and evaluate model directly from terminal. You have access to 3 scripts: +- [find_lr.py](..%2Fner_scripts%2Ffind_lr.py) - script for finding optimal learning rate +##### Arguments +`--config_path` The path to the JSON configuration file that defines various settings for model and training. This argument is required. + +Example: `--config_path="./config.json"` + +`--data_path:` The path to the data directory. If this argument is not provided, the data path is taken from the configuration file. + +Example: `--data_path="./data/"` + +`--check_config` A flag to enable additional configuration validation. 
If this flag is provided, the script will perform additional checks on the configuration settings. + +Example: `--check_config` + +##### Example usage +```python +python find_lr.py --config_path="./config.json" --check_config +``` + +- [train.py](..%2Fner_scripts%2Ftrain.py) - script for training model +##### Arguments +`--config_path` Path to the configuration file for training the model. + +Default: `./configs/default_config.json` + +Example: `--config_path="./configs/my_config.json"` + +`--n_reruns` Number of times the model training should be rerun. + +Default: `1` + +Example: `--n_reruns=5` + +`--data_path` Path to the data directory. If not provided, it is taken from the configuration file. + +Example: `--data_path="./data/"` + +`--serialization_dir` Directory where the model should be saved. + +Default: `./models/` + +Example: `--serialization_dir="./my_models/"` + +`--check_config` A flag to indicate whether to check the configuration constraints. + +Example: `--check_config` + +`--use_wandb_logger` A flag to specify whether to use Weights and Biases (wandb) for logging. Otherwise tensorboard logger will be used. + +Example: `--use_wandb_logger` + +`--wandb_project_name` Name of the Weights and Biases (wandb) project for logging. + +Default: `NER_ipi_pan3` + +Example: `--wandb_project_name="My_NER_Project"` + +##### Example usage +```python +python train.py --config_path="./configs/my_config.json" --n_reruns=3 +``` + +- [eval.py](..%2Fner_scripts%2Feval.py) - script for evaluating model. Model path is considered to represent model as whole. It should contain: +`best_model.ckpt` - model weights, `char_to_id.json` if model uses character embeddings, `config.json` which was created during training, and `label_to_id.json` which is mapping from tags to ids. + +##### Arguments +`--config_path` Path to the configuration file for training the model. + +`--model_path`: The path to the folder containing the pre-trained NER model and its associated files. 
+ +Default: `./models/pl_example` + +Example: `--model_path="./models/my_pretrained_model"` + +`--data_file_path`: The path to the dataset file for prediction. + +Default: `./data/pl/test.txt` + +Example: `--data_file_path="./data/custom_test.txt"` + +`--device`: The computational device for prediction. Use -1 for CPU. Any other integer will correspond to a specific CUDA device. + +Default: `0` + +Example: `--device=1` + +`--batch_size`: The number of data points processed in each batch during prediction. + +Default: `24` + +Example: `--batch_size=32` + +`--encoding`: The encoding used to read the dataset file. + +Default: `utf-8` + +Example: `--encoding="ascii"` + +##### Example usage +```python +python eval.py --model_path="./models/custom_model" --data_file_path="./data/custom_test.txt" --device=-1 +``` diff --git a/ner_scripts/eval.py b/ner_scripts/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..1055dd6b93f09b19bea544e160b02399c071b186 --- /dev/null +++ b/ner_scripts/eval.py @@ -0,0 +1,112 @@ +""" +This script facilitates the evaluation of prediction process for a pre-trained Named Entity Recognition (NER) model. +At the end of evaluation it prints classification report and global metrics. + +Arguments: +--------- +--model_path : str, optional (default: "./models/pl_example") +Path to the folder containing the pre-trained NER model and associated files. +--data_file_path : str, optional (default: "./data/pl/test.txt") +Path to the dataset file that needs to be fed to the model for prediction. +--device : int, optional (default: 0) +The computational device to be used. '-1' signifies CPU usage. Any other integer denotes the specific CUDA device. +--batch_size : int, optional (default: 24) +The number of data points to be processed in each batch during prediction. +--encoding : str, optional (default: "utf-8") +Encoding format used to read the dataset file. 
+""" +import argparse +from pathlib import Path +import json +import torch +from combo.ner_modules.NerModel import NerModel +from combo.ner_modules.data.NerDataset import NerDataset +from torch.utils.data import DataLoader +from combo.ner_modules.metrics.MetricHolder import MetricHolder +from tqdm import tqdm +from combo.ner_modules.utils.constructors import construct_loss_from_config, construct_tokenizer_from_config +from combo.ner_modules.utils.utils import fix_common_warnings + +# Argument parsing +parser = argparse.ArgumentParser() +parser.add_argument("--model_path", action="store", default="./models/pl_example", help="Path folder with model") +parser.add_argument("--data_file_path", action="store", default="./data/pl/test.txt", help="Path to file with data") +parser.add_argument("--device", action="store", default=0, help="Specify device") +parser.add_argument("--batch_size", action="store", default=24, help="Specify device") +parser.add_argument("--encoding", action="store", default="utf-8", help="Specify device") + + +args = parser.parse_args() +PATH_MODEL = Path(args.model_path) +PATH_DATA_FILE = Path(args.data_file_path) + + +def move_tensors_to_device(tensor_dict, device): + return {key: tensor.to(device) for key, tensor in tensor_dict.items()} + + +if __name__ == "__main__": + fix_common_warnings() + + # load jsons + config = json.load(open(PATH_MODEL / "config.json")) + try: + char_to_id = json.load(open(PATH_MODEL / "char_to_id.json")) + except: + char_to_id = {} + label_to_id = json.load(open(PATH_MODEL / "label_to_id.json")) + + # specify device + if int(args.device) == -1: + device = torch.device("cpu") + else: + device = torch.device(f"cuda:{args.device}") + + # define model + loss_fn = construct_loss_from_config(config=config, + label_to_id=label_to_id) + + # load model + model = NerModel.load_from_checkpoint(PATH_MODEL / "best_model.ckpt", + loss_fn=loss_fn, + char_to_id_map=char_to_id, + label_to_id_map=label_to_id, + config=config) + 
model.to(device) + + # create dataset & dataloader + tokenizer = construct_tokenizer_from_config(config=config, + char_to_id_map=char_to_id, + label_to_id_map=label_to_id) + + dataset = NerDataset(file_path=PATH_DATA_FILE, + tokenizer=tokenizer, + encoding=args.encoding) + + dataloader = DataLoader(dataset, + batch_size=args.batch_size, + shuffle=False) + + # predicting + metrics_holder = MetricHolder(label_to_idx=label_to_id) + model.freeze() + i = 0 + for batch in tqdm(dataloader): + batch = move_tensors_to_device(batch, device) + model.test_step(batch, i) + i += 1 + + print("#" * 50) + print("Classification report") + print(model.test_metrics_holder.get_classification_report()) + print("General metrics") + metrics = model.test_metrics_holder.get_metrics(stage="eval") + for metric in metrics: + print(f"{metric}: {metrics[metric]}") + + + + + + + diff --git a/ner_scripts/find_lr.py b/ner_scripts/find_lr.py new file mode 100644 index 0000000000000000000000000000000000000000..60764c38bc2e055bae3a66abc8f627ed75b4811a --- /dev/null +++ b/ner_scripts/find_lr.py @@ -0,0 +1,95 @@ +""" +This script is designed for finding learning rate for specified architecture of a Named Entity Recognition (NER) model +It loads configuration settings from a JSON file and performs a learning rate search using a PyTorch Lightning learning +rate finder. At the end of the script, it prints the suggested learning rate obtained from the learning rate finder. + +Arguments: +---------- +--config_path : str + Path to the configuration file in JSON format. +--data_path : str, optional + Path to the data directory. If provided, it overrides the data path in the configuration. +--check_config : bool, optional + Flag to check the validity of the configuration. 
+""" +from pathlib import Path +import pytorch_lightning as pl +from pytorch_lightning.tuner import Tuner +import json +import argparse +from combo.ner_modules.data.utils import create_tag2id, create_char2id, calculate_longest_word +from combo.ner_modules.NerModel import NerModel +from combo.ner_modules.utils.utils import check_config_constraints +from combo.ner_modules.utils.constructors import construct_loss_from_config, construct_tokenizer_from_config, construct_data_module_from_config +import torch +from combo.ner_modules.utils.utils import fix_common_warnings + +torch.set_float32_matmul_precision("medium") # to make lightning happy + +# Argument parsing +parser = argparse.ArgumentParser() +parser.add_argument("--config_path", action="store", default="", help="Path to config file") +parser.add_argument("--data_path", action="store", default="", help="Path to data, if not provided taken from config") +parser.add_argument("--check_config", action="store_true", help="Flag whether check on config should be done") +args = parser.parse_args() + +PATH_CONFIG_FILE = Path(args.config_path) +DATA_PATH = args.data_path + +if __name__ == "__main__": + fix_common_warnings() + + # Loading config file + default_config = open(PATH_CONFIG_FILE) + config = json.load(default_config) + + # Parse data path to config if it is not give + if DATA_PATH: + config["data"]["path_data"] = DATA_PATH + + # Check config + if args.check_config: + config = check_config_constraints(config) + + # create vocabularies + char_to_id = create_char2id(file_path=Path(config["data"]["path_data"]) / "train.txt", + encoding=config["data"]["encoding"]) + label_to_id = create_tag2id(file_path=Path(config["data"]["path_data"]) / "train.txt", + encoding=config["data"]["encoding"], + include_special_tokens=config["data"]["use_start_end_token"]) + + # Extract max word length + config["data"]["max_word_len"] = calculate_longest_word(file_path=Path(config["data"]["path_data"]) / "train.txt", + 
encoding=config["data"]["encoding"]) + + # construct tokenizer + tokenizer = construct_tokenizer_from_config(config=config, + char_to_id_map=char_to_id, + label_to_id_map=label_to_id) + # construct data module + data_module = construct_data_module_from_config(config=config, + tokenizer=tokenizer) + + # construct loss + loss = construct_loss_from_config(config=config, + label_to_id=label_to_id) + + model = NerModel(loss_fn=loss, + char_to_id_map=char_to_id, + label_to_id_map=label_to_id, + config=config) + + # Create an instance of the PyTorch Lightning Trainer + trainer = pl.Trainer(devices=config["trainer"]["devices"], + max_epochs=-1, + accelerator=config["trainer"]["accelerator"], + log_every_n_steps=10) + tuner = Tuner(trainer) + + # Train the model + lr_finder = tuner.lr_find(model, datamodule=data_module) + + # Print learning rate + print(f"Found learning rate:{lr_finder.suggestion()}") + + diff --git a/ner_scripts/train.py b/ner_scripts/train.py new file mode 100644 index 0000000000000000000000000000000000000000..c2ffaecfdc2c6cb2f9ab1eb01ca4680a10c938e9 --- /dev/null +++ b/ner_scripts/train.py @@ -0,0 +1,157 @@ +""" +This script allows for the training and testing of a Named Entity Recognition model using PyTorch Lightning. + +Arguments: +--------- +--config_path : str, optional (default: "./configs/default_config.json") + Path to the configuration file used for training the model. +--n_reruns : int, optional (default: 1) + Number of model reruns. +--data_path : str, optional (default: "") + Path to the data, if not provided, taken from the config. +--serialization_dir : str, optional (default: "./models/") + Path to save the model. +--check_config : bool, optional + Flag to check the constraints on the configuration. +--use_wandb_logger : bool, optional + Flag to determine whether to use the wandb logger. +--wandb_project_name : str, optional (default: "NER_ipi_pan3") + Name of the WANDB project. 
+""" +import argparse +from pathlib import Path +import torch +import json +import shutil +import os +import pytorch_lightning as pl +from combo.ner_modules.NerModel import NerModel +from combo.ner_modules.data.utils import create_tag2id, create_char2id, calculate_longest_word +from pytorch_lightning.loggers import WandbLogger +import wandb +from combo.ner_modules.utils.utils import generate_random_string, check_config_constraints +from combo.ner_modules.utils.constructors import construct_callbacks_from_config, construct_loss_from_config, construct_tokenizer_from_config,\ + construct_data_module_from_config +from combo.ner_modules.utils.utils import fix_common_warnings + +# Argument parsing +parser = argparse.ArgumentParser() +parser.add_argument("--config_path", action="store", default="./configs/default_config.json", help="Path to config file") +parser.add_argument("--n_reruns", action="store", default=1, help="Number of model reruns") +parser.add_argument("--data_path", action="store", default="", help="Path to data, if not provided taken from config") +parser.add_argument("--serialization_dir", action="store", default="./models/", help="Path to save_model") +parser.add_argument("--check_config", action="store_true", help="Flag whether check on config should be done") +parser.add_argument("--use_wandb_logger", action="store_false", help="Flag whether to use wandb logger") +parser.add_argument("--wandb_project_name", action="store", default="NER_ipi_pan3", help="WANDB project name") + + +args = parser.parse_args() + +PATH_CONFIG_FILE = Path(args.config_path) +N_RERUNS = int(args.n_reruns) +DATA_PATH = args.data_path + +torch.set_float32_matmul_precision("medium") # to make lightning happy + +if __name__ == "__main__": + # fixing common warnings + fix_common_warnings() + + # Loading config file + default_config = open(PATH_CONFIG_FILE) + config = json.load(default_config) + + # Wandb logger config name will be used as run name + config["name"] = 
PATH_CONFIG_FILE.stem + + # Parse data path to config if it is given + if DATA_PATH: + config["data"]["path_data"] = DATA_PATH + + # Check config + if args.check_config: + config = check_config_constraints(config) + + # create vocabularies + char_to_id = create_char2id(file_path=Path(config["data"]["path_data"]) / "train.txt", + encoding=config["data"]["encoding"]) + label_to_id = create_tag2id(file_path=Path(config["data"]["path_data"]) / "train.txt", + encoding=config["data"]["encoding"], + include_special_tokens=config["data"]["use_start_end_token"]) + + # Extract max word length + config["data"]["max_word_len"] = calculate_longest_word(file_path=Path(config["data"]["path_data"]) / "train.txt", + encoding=config["data"]["encoding"]) + + # construct tokenizer + tokenizer = construct_tokenizer_from_config(config=config, + char_to_id_map=char_to_id, + label_to_id_map=label_to_id) + # construct data module + data_module = construct_data_module_from_config(config=config, + tokenizer=tokenizer) + + # construct loss + loss = construct_loss_from_config(config=config, + label_to_id=label_to_id) + + for k in range(N_RERUNS): + print("#"*50) + print(f"Starting training number {k+1} out of {N_RERUNS}") + serialization_dir = Path(args.serialization_dir) / generate_random_string() + serialization_dir.mkdir(parents=True, exist_ok=True) + + if args.use_wandb_logger: + wandb.login() + logger = WandbLogger(project=args.wandb_project_name, + config=config, + reinit=True) + else: + logger = True + + # save vocab + with open(serialization_dir / "char_to_id.json", "w+") as f: + json.dump(char_to_id, f) + + with open(serialization_dir / "label_to_id.json", "w+") as f: + json.dump(label_to_id, f) + + with open(serialization_dir / "config.json", "w+") as f: + json.dump(config, f) + + model = NerModel(loss_fn=loss, + char_to_id_map=char_to_id, + label_to_id_map=label_to_id, + config=config) + + # initialize trainer + params = config["trainer"] + params["callbacks"] = 
construct_callbacks_from_config(config.get("callbacks", {})) + params["default_root_dir"] = serialization_dir + params["logger"] = logger + trainer = pl.Trainer(**params) + + # start training + trainer.fit(model, + datamodule=data_module) + + # Testing + if (Path(config["data"]["path_data"]) / "test.txt").is_file(): + test_result = trainer.test(verbose=True, + ckpt_path='best', + datamodule=data_module) + + if args.use_wandb_logger: + wandb.finish() + + # Moving best model to correct directory + if "ModelCheckpoint" in config.get("callbacks", {}).keys(): + checkpoint_callback = params["callbacks"][-2] + best_model_path = checkpoint_callback.best_model_path + if not best_model_path: + best_model_path = f"./{logger.name}/{logger.version}/checkpoints/{checkpoint_callback.filename}.ckpt" + new_name = 'best_model.ckpt' + new_path = os.path.join(serialization_dir, new_name) + shutil.move(best_model_path, new_path) + + print(f"Serialization directory: {serialization_dir}") diff --git a/notebooks/NER_inference.ipynb b/notebooks/NER_inference.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e5da7e553d1afee84ccd37f1692947efd3019fd9 --- /dev/null +++ b/notebooks/NER_inference.ipynb @@ -0,0 +1,400 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7e188da4-37e6-417c-a728-48d315d5c5b4", + "metadata": {}, + "source": [ + "# NER Inference using COMBO as package" + ] + }, + { + "cell_type": "markdown", + "id": "07f45e19-7243-4162-9700-11db8bdcc395", + "metadata": {}, + "source": [ + "In order to predict Named Entity Recognition tags, specify COMBO model name and NER model name" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ada97d69-3cdc-42e0-8ce9-b986060f22c2", + "metadata": {}, + "outputs": [], + "source": [ + "parser_model = \"polish-herbert-base-ud213\"\n", + "ner_model = \"pl_large\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2112ff72-7f8b-4c76-9944-6f9bf1ec6cb4", + "metadata": {}, + "outputs": 
[ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2023-11-29 22:35:01 UTC Loading archive] Error while loading Training Data Loader: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html. Setting Data Loader to None\n", + "Exception ignored in: <function tqdm.__del__ at 0x00000200983298B0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\std.py\", line 1162, in __del__\n", + " self.close()\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\notebook.py\", line 291, in close\n", + " self.disp(bar_style='success', check_delay=False)\n", + "AttributeError: 'tqdm_notebook' object has no attribute 'disp'\n", + "[2023-11-29 22:35:01 UTC Loading archive] Error while loading Validation Data Loader: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html. 
Setting Data Loader to None\n", + "Exception ignored in: <function tqdm.__del__ at 0x00000200983298B0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\std.py\", line 1162, in __del__\n", + " self.close()\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\notebook.py\", line 291, in close\n", + " self.disp(bar_style='success', check_delay=False)\n", + "AttributeError: 'tqdm_notebook' object has no attribute 'disp'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using model LAMBO-UD_Polish-PDB\n", + "Using model LAMBO-UD_Polish-PDB\n", + "Using model LAMBO-UD_Polish-PDB\n" + ] + } + ], + "source": [ + "from combo.predict import COMBO\n", + "c = COMBO.from_pretrained(path=parser_model,\n", + " ner_model=ner_model)" + ] + }, + { + "cell_type": "markdown", + "id": "f4f03e62-d383-482b-a94f-badbedf9f13b", + "metadata": {}, + "source": [ + "You can pass data as string, list of sentences or list of list of tokenized sentences." 
+ ] + }, + { + "cell_type": "markdown", + "id": "621204c3-702c-4abf-aa57-1f87ac92cf9d", + "metadata": {}, + "source": [ + "### Prediction on string" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "34095b90-d609-49b3-a284-8314f68144a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOKEN LEMMA UPOS HEAD DEPREL NER_TAG \n", + "--------------------------------------------------------------------------------\n", + "Czy czy PART 2 advmod O \n", + "wiesz wiedzieć VERB 0 root O \n", + ", , PUNCT 5 punct O \n", + "jak jak ADV 5 advmod O \n", + "dojechać dojechać VERB 2 xcomp O \n", + "do do ADP 7 case O \n", + "Åšrody Åšroda PROPN 5 obl B-nam_loc_gpe_city\n", + "albo albo CCONJ 10 cc O \n", + "do do ADP 10 case O \n", + "Piotrkowa Piotrków PROPN 7 conj B-nam_loc_gpe_city\n", + "Trybunalskiego trybunalski ADJ 10 amod:flat I-nam_loc_gpe_city\n", + "? ? PUNCT 2 punct O \n", + "--------------------------------------------------------------------------------\n", + "JeÅ›li jeÅ›li SCONJ 3 mark O \n", + "nie nie PART 3 advmod:neg O \n", + "wiesz wiedzieć VERB 6 advcl O \n", + ", , PUNCT 6 punct O \n", + "to to SCONJ 6 mark O \n", + "zapytaj zapytać VERB 0 root O \n", + "Marka Marek PROPN 6 obj B-nam_liv_person\n", + "Strusia StruÅ› PROPN 7 flat I-nam_liv_person\n", + "albo albo CCONJ 10 cc O \n", + "sprawdź sprawdzić VERB 6 conj O \n", + "w w ADP 12 case O \n", + "mapie mapa NOUN 10 obl O \n", + "Google Goog PROPN 12 nmod B-nam_pro_media_web\n", + ". . PUNCT 6 punct O \n", + "--------------------------------------------------------------------------------\n", + "Nie nie PART 2 advmod:neg O \n", + "dojedziesz dojechać VERB 0 root O \n", + "do do ADP 4 case O \n", + "celu cel NOUN 2 obl O \n", + "PKS-em PKS NOUN 2 iobj B-nam_org_company\n", + "ani ani CCONJ 7 cc O \n", + "Intercity Intercity PROPN 5 conj O \n", + ". . 
PUNCT 2 punct O \n" + ] + } + ], + "source": [ + "example = \"Czy wiesz, jak dojechać do Åšrody albo do Piotrkowa Trybunalskiego? JeÅ›li nie wiesz, to zapytaj Marka Strusia albo sprawdź w mapie Google. Nie dojedziesz do celu PKS-em ani Intercity.\"\n", + "prediction = c(example)\n", + "\n", + "print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL', \"NER_TAG\"))\n", + "for sentence in prediction:\n", + " print(\"-\"*80)\n", + " for token in sentence.tokens:\n", + " print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format(token.text, token.lemma, token.upostag, token.head, token.deprel, token.ner_tag))" + ] + }, + { + "cell_type": "markdown", + "id": "3972e889-935a-4f0f-b851-68a37b0c3c2d", + "metadata": {}, + "source": [ + "### Prediction on tokenized sentence" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "68f1c636-55d2-4206-a879-49a8e0eda15e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOKEN LEMMA UPOS HEAD DEPREL NER_TAG \n", + "--------------------------------------------------------------------------------\n", + "Nie nie PART 2 advmod:neg O \n", + "dojedziesz dojechać VERB 0 root O \n", + "do do ADP 4 case O \n", + "celu cel NOUN 2 obl O \n", + "PKS-em PKS NOUN 2 iobj B-nam_org_company\n", + "ani ani CCONJ 7 cc O \n", + "Intercity Intercity PROPN 5 conj B-nam_pro_software\n", + ". . 
PUNCT 2 punct O \n" + ] + } + ], + "source": [ + "example = [['Nie', 'dojedziesz', 'do', 'celu', 'PKS-em', 'ani', 'Intercity', '.']]\n", + "prediction = c(example)\n", + "\n", + "print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL', \"NER_TAG\"))\n", + "for sentence in prediction:\n", + " print(\"-\"*80)\n", + " for token in sentence.tokens:\n", + " print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format(token.text, token.lemma, token.upostag, token.head, token.deprel, token.ner_tag))" + ] + }, + { + "cell_type": "markdown", + "id": "78d96be2-d8fa-4a9f-8af9-e12d0971c0a0", + "metadata": {}, + "source": [ + "### Prediction on list of senteces" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8676b30d-54a7-478b-b188-e8407ca49601", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOKEN LEMMA UPOS HEAD DEPREL NER_TAG \n", + "--------------------------------------------------------------------------------\n", + "Czy czy PART 2 advmod O \n", + "wiesz wiedzieć VERB 0 root O \n", + ", , PUNCT 5 punct O \n", + "jak jak ADV 5 advmod O \n", + "dojechać dojechać VERB 2 xcomp O \n", + "do do ADP 7 case O \n", + "Åšrody Åšroda PROPN 5 obl B-nam_loc_gpe_city\n", + "albo albo CCONJ 10 cc O \n", + "do do ADP 10 case O \n", + "Piotrkowa Piotrków PROPN 7 conj B-nam_loc_gpe_city\n", + "Trybunalskiego trybunalski ADJ 10 amod:flat I-nam_loc_gpe_city\n", + "? ? 
PUNCT 2 punct O \n", + "--------------------------------------------------------------------------------\n", + "JeÅ›li jeÅ›li SCONJ 3 mark O \n", + "nie nie PART 3 advmod:neg O \n", + "wiesz wiedzieć VERB 6 advcl O \n", + ", , PUNCT 6 punct O \n", + "to to SCONJ 6 mark O \n", + "zapytaj zapytać VERB 0 root O \n", + "Marka Marek PROPN 6 obj B-nam_liv_person\n", + "Strusia StruÅ› PROPN 7 flat I-nam_liv_person\n", + "albo albo CCONJ 10 cc O \n", + "sprawdź sprawdzić VERB 6 conj O \n", + "w w ADP 12 case O \n", + "mapie mapa NOUN 10 obl O \n", + "Google Goog PROPN 12 nmod B-nam_pro_media_web\n", + ". . PUNCT 6 punct O \n" + ] + } + ], + "source": [ + "example = [\"Czy wiesz, jak dojechać do Åšrody albo do Piotrkowa Trybunalskiego?\", \"JeÅ›li nie wiesz, to zapytaj Marka Strusia albo sprawdź w mapie Google.\"]\n", + "prediction = c(example)\n", + "\n", + "print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL', \"NER_TAG\"))\n", + "for sentence in prediction:\n", + " print(\"-\"*80)\n", + " for token in sentence.tokens:\n", + " print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format(token.text, token.lemma, token.upostag, token.head, token.deprel, token.ner_tag))" + ] + }, + { + "cell_type": "markdown", + "id": "eaf81c8f-649b-4110-96a1-c43133feb11e", + "metadata": {}, + "source": [ + "### Using custom model" + ] + }, + { + "cell_type": "markdown", + "id": "d08d6dd7-b7c3-4f3b-8cc6-175d340dd2c1", + "metadata": {}, + "source": [ + "If you have trained your own model, you can pass the path to it as a ner_model parameter. " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "25c6f47f-b698-49d4-acd7-d9a675a99e09", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2023-11-28 00:45:10 UTC Loading archive] Error while loading Training Data Loader: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html. 
Setting Data Loader to None\n", + "Exception ignored in: <function tqdm.__del__ at 0x000001970A82C1F0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\std.py\", line 1162, in __del__\n", + " self.close()\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\notebook.py\", line 291, in close\n", + " self.disp(bar_style='success', check_delay=False)\n", + "AttributeError: 'tqdm_notebook' object has no attribute 'disp'\n", + "[2023-11-28 00:45:10 UTC Loading archive] Error while loading Validation Data Loader: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html. Setting Data Loader to None\n", + "Exception ignored in: <function tqdm.__del__ at 0x000001970A82C1F0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\std.py\", line 1162, in __del__\n", + " self.close()\n", + " File \"C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\notebook.py\", line 291, in close\n", + " self.disp(bar_style='success', check_delay=False)\n", + "AttributeError: 'tqdm_notebook' object has no attribute 'disp'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using model LAMBO-UD_Polish-PDB\n", + "Using model LAMBO-UD_Polish-PDB\n", + "Using model LAMBO-UD_Polish-PDB\n", + "Using model LAMBO-UD_Polish-PDB\n" + ] + } + ], + "source": [ + "ner_model = r\"L:\\combo-lightning\\ner_disc_test\"\n", + "c = COMBO.from_pretrained(path=parser_model,\n", + " ner_model=ner_model)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "82c4d866-7dde-4657-a089-367bc0853987", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOKEN LEMMA UPOS HEAD DEPREL NER_TAG \n", + 
"--------------------------------------------------------------------------------\n", + "Czy czy PART 2 advmod O \n", + "wiesz wiedzieć VERB 0 root O \n", + ", , PUNCT 5 punct O \n", + "jak jak ADV 5 advmod O \n", + "dojechać dojechać VERB 2 xcomp O \n", + "do do ADP 7 case O \n", + "Åšrody Åšroda PROPN 5 obl B-nam_loc_gpe_city\n", + "albo albo CCONJ 10 cc I-nam_loc_gpe_city\n", + "do do ADP 10 case O \n", + "Piotrkowa Piotrków PROPN 7 conj B-nam_loc_gpe_city\n", + "Trybunalskiego trybunalski ADJ 10 amod:flat I-nam_loc_gpe_city\n", + "? ? PUNCT 2 punct O \n", + "--------------------------------------------------------------------------------\n", + "JeÅ›li jeÅ›li SCONJ 3 mark O \n", + "nie nie PART 3 advmod:neg O \n", + "wiesz wiedzieć VERB 6 advcl O \n", + ", , PUNCT 6 punct O \n", + "to to SCONJ 6 mark O \n", + "zapytaj zapytać VERB 0 root O \n", + "Marka Marek PROPN 6 obj B-nam_liv_person\n", + "Strusia StruÅ› PROPN 7 flat I-nam_liv_person\n", + "albo albo CCONJ 10 cc O \n", + "sprawdź sprawdzić VERB 6 conj O \n", + "w w ADP 12 case O \n", + "mapie mapa NOUN 10 obl O \n", + "Google Goog PROPN 12 nmod B-nam_org_company\n", + ". . PUNCT 6 punct O \n", + "--------------------------------------------------------------------------------\n", + "Nie nie PART 2 advmod:neg O \n", + "dojedziesz dojechać VERB 0 root O \n", + "do do ADP 4 case O \n", + "celu cel NOUN 2 obl O \n", + "PKS-em PKS NOUN 2 iobj B-nam_org_company\n", + "ani ani CCONJ 7 cc O \n", + "Intercity Intercity PROPN 5 conj B-nam_org_company\n", + ". . PUNCT 2 punct O \n" + ] + } + ], + "source": [ + "example = \"Czy wiesz, jak dojechać do Åšrody albo do Piotrkowa Trybunalskiego? JeÅ›li nie wiesz, to zapytaj Marka Strusia albo sprawdź w mapie Google. 
Nie dojedziesz do celu PKS-em ani Intercity.\"\n", + "prediction = c(example)\n", + "\n", + "print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL', \"NER_TAG\"))\n", + "for sentence in prediction:\n", + " print(\"-\"*80)\n", + " for token in sentence.tokens:\n", + " print(\"{:15} {:15} {:10} {:10} {:10} {:15}\".format(token.text, token.lemma, token.upostag, token.head, token.deprel, token.ner_tag))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "combo_ner_integration", + "language": "python", + "name": "combo_ner_integration" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/NER_training.ipynb b/notebooks/NER_training.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..21e21ce86efc11d5a32db26097d63a3cf48d2cf5 --- /dev/null +++ b/notebooks/NER_training.ipynb @@ -0,0 +1,882 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ff9142be-4752-491b-bf9d-456763a2d7a5", + "metadata": {}, + "source": [ + "# NER model training" + ] + }, + { + "cell_type": "markdown", + "id": "3d1ba70b-c57d-421c-9298-65b7a2f06ef7", + "metadata": {}, + "source": [ + "Specify model directory. There will be 4 files saved there: \n", + "- confg.json - json config file specifying model architecture\n", + "- char_to_id.json - mapping between characters and ids\n", + "- label_to_id.json - mapping between ner tag and ids\n", + "- best_model.ckpt - model weights" + ] + }, + { + "cell_type": "markdown", + "id": "a3009202-3e5d-4f1c-9b7a-016a3d8694d3", + "metadata": {}, + "source": [ + "To demonstrate how to do it we will use very small subset of NER dataset for Polish." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "69f208e7-f717-4452-a60c-5730e7e48323", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "serialization_directory = Path(\"./models/notebook_example\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fc9c342a-4e41-4c9b-b0ae-e6c460d17404", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\tqdm\\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from combo.ner_modules.utils.utils import fix_common_warnings\n", + "fix_common_warnings()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8972f473-19e4-49c3-8365-5c59ec5a811d", + "metadata": {}, + "outputs": [], + "source": [ + "import torch \n", + "config = {\n", + " \"data\": {\n", + " \"path_data\" : r\".\\example_data\",\n", + " \"use_char_level_embeddings\": True,\n", + " \"use_start_end_token\": True,\n", + " \"tokenize_entities\": True,\n", + " \"batch_size\": 32,\n", + " \"encoding\": \"utf-8\",\n", + " \"num_workers\": 1\n", + " },\n", + "\n", + " \"model\": {\n", + " \"bert_embedder\": {\n", + " \"pretrained_model_name\": \"allegro/herbert-base-cased\",\n", + " \"pretrained_model_type\": \"AutoModel\",\n", + " \"projection_dimension\": None,\n", + " \"freeze_bert\": True,\n", + " \"token_pooling\": True,\n", + " \"pooling_strategy\": \"max\"\n", + " },\n", + " \"char_embedder\": {\"type\" : \"combo\",\n", + " \"char_embedding_dim\": 64\n", + " },\n", + " \"classifier\": {\"type\" : \"vanilla\",\n", + " \"to_tag_space\" : \"linear\"},\n", + " \"dropout\": 0\n", + " },\n", + " \"loss\": \"ce\",\n", + " \"learning_rate\": 0.0007585775750,\n", + " \"callbacks\": 
{\"FixedProgressBar\": True},\n", + " \"trainer\": {\n", + " \"devices\" : [0],\n", + " \"max_epochs\": 2,\n", + " \"accelerator\": \"cuda\" if torch.cuda.is_available() else \"cpu\",\n", + " \"log_every_n_steps\": 10}\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "41e3ec1a-661e-44a3-bb61-74b2477771a9", + "metadata": {}, + "source": [ + "# Training using config file" + ] + }, + { + "cell_type": "markdown", + "id": "a36614a6-8e76-4006-8c0b-3d704189a721", + "metadata": {}, + "source": [ + "## create vocabularies" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9f402941-42ea-47ed-a2e1-757aaa48d1f6", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.data.utils import create_tag2id, create_char2id\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "aea6de16-4670-4e78-a3f7-0b2f667d8d68", + "metadata": {}, + "outputs": [], + "source": [ + "char_to_id = create_char2id(file_path=Path(config[\"data\"][\"path_data\"]) / \"train.txt\", )\n", + "label_to_id = create_tag2id(file_path=Path(config[\"data\"][\"path_data\"]) / \"train.txt\",\n", + " encoding=config[\"data\"][\"encoding\"],\n", + " include_special_tokens=config[\"data\"][\"use_start_end_token\"])" + ] + }, + { + "cell_type": "markdown", + "id": "d71e974e-2f89-4a6b-aa77-31908ad28a6d", + "metadata": {}, + "source": [ + "## create tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "83529f2e-34bb-4d42-9378-cec4e2e84c83", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.utils.constructors import construct_tokenizer_from_config" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "efb8125c-7b0a-4701-8d67-ae238874b8f4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using model LAMBO-UD_Polish-PDB\n" + ] + } + ], + "source": [ + "tokenizer = construct_tokenizer_from_config(config=config,\n", + " 
char_to_id_map=char_to_id,\n", + " label_to_id_map=label_to_id)" + ] + }, + { + "cell_type": "markdown", + "id": "4eba05f3-0847-45f6-8d8e-98222b0aee8e", + "metadata": {}, + "source": [ + "## create pytorch lightning datamodule" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8f643dd5-279b-4e29-b6db-abf65e282741", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.utils.constructors import construct_data_module_from_config\n", + "data_module = construct_data_module_from_config(config=config,\n", + " tokenizer=tokenizer)" + ] + }, + { + "cell_type": "markdown", + "id": "e9df229a-ef28-4453-b188-ce62221523b6", + "metadata": {}, + "source": [ + "## create loss" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fa9235de-fddd-4343-a0d6-fef35e180b3f", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.utils.constructors import construct_loss_from_config" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0183ecab-56a1-40e5-ab6f-52c2652cf2b9", + "metadata": {}, + "outputs": [], + "source": [ + "loss = construct_loss_from_config(config=config,\n", + " label_to_id=label_to_id)" + ] + }, + { + "cell_type": "markdown", + "id": "a9cebfb2-e570-4fa1-a100-c1cd07eeab55", + "metadata": {}, + "source": [ + "## saving data to serialization directory" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d80fff2a-089e-4fa0-b14b-0c6b2037b88e", + "metadata": {}, + "outputs": [], + "source": [ + "serialization_directory.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ff58efd1-9341-4ac3-b381-512784f1f3fe", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "with open(serialization_directory / \"char_to_id.json\", \"w+\") as f:\n", + " json.dump(char_to_id, f)\n", + "\n", + "with open(serialization_directory / \"label_to_id.json\", \"w+\") as f:\n", + " json.dump(label_to_id, f)\n", + "\n", + 
"with open(serialization_directory / \"config.json\", \"w+\") as f:\n", + " json.dump(config, f)" + ] + }, + { + "cell_type": "markdown", + "id": "8f44ea8a-010f-40df-bc68-5403631b0c01", + "metadata": {}, + "source": [ + "## creating model instance" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "aea14060-0844-4178-954e-407cab7f00a5", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.NerModel import NerModel " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5ba60867-3187-4b39-a79a-5b4e4077944b", + "metadata": {}, + "outputs": [], + "source": [ + "model = NerModel(loss_fn=loss,\n", + " char_to_id_map=char_to_id,\n", + " label_to_id_map=label_to_id,\n", + " config=config)" + ] + }, + { + "cell_type": "markdown", + "id": "4e7d6b32-b256-4a50-bc7e-f0ad00c067e9", + "metadata": {}, + "source": [ + "## training" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d93dc81c-e148-4f5d-b8f4-b6f8a80ccf5b", + "metadata": {}, + "outputs": [], + "source": [ + "import pytorch_lightning as pl\n", + "from combo.ner_modules.utils.constructors import construct_callbacks_from_config" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6f91e688-d197-41b5-b929-0925af024846", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | bert_embedder | BertEmbedder | 124 M \n", + "1 | char_embedder | ComboCharEmbedder | 546 K \n", + "2 | classifier | VanillaClassifier | 65.8 K\n", + "3 | dropout | Dropout | 0 \n", + "4 | loss_fn | CrossEntropyLoss | 0 \n", + "----------------------------------------------------\n", + "612 K 
Trainable params\n", + "124 M Non-trainable params\n", + "125 M Total params\n", + "500.223 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|█████████████████████████████████████████████| 20/20 [00:19<00:00, 1.03it/s, v_num=11, train_loss=1.200]\n", + "Epoch 1: 100%|â–ˆ| 20/20 [00:19<00:00, 1.03it/s, v_num=11, train_loss=0.630, validation_loss=0.480, validation_precision\n", + "Epoch 1: 100%|â–ˆ| 20/20 [00:23<00:00, 1.17s/it, v_num=11, train_loss=0.630, validation_loss=0.251, validation_precision" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=2` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|â–ˆ| 20/20 [00:28<00:00, 1.43s/it, v_num=11, train_loss=0.630, validation_loss=0.251, validation_precision\n" + ] + } + ], + "source": [ + "params = config[\"trainer\"]\n", + "params[\"callbacks\"] = construct_callbacks_from_config(config.get(\"callbacks\", {}))\n", + "params[\"default_root_dir\"] = serialization_directory\n", + "trainer = pl.Trainer(**params)\n", + "\n", + "# start training\n", + "trainer.fit(model,\n", + " datamodule=data_module)" + ] + }, + { + "cell_type": "markdown", + "id": "74a69251-7df3-474d-96de-59caaf41467b", + "metadata": {}, + "source": [ + "## Evaluate on test data" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b659848a-53ad-4367-87db-4d9f6f69fbb6", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Restoring states from the checkpoint path at models\\notebook_example\\lightning_logs\\version_11\\checkpoints\\epoch=2-step=40.ckpt\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "Loaded model weights from the checkpoint at models\\notebook_example\\lightning_logs\\version_11\\checkpoints\\epoch=2-step=40.ckpt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Testing: 0it [00:00, ?it/s] precision recall f1-score support\n", + "\n", + " nam_adj_country 1.00 0.00 0.00 3\n", + " nam_liv_person 1.00 0.00 0.00 3\n", + " nam_loc_gpe_city 1.00 0.00 0.00 1\n", + " nam_loc_gpe_country 1.00 0.00 0.00 8\n", + " nam_org_company 1.00 0.00 0.00 10\n", + " nam_org_nation 1.00 0.00 0.00 3\n", + "nam_org_organization 1.00 0.00 0.00 3\n", + " nam_oth_currency 1.00 0.00 0.00 2\n", + " nam_oth_tech 1.00 0.00 0.00 5\n", + " nam_pro_brand 1.00 0.00 0.00 2\n", + " nam_pro_software 1.00 0.00 0.00 37\n", + "\n", + " micro avg 1.00 0.00 0.00 77\n", + " macro avg 1.00 0.00 0.00 77\n", + " weighted avg 1.00 0.00 0.00 77\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓\n", + "┃<span style=\"font-weight: bold\"> Test metric </span>┃<span style=\"font-weight: bold\"> DataLoader 0 </span>┃\n", + "┡â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> epoch </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 3.0 </span>│\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> test_f1 </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 0.0 </span>│\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> test_precision </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 1.0 </span>│\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> test_recall </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 0.0 </span>│\n", + "└───────────────────────────┴───────────────────────────┘\n", + "</pre>\n" + ], + "text/plain": [ + 
"â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩\n", + "│\u001b[36m \u001b[0m\u001b[36m epoch \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 3.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_f1 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_precision \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_recall \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = trainer.test(verbose=True,\n", + " ckpt_path='best',\n", + " datamodule=data_module)" + ] + }, + { + "cell_type": "markdown", + "id": "033aac6e-1747-4b39-9956-1cafebb04b6b", + "metadata": {}, + "source": [ + "# Training using as little config file as possible" + ] + }, + { + "cell_type": "markdown", + "id": "6099ba5d-5702-45b9-a7af-d79945b06d33", + "metadata": {}, + "source": [ + "## create vocabularies" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "a7ff832e-2fc8-4fba-95b4-416c169779b0", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.data.utils import create_tag2id, create_char2id\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3c86f465-998e-4949-8b11-547ed83c3deb", + "metadata": {}, + "outputs": [], + "source": [ + "training_data_path = 
Path(r\".\\example_data\\train.txt\") \n", + "char_to_id = create_char2id(file_path=training_data_path)\n", + "label_to_id = create_tag2id(file_path=training_data_path,\n", + " encoding=\"utf-8\",\n", + " include_special_tokens=True)" + ] + }, + { + "cell_type": "markdown", + "id": "79e1507c-e0e6-42e9-ac30-c899edafaa59", + "metadata": {}, + "source": [ + "## create tokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a6d3cfed-266d-487c-a2d8-82a154bcc5b6", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.data.NerTokenizer import NerTokenizer" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "aedfd9ae-52e7-447e-a70a-7cf3f1d8ca79", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using model LAMBO-UD_Polish-PDB\n" + ] + } + ], + "source": [ + "tokenizer = NerTokenizer(pretrained_model_type=\"AutoModel\",\n", + " pretrained_model_name=\"allegro/herbert-base-cased\",\n", + " char_to_id_map=char_to_id,\n", + " label_to_id_map=label_to_id,\n", + " use_char_level_embeddings=True,\n", + " use_start_end_token=True,\n", + " tokenize_entities=True)" + ] + }, + { + "cell_type": "markdown", + "id": "7a021776-2a19-4c9c-9dd1-b113fac8cdd4", + "metadata": {}, + "source": [ + "## create pytorch lightning datamodule" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "4e5374a1-d83f-4cb6-8bd4-ac95ebb75886", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.NerDataModule import NerDataModule" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8793da2d-39a7-49e0-85c1-56e9761725be", + "metadata": {}, + "outputs": [], + "source": [ + "data_path = Path(r\".\\example_data\") \n", + "data_module = NerDataModule(path_data=data_path,\n", + " tokenizer=tokenizer,\n", + " batch_size=32,\n", + " encoding=\"utf-8\",\n", + " num_workers=1)" + ] + }, + { + "cell_type": "markdown", + "id": 
"20a27e33-103e-406d-b621-a47546483389", + "metadata": {}, + "source": [ + "## create losss function" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "556d2b85-2e16-4488-90c8-a3cbc6452c61", + "metadata": {}, + "outputs": [], + "source": [ + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "9accfeae-9f16-4b0b-8113-1e2352ebebd8", + "metadata": {}, + "outputs": [], + "source": [ + "loss = torch.nn.CrossEntropyLoss()" + ] + }, + { + "cell_type": "markdown", + "id": "1a8a90aa-100b-4ed8-b5c5-5f02ddf770c5", + "metadata": {}, + "source": [ + "## create model instance" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "df74c687-d1d7-4805-ae13-456fb117e1f8", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.NerModel import NerModel" + ] + }, + { + "cell_type": "markdown", + "id": "d57fa876-b467-404b-ab1d-095a9961310a", + "metadata": {}, + "source": [ + "Minimal config should contain information about model architecture, learning rate and whether to use start and end tokens as well as whether to use character level embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "a9941c27-8bb5-4e8d-b9a5-f9a66cae4dad", + "metadata": {}, + "outputs": [], + "source": [ + "config = {\n", + " \"data\": {\n", + " \"use_char_level_embeddings\": True,\n", + " \"use_start_end_token\": True},\n", + " \n", + " \"model\": {\n", + " \"bert_embedder\": {\n", + " \"pretrained_model_name\": \"allegro/herbert-base-cased\",\n", + " \"pretrained_model_type\": \"AutoModel\",\n", + " \"projection_dimension\": None,\n", + " \"freeze_bert\": True,\n", + " \"token_pooling\": True,\n", + " \"pooling_strategy\": \"max\"\n", + " },\n", + " \"char_embedder\": {\"type\" : \"combo\",\n", + " \"char_embedding_dim\": 64\n", + " },\n", + " \"classifier\": {\"type\" : \"vanilla\",\n", + " \"to_tag_space\" : \"linear\"},\n", + " \"dropout\": 0\n", + " },\n", + " \"learning_rate\": 
0.0007585775750}" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "5eb269ee-7b5d-48a9-b0f1-4695741625a1", + "metadata": {}, + "outputs": [], + "source": [ + "model = NerModel(loss_fn=loss,\n", + " char_to_id_map=char_to_id,\n", + " label_to_id_map=label_to_id,\n", + " config=config)" + ] + }, + { + "cell_type": "markdown", + "id": "1a5da2da-2945-4489-98b1-9e5d472ab06d", + "metadata": {}, + "source": [ + "## train" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "65c97e08-9740-4943-8498-02747e3d53d0", + "metadata": {}, + "outputs": [], + "source": [ + "from combo.ner_modules.callbacks.FixedProgressBar import FixedProgressBar\n", + "import pytorch_lightning as pl" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "02c4c729-e763-4f25-94b8-4166a0e1c231", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "\n", + " | Name | Type | Params\n", + "----------------------------------------------------\n", + "0 | bert_embedder | BertEmbedder | 124 M \n", + "1 | char_embedder | ComboCharEmbedder | 546 K \n", + "2 | classifier | VanillaClassifier | 65.8 K\n", + "3 | dropout | Dropout | 0 \n", + "4 | loss_fn | CrossEntropyLoss | 0 \n", + "----------------------------------------------------\n", + "612 K Trainable params\n", + "124 M Non-trainable params\n", + "125 M Total params\n", + "500.223 Total estimated model params size (MB)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " " + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\lpsze\\anaconda3\\envs\\combo_ner_integration\\lib\\site-packages\\pytorch_lightning\\loops\\fit_loop.py:281: PossibleUserWarning: The number of training 
batches (20) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n", + " rank_zero_warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0: 100%|██████████████████████████████████████████████| 20/20 [00:19<00:00, 1.05it/s, v_num=0, train_loss=1.020]\n", + "Epoch 1: 100%|â–ˆ| 20/20 [00:19<00:00, 1.03it/s, v_num=0, train_loss=0.128, validation_loss=0.448, validation_precision=\n", + "Epoch 1: 100%|â–ˆ| 20/20 [00:23<00:00, 1.17s/it, v_num=0, train_loss=0.128, validation_loss=0.256, validation_precision=" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`Trainer.fit` stopped: `max_epochs=2` reached.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1: 100%|â–ˆ| 20/20 [00:28<00:00, 1.42s/it, v_num=0, train_loss=0.128, validation_loss=0.256, validation_precision=\n" + ] + } + ], + "source": [ + "callbacks = [FixedProgressBar()]\n", + "trainer = pl.Trainer(devices = [0],\n", + " accelerator=\"cuda\",\n", + " max_epochs=2,\n", + " callbacks=callbacks)\n", + "trainer.fit(model,\n", + " datamodule=data_module)" + ] + }, + { + "cell_type": "markdown", + "id": "5367be2f-83dc-41fe-9b73-7051abb7b5d9", + "metadata": {}, + "source": [ + "## evaluate on test data" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "5ffc539c-d4b1-4ba1-8876-de65cafa1cf9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Restoring states from the checkpoint path at L:\\combo-lightning\\docs\\ner_docs\\lightning_logs\\version_0\\checkpoints\\epoch=2-step=40.ckpt\n", + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]\n", + "Loaded model weights from the checkpoint at L:\\combo-lightning\\docs\\ner_docs\\lightning_logs\\version_0\\checkpoints\\epoch=2-step=40.ckpt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"Testing: 0it [00:00, ?it/s] precision recall f1-score support\n", + "\n", + " nam_adj_country 1.00 0.00 0.00 3\n", + " nam_liv_person 1.00 0.00 0.00 3\n", + " nam_loc_gpe_city 1.00 0.00 0.00 1\n", + " nam_loc_gpe_country 1.00 0.00 0.00 8\n", + " nam_org_company 1.00 0.00 0.00 10\n", + " nam_org_nation 1.00 0.00 0.00 3\n", + "nam_org_organization 1.00 0.00 0.00 3\n", + " nam_oth_currency 1.00 0.00 0.00 2\n", + " nam_oth_tech 1.00 0.00 0.00 5\n", + " nam_pro_brand 1.00 0.00 0.00 2\n", + " nam_pro_software 1.00 0.00 0.00 37\n", + "\n", + " micro avg 1.00 0.00 0.00 77\n", + " macro avg 1.00 0.00 0.00 77\n", + " weighted avg 1.00 0.00 0.00 77\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓\n", + "┃<span style=\"font-weight: bold\"> Test metric </span>┃<span style=\"font-weight: bold\"> DataLoader 0 </span>┃\n", + "┡â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> epoch </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 3.0 </span>│\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> test_f1 </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 0.0 </span>│\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> test_precision </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 1.0 </span>│\n", + "│<span style=\"color: #008080; text-decoration-color: #008080\"> test_recall </span>│<span style=\"color: #800080; text-decoration-color: #800080\"> 0.0 </span>│\n", + "└───────────────────────────┴───────────────────────────┘\n", + "</pre>\n" + ], + "text/plain": [ + 
"â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┳â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┓\n", + "┃\u001b[1m \u001b[0m\u001b[1m Test metric \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m DataLoader 0 \u001b[0m\u001b[1m \u001b[0m┃\n", + "┡â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”╇â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”â”┩\n", + "│\u001b[36m \u001b[0m\u001b[36m epoch \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 3.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_f1 \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_precision \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 1.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "│\u001b[36m \u001b[0m\u001b[36m test_recall \u001b[0m\u001b[36m \u001b[0m│\u001b[35m \u001b[0m\u001b[35m 0.0 \u001b[0m\u001b[35m \u001b[0m│\n", + "└───────────────────────────┴───────────────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "reults = trainer.test(verbose=True,\n", + " ckpt_path='best',\n", + " datamodule=data_module)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "combo_ner_integration", + "language": "python", + "name": "combo_ner_integration" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/example_data/dev.txt b/notebooks/example_data/dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..eda789558e86bafba13474a72c58bb65a5ac1052 --- /dev/null +++ b/notebooks/example_data/dev.txt @@ -0,0 +1,2515 @@ +# sent_id = 100 +Konferencja O +jest O +organizowana O +pod O +patronatem 
O +Minister B-nam_org_institution +Nauki I-nam_org_institution +i I-nam_org_institution +Szkolnictwa I-nam_org_institution +Wyższego I-nam_org_institution +, O +Profesor O +Barbary B-nam_liv_person +Kudryckiej I-nam_liv_person +, O +PrzewodniczÄ…cej O +KRASP B-nam_org_organization +, O +Profesor O +Katarzyny B-nam_liv_person +ChaÅ‚asiÅ„skiej I-nam_liv_person +- I-nam_liv_person +Macukow I-nam_liv_person +oraz O +Prezesa O +PAN B-nam_org_organization +, O +Profesora O +MichaÅ‚a B-nam_liv_person +Kleibera I-nam_liv_person +. O + +# sent_id = 101 +Miejsce O +: O +Sala B-nam_fac_goe +Senatu I-nam_fac_goe +, O +PaÅ‚ac B-nam_fac_goe +Kazimierzowski I-nam_fac_goe +, O +główny O +kampus O +Uniwersytetu B-nam_org_organization +Warszawskiego I-nam_org_organization + +# sent_id = 102 +Termin O +: O +5 O +maja O +, O +10 O +. O +00 O +- O +16 O +. O +30 O + +# sent_id = 103 +UdziaÅ‚ O +w O +konferencji O +jest O +bezpÅ‚atny O +. O + +# sent_id = 104 +Ze O +wzglÄ™du O +na O +ograniczonÄ… O +liczbÄ™ O +miejsc O +, O +prosimy O +o O +zgÅ‚aszanie O +udziaÅ‚u O +w O +konferencji O +przez O +wysÅ‚anie O +listu O +na O +adres O +kkrak O +@ O +icm O +. O +edu O +. O +pl O +. O + +# sent_id = 105 +Program O +konferencji O + +# sent_id = 106 +10 O +. O +00 O +– O +10 O +. O +30 O +Otwarcie O +konferencji O +: O +dr O +Alek B-nam_liv_person +Tarkowski I-nam_liv_person +( O +koordynator O +, O +Creative B-nam_org_organization +Commons I-nam_org_organization +Polska I-nam_org_organization +, O +ICM B-nam_org_institution +UW B-nam_org_organization +) O + +# sent_id = 107 +10 O +. O +30 O +– O +12 O +. O +45 O +Sesja O +przedpoÅ‚udniowa O +: O +przeglÄ…d B-nam_pro_title +zagadnieÅ„ I-nam_pro_title +zwiÄ…zanych I-nam_pro_title +z I-nam_pro_title +otwartÄ… I-nam_pro_title +naukÄ… I-nam_pro_title + +# sent_id = 108 +10 O +. O +30 O +- O +11 O +. 
O +00 O +: O +dr O +Ignasi B-nam_liv_person +Labastida I-nam_liv_person +i I-nam_liv_person +Juan I-nam_liv_person +( O +Uniwersytet B-nam_org_organization +BarceloÅ„ski I-nam_org_organization +, O +Creative B-nam_org_organization +Commons I-nam_org_organization +Catalonia I-nam_org_organization +) O +– O +W B-nam_pro_title +stronÄ™ I-nam_pro_title +otwartoÅ›ci I-nam_pro_title +. I-nam_pro_title +DoÅ›wiadczenia I-nam_pro_title +biblioteki I-nam_pro_title +uniwersyteckiej I-nam_pro_title +. O + +# sent_id = 109 +11 O +. O +00 O +- O +11 O +. O +30 O +: O +dr O +Ahrash B-nam_liv_person +Bissell I-nam_liv_person +( O +Creative B-nam_org_organization +Commons I-nam_org_organization +Learn I-nam_org_organization +, O +USA B-nam_loc_gpe_country +) O +– O +Edukacja B-nam_pro_title +dla I-nam_pro_title +innowacji I-nam_pro_title +– I-nam_pro_title +z I-nam_pro_title +pomocÄ… I-nam_pro_title +Creative I-nam_pro_title +Commons I-nam_pro_title +. O + +# sent_id = 110 +11 O +. O +30 O +- O +11 O +. O +40 O +: O +przerwa O +na O +kawÄ™ O + +# sent_id = 111 +11 O +. O +40 O +- O +12 O +. O +10 O +: O +PaweÅ‚ B-nam_liv_person +SzczÄ™sny I-nam_liv_person +( O +ZakÅ‚ad B-nam_org_institution +Bioinformatyki I-nam_org_institution +Instytutu B-nam_org_institution +Biochemii I-nam_org_institution +i I-nam_org_institution +Biofizyki I-nam_org_institution +PAN B-nam_org_organization +, O +WydziaÅ‚ B-nam_org_institution +Biologii I-nam_org_institution +UW B-nam_org_organization +) O +– O +Nauka B-nam_pro_title +2 I-nam_pro_title +. I-nam_pro_title +0 I-nam_pro_title + +# sent_id = 112 +12 O +. O +10 O +- O +12 O +. O +30 O +: O +Dyskusja O + +# sent_id = 113 +12 O +. O +30 O +- O +12 O +. O +45 O +: O +Prezentacja O +projektu O +“ O +Otwórz B-nam_eve_human +książkÄ™ I-nam_eve_human +†O + +# sent_id = 114 +12 O +. O +45 O +- O +13 O +. O +45 O +Przerwa O +obiadowa O + +# sent_id = 115 +13 O +. O +45 O +- O +14 O +. 
O +45 O +Sesja O +popoÅ‚udniowa O +: O +otwarta B-nam_pro_title +nauka I-nam_pro_title +w I-nam_pro_title +Polsce I-nam_pro_title + +# sent_id = 116 +13 O +. O +45 O +- O +14 O +. O +05 O +prof O +. O +Marek B-nam_liv_person +Niezgódka I-nam_liv_person +( O +Dyrektor O +Interdyscyplinarnego B-nam_org_institution +Centrum I-nam_org_institution +Modelowania I-nam_org_institution +Matematycznego I-nam_org_institution +i I-nam_org_institution +Komputerowego I-nam_org_institution +, O +Uniwersytet B-nam_org_organization +Warszawski I-nam_org_organization +) O + +# sent_id = 117 +14 O +. O +05 O +- O +14 O +. O +25 O +: O +Krzysztof B-nam_liv_person +Siewicz I-nam_liv_person +( O +Creative B-nam_org_organization +Commons I-nam_org_organization +Polska I-nam_org_organization +, O +Kancelaria B-nam_org_company +prawna I-nam_org_company +Grynhoff I-nam_org_company +, I-nam_org_company +Woźny I-nam_org_company +, I-nam_org_company +MaliÅ„ski I-nam_org_company +) O +: O +Prawne B-nam_pro_title +aspekty I-nam_pro_title +otwartej I-nam_pro_title +nauki I-nam_pro_title + +# sent_id = 118 +14 O +. O +25 O +- O +14 O +. O +40 O +: O +dr O +Jan B-nam_liv_person +KozÅ‚owski I-nam_liv_person +( O +Centrum B-nam_org_institution +BadaÅ„ I-nam_org_institution +Polityki I-nam_org_institution +Naukowej I-nam_org_institution +i I-nam_org_institution +Szkolnictwa I-nam_org_institution +Wyższego I-nam_org_institution +UW B-nam_org_organization +) O +: O +Alternatywne B-nam_pro_title +formy I-nam_pro_title +peer I-nam_pro_title +review I-nam_pro_title + +# sent_id = 119 +14 O +. O +45 O +- O +15 O +. O +00 O +Przerwa O +na O +kawÄ™ O + +# sent_id = 120 +15 O +. O +00 O +- O +16 O +. O +30 O +Debata O +panelowa O +: O +jak B-nam_pro_title +wprowadzić I-nam_pro_title +otwartÄ… I-nam_pro_title +naukÄ™ I-nam_pro_title +do I-nam_pro_title +Polski I-nam_pro_title +? 
I-nam_pro_title + +# sent_id = 121 +Prowadzenie O +: O +Edwin B-nam_liv_person +Bendyk I-nam_liv_person +( O +†O +Polityka O +†O +, O +Collegium B-nam_org_organization +Civitas I-nam_org_organization +) O + +# sent_id = 122 +Zaproszeni O +uczestnicy O +: O +Krzysztof B-nam_liv_person +Gulda I-nam_liv_person +( O +Dyrektor O +, O +Departament B-nam_org_institution +Strategii I-nam_org_institution +i I-nam_org_institution +Rozwoju I-nam_org_institution +Nauki I-nam_org_institution +MNiSW B-nam_org_institution +) O +, O +Juliusz B-nam_liv_person +Braun I-nam_liv_person +( O +Dyrektor O +, O +Departament B-nam_org_institution +Strategii I-nam_org_institution +i I-nam_org_institution +Analiz I-nam_org_institution +MKiDN B-nam_org_institution +) O +, O +Prof O +. O +Marek B-nam_liv_person +Niezgódka I-nam_liv_person +( O +Dyrektor O +, O +Interdyscyplinarne B-nam_org_institution +Centrum I-nam_org_institution +Modelowania I-nam_org_institution +Matematycznego I-nam_org_institution +i I-nam_org_institution +Komputerowego I-nam_org_institution +, O +Uniwersytet B-nam_org_organization +Warszawski I-nam_org_organization +) O +, O +PaweÅ‚ B-nam_liv_person +SzczÄ™sny I-nam_liv_person +( O +ZakÅ‚ad B-nam_org_institution +Bioinformatyki I-nam_org_institution +Instytutu B-nam_org_institution +Biochemii I-nam_org_institution +i I-nam_org_institution +Biofizyki I-nam_org_institution +PAN B-nam_org_organization +, O +WydziaÅ‚ B-nam_org_institution +Biologii I-nam_org_institution +UW B-nam_org_organization +) O +. O + +# sent_id = 367 +Zwolennicy O +Kondratiewowskiego O +determinizmu O +skupujÄ… O +zÅ‚oto O +, O +keynesiÅ›ci O +drukujÄ… O +pieniÄ…dze O +, O +pozostaÅ‚e O +przy O +życiu O +banki O +zapewne O +poszukujÄ… O +nowych O +statystyków O +, O +może O +ich O +specjaliÅ›ci O +od O +stochastyki O +siÄ™ O +jednak O +pomylili O +, O +a O +inni O +już O +na O +pewno O +przewidzÄ… O +wszystko O +wÅ‚aÅ›ciwie O +. 
O + +# sent_id = 368 +Ekonomia O +byÅ‚a O +z O +zaÅ‚ożenia O +naukÄ… O +spoÅ‚ecznÄ… O +. O + +# sent_id = 369 +OpisywaÅ‚a O +i O +analizowaÅ‚a O +, O +byÅ‚a O +naukÄ… O +empirycznÄ… O +i O +raczej O +wszechstronnÄ… O +. O + +# sent_id = 370 +Wszechstronność O +czasem O +wychodziÅ‚a O +bokiem O +, O +nadmierny O +zapaÅ‚ O +filozoficzny O +nie O +uÅ‚atwia O +analizy O +, O +w O +przeciwieÅ„stwie O +do O +czystego O +i O +eleganckiego O +wzoru O +matematycznego O +. O + +# sent_id = 371 +ÅšcisÅ‚e O +metody O +okazaÅ‚y O +siÄ™ O +idealnie O +dopasowane O +do O +prostych O +zaÅ‚ożeÅ„ O +ekonomii O +, O +a O +im O +wiÄ™kszy O +postÄ™p O +dokonywaÅ‚ O +siÄ™ O +w O +matematyce O +, O +tym O +bardziej O +skomplikowane O +problemy O +ekonomiczne O +można O +byÅ‚o O +tÅ‚umaczyć O +w O +sposób O +" O +Å›cisÅ‚y O +" O +. O + +# sent_id = 372 +W O +koÅ„cu O +doszÅ‚o O +do O +tego O +, O +że O +model O +matematyczny O +staÅ‚ O +siÄ™ O +podstawÄ… O +każdej O +analizy O +. O + +# sent_id = 373 +Wszystko O +zaczÄ™to O +sprowadzać O +do O +funkcji O +, O +wzór O +i O +jego O +opis O +staÅ‚ O +siÄ™ O +kanonem O +artykuÅ‚u O +z O +ekonomii O +. O + +# sent_id = 374 +Analiza O +przyczyn O +, O +których O +nie O +da O +siÄ™ O +ująć O +w O +sposób O +statystyczny O +zaczęła O +zanikać O +. O + +# sent_id = 375 +NarzÄ™dzia O +, O +które O +miaÅ‚y O +być O +tylko O +pomocÄ… O +, O +staÅ‚y O +siÄ™ O +celem O +samym O +w O +sobie O +. O + +# sent_id = 376 +Ekonomia O +staÅ‚a O +siÄ™ O +naukÄ… O +o O +modelach O +, O +modelach O +dopasowanych O +do O +danych O +z O +przeszÅ‚oÅ›ci O +. O + +# sent_id = 377 +JeÅ›li O +model O +nie O +przewidywaÅ‚ O +skutecznie O +przyszÅ‚oÅ›ci O +, O +tym O +gorzej O +dla O +przyszÅ‚oÅ›ci O +, O +zawsze O +znalazÅ‚ O +siÄ™ O +ekonomista O +- O +matematyk O +, O +który O +dodaÅ‚ O +do O +modelu O +coÅ› O +nowego O +, O +co O +uzasadniÅ‚o O +jego O +stosowanie O +na O +dalsze O +lata O +. 
O + +# sent_id = 378 +EkonomiÅ›ci O +prÄ™dzej O +potrafili O +by O +wytÅ‚umaczyć O +teoriÄ™ O +strun O +niż O +spoÅ‚eczne O +skutki O +kryzysu O +paliwowego O +. O + +# sent_id = 379 +Wielkie O +obliczenia O +nie O +sprawdziÅ‚y O +siÄ™ O +w O +przypadku O +caÅ‚ych O +systemów O +ekonomicznych O +, O +które O +planowaÅ‚y O +przyszÅ‚ość O +i O +Å›lepo O +te O +plany O +realizowaÅ‚y O +. O + +# sent_id = 380 +Kryzys O +dowodzi O +, O +że O +nie O +sprawdziÅ‚y O +siÄ™ O +też O +w O +skali O +przedsiÄ™biorstw O +. O + +# sent_id = 381 +Czemu O +jednak O +siÄ™ O +dziwić O +- O +wiÄ™kszość O +ogÅ‚oszeÅ„ O +z O +ofertami O +pracy O +dla O +" O +analityków O +ekonomicznych O +" O +wymaga O +wyksztaÅ‚cenia O +w O +dziedzinie O +" O +matematyki O +, O +matematyki O +stosowanej O +lub O +ekonometrii O +" O +. O + +# sent_id = 382 +W O +takiej O +kolejnoÅ›ci O +i O +bez O +ekonomii O +. O + +# sent_id = 383 +Tymczasem O +teraz O +, O +ci O +wszyscy O +matematycy O +siÄ™gajÄ… O +po O +wyjaÅ›nienia O +raczej O +filozoficzne O +. O + +# sent_id = 384 +Przecież O +" O +po O +każdej O +hossie O +nadchodzi O +bessa O +" O +. O + +# sent_id = 408 +GÅ‚owi O +siÄ™ O +taki O +Benkler B-nam_liv_person +, O +jak O +tu O +pomóc O +biednym O +krajom O +w O +tworzeniu O +odpowiednich O +roÅ›lin O +, O +wymyÅ›la O +peer O +production O +, O +dzielenie O +siÄ™ O +licencjami O +przez O +uniwersytety O +, O +udostÄ™pnianie O +nasion O +, O +które O +i O +tak O +siÄ™ O +na O +nic O +krajom O +rozwiniÄ™tym O +nie O +przydadzÄ… O +, O +a O +tymczasem O +wÅ‚adze O +WrocÅ‚awia B-nam_loc_gpe_city +znalazÅ‚y O +nowy O +, O +lepszy O +sposób O +. O + +# sent_id = 409 +Obejmuje O +on O +pÅ‚acenie O +wiedzÄ… O +za O +gÅ‚osy O +krajów O +biednych O +niezbÄ™dne O +do O +organizacji O +wielkich O +imprez O +. 
O + +# sent_id = 410 +WrocÅ‚awscy O +naukowcy O +z O +Uniwersytetu B-nam_org_organization +Przyrodniczego I-nam_org_organization +opracowujÄ… O +specjalne O +odmiany O +ziemniaków O +dla O +Madagaskaru B-nam_loc_gpe_country +- O +w O +zamian O +za O +gÅ‚os O +tego O +kraju O +w O +wyborach O +organizatora O +EXPO B-nam_eve_human +2012 I-nam_eve_human +. O + +# sent_id = 411 +Dotychczas O +podobny O +proces O +miaÅ‚ O +oczywiÅ›cie O +miejsce O +- O +chociaż O +wiÄ™kszość O +krajów O +bogatych O +stawiaÅ‚a O +raczej O +na O +rozdawanie O +mercedesów B-nam_pro_brand +, O +wycieczek O +lub O +wiz O +bezpoÅ›rednio O +gÅ‚osujÄ…cym O +, O +zamiast O +pÅ‚acić O +krajowi O +, O +z O +którego O +pochodzili O +. O + +# sent_id = 412 +Teraz O +jednak O +można O +ustanowić O +specjalny O +cennik O +- O +gÅ‚os O +na O +letnie O +igrzyska O +- O +zezwolenie O +na O +produkcjÄ™ O +generyków O +potrzebnych O +w O +leczeniu O +AIDS B-nam_oth +lub O +podjÄ™cie O +prac O +nad O +szczepionkÄ… O +na O +malariÄ™ O +, O +igrzyska O +zimowe O +- O +licencja O +na O +produkcjÄ™ O +nasion O +specjalnych O +odmian O +ryżu O +lub O +pszenicy O +, O +EXPO B-nam_eve_human +- O +specjalna O +odmiana O +ziemniaka O +lub O +kapusty O +, O +maÅ‚e O +EXPO B-nam_eve_human +- O +licencja O +na O +groszek O +ozdobny O +lub O +saÅ‚atÄ™ O +. O + +# sent_id = 413 +PIPRA O +, O +CGIAR O +i O +BIOS O +przestanÄ… O +być O +potrzebne O +, O +jest O +tyle O +krajów O +, O +których O +gÅ‚osy O +można O +kupić O +przed O +każdÄ… O +ważnÄ… O +imprezÄ… O +( O +a O +można O +przecież O +wprowadzić O +gÅ‚osowanie O +na O +organizatorów O +mistrzostw O +Å›wiata O +w O +piÅ‚ce O +nożnej O +itp O +. O +) O +, O +że O +prace O +nad O +żywnoÅ›ciÄ… O +i O +lekami O +w O +krajach O +rozwijajÄ…cych O +siÄ™ O +wreszcie O +nabiorÄ… O +tempa O +. 
O + +# sent_id = 414 +OczywiÅ›cie O +plan O +WrocÅ‚awia B-nam_loc_gpe_city +ma O +sÅ‚aby O +punkt O +- O +ziemniaki O +, O +ziemniakami O +, O +ale O +przedstawiciel O +Madagaskaru B-nam_loc_gpe_country +za O +hyundaia B-nam_pro_brand +może O +przecież O +pomylić O +przyciski O +. O +. O +. O + +# sent_id = 477 +ZresztÄ… O +" O +przeniesienie O +do O +sieci O +" O +nie O +nastÄ…pi O +, O +tak O +jak O +nie O +nastÄ…piÅ‚o O +" O +przeniesienie O +na O +komputery O +" O +. O + +# sent_id = 478 +W O +pewnym O +momencie O +ludzie O +znajdÄ… O +siÄ™ O +w O +sieci O +, O +mnóstwo O +rzeczy O +zostanie O +offline O +i O +nigdy O +siÄ™ O +w O +sieci O +nie O +znajdzie O +. O + +# sent_id = 479 +Po O +prostu O +zaczniemy O +używać O +sieci O +zamiast O +dysku O +- O +już O +teraz O +pewnie O +z O +czegoÅ› O +tam O +każdy O +korzysta O +, O +jak O +nie O +z O +bramki O +pocztowej O +( O +kiedyÅ› O +wiÄ™kszość O +ludzi O +jednak O +używaÅ‚a O +outlooków O +itp O +. O +) O +, O +to O +z O +przeglÄ…darki O +zdjęć O +. O + +# sent_id = 480 +Przeniesienie O +( O +dla O +mnie O +) O +zakÅ‚ada O +podjÄ™cie O +decyzji O +o O +zmianie O +- O +coÅ› O +jak O +przejÅ›cie O +miÄ™dzy O +systemami O +operacyjnymi O +. O + +# sent_id = 481 +W O +przypadku O +sieci O +wiÄ™kszość O +ludzi O +tego O +nawet O +nie O +zauważy O +. O + +# sent_id = 482 +W O +sprawie O +metki O +. O + +# sent_id = 483 +Przeceniasz O +te O +podziaÅ‚y O +, O +bo O +ich O +reprezentanci O +najgÅ‚oÅ›niej O +krzyczÄ… O +. O + +# sent_id = 484 +WiÄ™kszość O +użytkowników O +komputerów O +nawet O +nie O +zauważy O +, O +jaki O +masz O +system O +operacyjny O +i O +nie O +zwraca O +na O +takie O +szczegóły O +żadnej O +uwagi O +. O + +# sent_id = 485 +Dopóki O +sÄ… O +w O +stanie O +znaleźć O +ikonÄ™ O +w O +stylu O +globu O +/ O +kompasu O +/ O +wielkiego O +" O +e O +" O +podpisanej O +" O +internet B-nam_oth_tech +" O +. 
O + +# sent_id = 486 +Tak O +jak O +dla O +wiÄ™kszoÅ›ci O +ludzi O +nie O +jest O +istotne O +kto O +zrobiÅ‚ O +pÅ‚ytÄ™ O +głównÄ… O +, O +pamięć O +czy O +dysk O +twardy O +, O +może O +wkrótce O +nie O +być O +istotne O +kto O +zrobiÅ‚ O +system O +operacyjny O +. O + +# sent_id = 487 +OczywiÅ›cie O +marki O +pozostanÄ… O +i O +dalej O +bÄ™dÄ… O +mieć O +znaczenie O +, O +ale O +na O +innym O +poziomie O +. O + +# sent_id = 488 +Apple B-nam_org_company +nie O +jest O +znany O +z O +produkcji O +systemu O +operacyjnego O +, O +ale O +caÅ‚ych O +rozwiÄ…zaÅ„ O +. O + +# sent_id = 489 +Microsoft B-nam_org_company +dążyÅ‚ O +do O +tego O +, O +żeby O +" O +pecet O +" O +byÅ‚ O +synonimem O +komputera O +z O +Windowsem B-nam_pro_software +( O +po O +angielsku O +w O +zasadzie O +jest O +) O +i O +to O +może O +okazać O +siÄ™ O +bÅ‚Ä™dem O +, O +bo O +coraz O +wiÄ™cej O +firm O +sprzedaje O +pecety O +z O +Linuksem B-nam_pro_software +( O +np O +. O +Dell B-nam_org_company +) O +i O +schizofrenia O +siÄ™ O +robi O +. O + +# sent_id = 490 +Ciekawe O +co O +bÄ™dzie O +dalej O +, O +biurowa O +wersja O +xboksa O +? O + +# sent_id = 491 +Nie O +dość O +, O +że O +browar O +, O +to O +jeszcze O +stary O +. O + +# sent_id = 492 +Galeria O +handlowo O +- O +artystyczna O +w O +Poznaniu B-nam_loc_gpe_city +wzbogaciÅ‚a O +siÄ™ O +o O +serwerowniÄ™ O +. O + +# sent_id = 493 +Znak O +czasów O +? O + +# sent_id = 494 +Do O +tej O +serwerowni O +trafiÅ‚a O +( O +wciąż O +tam O +zmierza O +? O +) O +nasza O +klasa O +. O + +# sent_id = 495 +Teraz O +polski B-nam_adj_country +serwis O +nareszcie O +wróciÅ‚ O +do O +macierzy O +( O +nomen O +omen O +) O +, O +tym O +bardziej O +wiÄ™c O +wzroÅ›nie O +mu O +pewnie O +liczba O +użytkowników O +. O + +# sent_id = 496 +Niedawno O +przeczytaÅ‚ O +em O +książkÄ™ O +pod O +sensacyjnym O +tytuÅ‚em O +" O +Who B-nam_pro_title_book +Controls I-nam_pro_title_book +the I-nam_pro_title_book +Internet I-nam_pro_title_book +? 
I-nam_pro_title_book +Illusions I-nam_pro_title_book +of I-nam_pro_title_book +a I-nam_pro_title_book +Borderless I-nam_pro_title_book +World I-nam_pro_title_book +" O +, O +w O +której O +prawnicy O +Goldsmith B-nam_liv_person +i O +Wu B-nam_liv_person +jasno O +i O +zupeÅ‚nie O +niesensacyjnie O +tÅ‚umaczÄ… O +, O +dlaczego O +internet B-nam_oth_tech +wciąż O +tkwi O +w O +ramach O +granic O +narodowych O +. O + +# sent_id = 497 +Nie O +chodzi O +bynajmniej O +tylko O +o O +Chiny B-nam_loc_gpe_country +, O +które O +sÄ… O +wygodnym O +punktem O +odniesienia O +dla O +paÅ„stw O +demokratycznych O +, O +gdzie O +też O +istnieje O +pokusa O +, O +żeby O +przykrÄ™cić O +Å›rubÄ™ O +, O +a O +zawsze O +można O +odwrócić O +uwagÄ™ O +wskazujÄ…c O +wiÄ™kszych O +zamordystów O +. O + +# sent_id = 498 +O O +przenosinach O +serwerów O +do O +Polski B-nam_loc_gpe_country +poinformowaÅ‚ O +mnie O +wÅ‚aÅ›nie O +mój O +host O +, O +który O +przy O +okazji O +uprzejmie O +prosi O +o O +wstrzymanie O +na O +czas O +przeprowadzki O +aktualizacji O +stron O +internetowych O +. O + +# sent_id = 499 +Już O +jednÄ… O +takÄ… O +przeprowadzkÄ™ O +przeżyÅ‚ O +em O +, O +od O +tego O +czasu O +nie O +mam O +oporów O +przed O +powierzaniem O +mojej O +poczty O +Wielkim O +i O +ZÅ‚ym O +Korporacjom O +. O + +# sent_id = 500 +Blogger O +i O +Wordpress O +zapraszajÄ… O +? O + +# sent_id = 501 +Co O +tam O +. O + +# sent_id = 502 +Teraz O +bÄ™dÄ™ O +wreszcie O +miaÅ‚ O +polski O +serwer O +dla O +polskiego O +bloga O +. O + +# sent_id = 503 +Internet B-nam_oth_tech +nie O +zna O +granic O +, O +przecież O +robaki O +bÄ™dÄ… O +wchodzić O +i O +tak O +, O +nic O +siÄ™ O +nie O +zmieni O +. O +. O +. O + +# sent_id = 504 +Gdyby O +m O +chciaÅ‚ O +zaistnieć O +miÄ™dzynarodowo O +, O +nie O +pisaÅ‚ O +by O +m O +raczej O +po O +polsku O +. O + +# sent_id = 505 +Sam O +jestem O +sobie O +winien O +, O +pierwszÄ… O +granicÄ™ O +ustanowiÅ‚ O +em O +sam O +. 
O + +# sent_id = 506 +JÄ™zyk O +, O +infrastruktura O +, O +systemy O +prawne O +, O +zestawy O +wartoÅ›ci O +- O +wszystko O +dzieli O +internet B-nam_oth_tech +na O +kawaÅ‚ki O +, O +co O +tylko O +powiela O +znany O +schemat O +. O + +# sent_id = 507 +NTSC B-nam_oth_tech +, O +110V O +, O +gniazdka O +elektryczne O +, O +ruch O +lewostronny O +. O + +# sent_id = 508 +Rozpoznawanie O +krajów O +wedÅ‚ug O +numerów O +IP B-nam_oth_tech +nie O +tylko O +uniemożliwia O +mi O +legalne O +Å›ciÄ…ganie O +seriali O +z O +NBC O +, O +ale O +zmienia O +interfejs O +miÄ™dzynarodowych O +portali O +, O +gdy O +jestem O +za O +granicÄ… O +( O +tak O +jakby O +nie O +można O +byÅ‚o O +posiÅ‚kować O +siÄ™ O +jÄ™zykiem O +przeglÄ…darki O +) O +. O + +# sent_id = 509 +Sprytne O +próbujÄ… O +być O +niektóre O +reklamy O +- O +pudÅ‚o O +, O +nie O +kupiÄ™ O +waszych O +towarów O +, O +jestem O +przybyszem O +z O +innego O +internetu B-nam_oth_tech +, O +nie O +rozumiem O +, O +i O +tak O +nie O +wysyÅ‚acie O +tego O +do O +Polski B-nam_loc_gpe_country +. O + +# sent_id = 510 +Czy O +nasza O +klasa O +bÄ™dzie O +chciaÅ‚a O +wyjść O +poza O +browar O +? O + +# sent_id = 511 +Czy O +potrzebne O +sÄ… O +jej O +inne O +internety B-nam_oth_tech +? O + +# sent_id = 512 +Ilu O +użytkowników O +Å‚Ä…czy O +siÄ™ O +z O +Wielkiej B-nam_loc_gpe_country +Brytanii I-nam_loc_gpe_country +i O +Irlandii B-nam_loc_gpe_country +? O + +# sent_id = 513 +W O +googlowym O +Open O +Social O +dominujÄ… O +firmy O +amerykaÅ„skie B-nam_adj_country +( O +z O +rodzynkami O +miÄ™dzy O +innymi O +z O +Brazylii B-nam_loc_gpe_country +i O +Indii B-nam_loc_gpe_country +) O +, O +wiÄ™c O +era O +open O +na O +razie O +raczej O +nie O +powtórzy O +sukcesu O +swej O +tenisowej O +imienniczki O +. 
O + +# sent_id = 514 +Infrastruktura O +internetowa O +w O +Stanach B-nam_loc_gpe_country +Zjednoczonych I-nam_loc_gpe_country +powstaÅ‚a O +w O +ramach O +baÅ„ki O +, O +u O +nas O +balon O +pompuje O +nasza O +klasa O +, O +może O +wiÄ™c O +popyt O +na O +porzÄ…dne O +Å‚Ä…cza O +rzeczywiÅ›cie O +wzroÅ›nie O +, O +a O +UKE B-nam_org_organization +niech O +zamieni O +kary O +finansowe O +na O +inwestycje O +w O +infrastrukturÄ™ O +, O +w O +tym O +Å‚Ä…cza O +, O +które O +nas O +Å‚Ä…czÄ… O +ze O +Å›wiatem O +zewnÄ™trznym O +. O + +# sent_id = 515 +A O +może O +lepiej O +zostać O +w O +naszym O +wÅ‚asnym O +starym O +browarze O +? O + +# sent_id = 532 +Z O +ciekawoÅ›ci O +napisaÅ‚ O +em O +nawet O +do O +kina O +Atom B-nam_fac_goe +, O +ale O +być O +może O +zajÄ™ci O +byli O +chodzeniem O +do O +kina O +i O +nie O +doczekaÅ‚ O +em O +siÄ™ O +odpowiedzi O +w O +sprawie O +foteli O +. O + +# sent_id = 533 +Nie O +wszystko O +stracone O +, O +w O +koÅ„cu O +nie O +każdy O +sprawdza O +spam O +codziennie O +. O + +# sent_id = 534 +Z O +tego O +jednak O +co O +zrozumiaÅ‚ O +em O +z O +jakichÅ› O +wystÄ™pujÄ…cych O +w O +sieci O +strzÄ™pów O +wypowiedzi O +wÅ‚aÅ›cicieli O +kina O +i O +przepisów O +o O +ochronie O +zabytków O +, O +wymiana O +foteli O +jest O +możliwa O +jak O +najbardziej O +, O +ale O +nie O +dość O +, O +że O +trudna O +administracyjnie O +, O +to O +jeszcze O +droga O +, O +bo O +jak O +siÄ™ O +konserwator O +zgodzi O +, O +to O +tylko O +na O +nowoczesne O +antyki O +. O + +# sent_id = 535 +ZresztÄ… O +z O +tych O +kawaÅ‚ków O +informacji O +wynika O +, O +że O +sala O +istnieje O +od O +1937 O +, O +czyli O +obchodzi O +okrÄ…gÅ‚Ä… O +rocznicÄ™ O +, O +co O +mogÅ‚o O +by O +stanowić O +dobry O +pretekst O +do O +namówienia O +jakiejÅ› O +instytucji O +na O +maÅ‚Ä… O +dotacjÄ™ O +. 
O + +# sent_id = 536 +Teraz O +jest O +oczywiÅ›cie O +za O +późno O +, O +budżety O +na O +ten O +rok O +dawno O +zamkniÄ™te O +, O +w O +piÅ‚kÄ™ O +w O +Atomie B-nam_fac_goe +zagrać O +nie O +można O +, O +elektrowni O +jÄ…drowej O +też O +w O +Polsce B-nam_loc_gpe_country +nie O +mamy O +. O + +# sent_id = 537 +Co O +do O +filmów O +i O +festiwalu O +. O + +# sent_id = 538 +Z O +filmami O +jest O +tak O +, O +że O +nie O +wiadomo O +. O + +# sent_id = 539 +PróbowaÅ‚ O +em O +sprawdzić O +, O +czy O +filmy O +siÄ™ O +nie O +nadajÄ… O +do O +oglÄ…dania O +, O +ale O +nie O +miaÅ‚em O +karnetu O +i O +napiÄ™ty O +grafik O +, O +i O +na O +żaden O +film O +nie O +udaÅ‚o O +mi O +siÄ™ O +na O +ostatniÄ… O +chwilÄ™ O +kupić O +biletów O +. O + +# sent_id = 540 +ObejrzaÅ‚ O +em O +wiÄ™c O +dwa O +filmy O +( O +plus O +jeden O +na O +Rynku B-nam_fac_square +, O +wiÄ™c O +siÄ™ O +nie O +liczy O +) O +, O +jeden O +dobry O +, O +a O +drugi O +bardzo O +dobry O +. O + +# sent_id = 541 +Drugi O +( O +Persepolis B-nam_pro_title +) O +wejdzie O +nawet O +do O +dystrybucji O +, O +wiÄ™c O +siÄ™ O +bÄ™dzie O +można O +naocznie O +przekonać O +. O + +# sent_id = 542 +KiedyÅ› O +na O +pewno O +by O +m O +wolaÅ‚ O +każdy O +film O +zobaczyć O +w O +kinie O +, O +teraz O +jednak O +nie O +chce O +mi O +siÄ™ O +, O +bo O +z O +jednej O +strony O +nie O +lubiÄ™ O +kameralnych O +filmów O +oglÄ…dać O +w O +multipleksie O +, O +z O +drugiej O +Atom B-nam_fac_goe +też O +nie O +jest O +moim O +ulubionym O +kinem O +. 
O + +# sent_id = 543 +A O +ponieważ O +raczej O +jest O +maÅ‚o O +prawdopodobne O +, O +żeby O +m O +byÅ‚ O +jedyny O +taki O +wybredny O +, O +wiÄ™c O +jakby O +mi O +ktoÅ› O +daÅ‚ O +nielegalne O +kopie O +filmów O +, O +na O +które O +siÄ™ O +nie O +zaÅ‚apaÅ‚ O +em O +( O +swojÄ… O +drogÄ… O +do O +dystrybucji O +pewnie O +nie O +wejdÄ… O +, O +DVD B-nam_oth_tech +w O +Polsce B-nam_loc_gpe_country +też O +ma O +maÅ‚e O +szanse O +) O +, O +to O +by O +m O +braÅ‚ O +, O +telewizor O +mi O +Å‚adnie O +odbiera O +. O + +# sent_id = 544 +Chodzi O +o O +to O +, O +że O +liczba O +osób O +, O +które O +zobaczyÅ‚y O +by O +film O +w O +kinie O +, O +gdyby O +miaÅ‚y O +lepszy O +wybór O +rodzajów O +kin O +jest O +pewnie O +na O +tyle O +duża O +, O +że O +może O +warto O +siÄ™ O +o O +nie O +starać O +bardziej O +. O + +# sent_id = 673 +ZachÄ™ceni O +przeÅ‚omowym O +pomysÅ‚em O +dystrybutorów O +DVD B-nam_oth_tech +, O +do O +niekonwencjonalnej O +i O +wreszcie O +skutecznej O +walki O +z O +piractwem O +postanowili O +wÅ‚Ä…czyć O +siÄ™ O +również O +inni O +posiadacze O +praw O +autorskich O +. O + +# sent_id = 674 +Åšladem O +Filipa B-nam_liv_person +II I-nam_liv_person +do O +walki O +rzucili O +wszystkie O +siÅ‚y O +. O + +# sent_id = 675 +Wytwórnie O +pÅ‚ytowe O +siÄ™gnęły O +po O +stare O +sprawdzone O +metody O +i O +do O +Å‚ask O +wróciÅ‚a O +kaseta O +magnetofonowa O +. O + +# sent_id = 676 +Nowoczesne O +technologie O +produkcji O +pozwoliÅ‚y O +jednak O +uzyskać O +taÅ›mÄ™ O +wystarczajÄ…co O +cienkÄ… O +, O +aby O +już O +po O +kilkunastu O +odtworzeniach O +rwaÅ‚a O +siÄ™ O +na O +kawaÅ‚ki O +. O + +# sent_id = 677 +W O +ten O +sposób O +na O +nowo O +zdefiniowane O +zostaÅ‚o O +pojÄ™cie O +remiksu O +. O + +# sent_id = 678 +Temu O +wciÄ…gajÄ…cemu O +zajÄ™ciu O +oddaje O +siÄ™ O +nowe O +pokolenie O +sÅ‚uchaczy O +muzyki O +. 
O + +# sent_id = 679 +NieÅ‚atwe O +zadanie O +czekaÅ‚o O +dystrybutorów O +muzyki O +w O +formie O +elektronicznej O +, O +ale O +i O +oni O +znaleźli O +sposób O +. O + +# sent_id = 680 +DRM O +odczytujÄ…cy O +odcisk O +palca O +w O +poÅ‚Ä…czeniu O +z O +recytacjÄ… O +Odysei B-nam_pro_title +wspak O +umożliwia O +odsÅ‚uchanie O +caÅ‚oÅ›ci O +utworu O +. O + +# sent_id = 681 +Prawdziwi O +melomani O +recytujÄ… O +także O +IliadÄ™ B-nam_pro_title +, O +EneidÄ™ B-nam_pro_title +, O +wzglÄ™dnie O +inne O +klasyczne O +heksametry O +. O + +# sent_id = 682 +W O +celu O +ochrony O +przed O +greko O +- O +Å‚aciÅ„skÄ… O +kakofoniÄ… O +coraz O +wiÄ™cej O +osób O +nabywa O +przenoÅ›ne O +odtwarzacze O +i O +zasila O +szeregi O +kupujÄ…cych O +pliki O +muzyczne O +. O + +# sent_id = 683 +Programy O +telewizyjne O +co O +45 O +sekund O +przerywajÄ… O +plansze O +ostrzegajÄ…ce O +o O +karach O +za O +naruszenie O +prawa O +autorskiego O +, O +równoczeÅ›nie O +na O +wszystkich O +kanaÅ‚ach O +. O diff --git a/notebooks/example_data/test.txt b/notebooks/example_data/test.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f1977f27c2b2f03040c0af5f4be0bf7a69e88b6 --- /dev/null +++ b/notebooks/example_data/test.txt @@ -0,0 +1,2062 @@ +# sent_id = 0 +W O +koÅ„cu O +wyszÅ‚o O +, O +do O +czego O +potrzebne O +byÅ‚o O +Google B-nam_org_company +Gears B-nam_pro_software +. O + +# sent_id = 1 +Dorzucono O +kilka O +bardziej O +smakowitych O +kÄ…sków O +i O +mamy O +wreszcie O +system O +operacyjny O +przeglÄ…darkÄ™ O +Google B-nam_org_company +Chrome B-nam_pro_software +. O + +# sent_id = 2 +Niezależnie O +od O +tego O +, O +czy O +bÄ™dzie O +to O +Å›miertelny O +cios O +dla O +Windows B-nam_pro_software +czy O +dla O +Firefoksa B-nam_pro_software +, O +program O +jest O +kolejnym O +zwiastunem O +zmian O +w O +interfejsie O +graficznym O +. 
O + +# sent_id = 3 +Po O +kilku O +minutach O +rzeczywiÅ›cie O +da O +siÄ™ O +odczuć O +szybkość O +wczytywania O +stron O +z O +JavaScriptem B-nam_oth_tech +, O +a O +każda O +zakÅ‚adka O +w O +oddzielnym O +procesie O +też O +brzmi O +nieźle O +( O +szczególnie O +pod O +Linuksem B-nam_pro_software +* O +, O +w O +którym O +Adobe B-nam_pro_software +Flash I-nam_pro_software +regularnie O +wywala O +przeglÄ…darki O +- O +niestety O +wersji O +dla O +Linuksa B-nam_pro_software +na O +razie O +brak O +) O +. O + +# sent_id = 4 +Beznadziejnie O +na O +pierwszy O +rzut O +oka O +wyglÄ…dajÄ… O +zakÅ‚adki O +( O +foldery O +? O +! O +) O +, O +ale O +być O +może O +trzymanie O +zakÅ‚adek O +w O +przeglÄ…darce O +jest O +już O +niemodne O +. O + +# sent_id = 5 +SwojÄ… O +drogÄ… O +integracji O +z O +del.icio.us O +też O +nie O +widać O +. O + +# sent_id = 6 +Motywacje O +Google B-nam_org_company +też O +sÄ… O +raczej O +jasne O +( O +konwergencja O +, O +znowu O +) O +- O +gdy O +Chrome B-nam_pro_software +ustawimy O +jako O +domyÅ›lnÄ… O +przeglÄ…darkÄ™ O +w O +Windows B-nam_pro_software +, O +nazwa O +" O +Internet B-nam_oth_tech +" O +w O +menu O +start O +nabierze O +wÅ‚aÅ›ciwego O +znaczenia O +. O +. O +. O + +# sent_id = 7 +Chrome B-nam_pro_software +wyglÄ…da O +Å‚adnie O +. O + +# sent_id = 8 +W O +nowym O +Firefoksie B-nam_pro_software +jest O +AwesomeBar B-nam_pro_software +, O +podobny O +pasek O +adresu O +jest O +też O +w O +Chrome B-nam_pro_software +. 
O + +# sent_id = 9 +Niby O +to O +nic O +wielkiego O +- O +użytkownicy O +Apple B-nam_pro_software +od O +dawna O +majÄ… O +Spotlight B-nam_pro_software +( O +zresztÄ… O +- O +to O +co O +w O +Windowsie B-nam_pro_software +nazywa O +siÄ™ O +" O +Explorer B-nam_pro_software +" O +, O +na O +Maku B-nam_pro_software +od O +dawna O +nazywa O +siÄ™ O +" O +Finder B-nam_pro_software +" O +) O +, O +w O +ViÅ›cie B-nam_pro_software +też O +jest O +nieÅ›miaÅ‚e O +okienko O +wyszukiwania O +( O +choć O +prezentacja O +wyników O +w O +postaci O +nudnej O +listy O +) O +, O +Google B-nam_org_company +Desktop B-nam_pro_software +istnieje O +już O +dÅ‚ugo O +, O +a O +w O +Linuksie B-nam_pro_software +gÅ‚owa O +wrÄ™cz O +boli O +od O +przybytku O +( O +Tracker B-nam_pro_software +, O +Beagle B-nam_pro_software +, O +Deskbar B-nam_pro_software +) O +, O +ale O +jedno O +jest O +pewne O +: O +rola O +pisania O +w O +interfejsie O +graficznym O +siÄ™ O +zwiÄ™ksza O +. O + +# sent_id = 10 +JeÅ›li O +AwesomeBar B-nam_pro_software +jest O +za O +maÅ‚o O +Awesome O +- O +można O +zainstalować O +Ubiquity B-nam_pro_software +. O + +# sent_id = 11 +JeÅ›li O +tekstowa O +obsÅ‚uga O +przeglÄ…darki O +to O +za O +maÅ‚o O +- O +na O +Maku B-nam_pro_software +jest O +Quicksilver B-nam_pro_software +, O +w O +Linuksie B-nam_pro_software +Gnome B-nam_pro_software +Do O +( O +w O +Windowsie B-nam_pro_software +na O +pewno O +też O +coÅ› O +jest O +) O +: O +piszesz O +i O +wybierasz O +z O +listy O +, O +program O +zapamiÄ™tuje O +zachowania O +użytkownika O +, O +czasem O +wystarczy O +kilka O +liter O +. O + +# sent_id = 12 +Szukanie O +nie O +sÅ‚uży O +już O +tylko O +znalezieniu O +rzeczy O +, O +które O +gdzieÅ› O +siÄ™ O +zapodziaÅ‚y O +, O +szukanie O +zastÄ™puje O +przeglÄ…danie O +. 
O + +# sent_id = 13 +Przecież O +przed O +pojawieniem O +siÄ™ O +Google B-nam_pro_software +, O +internet B-nam_oth_tech +byÅ‚ O +katalogowany O +, O +dopiero O +później O +zostaÅ‚ O +tak O +naprawdÄ™ O +zindeksowany O +. O + +# sent_id = 14 +Być O +może O +to O +stara O +forma O +internetu B-nam_oth_tech +byÅ‚a O +przyczynÄ… O +porażki O +microsoftowego O +Active O +Desktop O +. O + +# sent_id = 15 +W O +koÅ„cu O +kiepskie O +wykonanie O +, O +wymuszanie O +niestandardowych O +formatów O +i O +zasobożerność O +innym O +produktom O +MS B-nam_org_company +nie O +zaszkodziÅ‚y O +. O +. O +. O + +# sent_id = 16 +Tymczasem O +teraz O +nasze O +dyski O +sÄ… O +już O +zindeksowane O +, O +czas O +wyrobić O +odpowiednie O +nawyki O +. O + +# sent_id = 17 +A O +wtedy O +granica O +miÄ™dzy O +online O +i O +offline O +zatrze O +siÄ™ O +jeszcze O +bardziej O +. O + +# sent_id = 18 +I O +kto O +na O +tym O +skorzysta O +? O + +# sent_id = 19 +* O +Firma O +Google B-nam_org_company +zapisuje O +Twój O +adres O +w O +celu O +przesyÅ‚ania O +wiadomoÅ›ci O +dotyczÄ…cych O +przeglÄ…darki O +Google B-nam_org_company +Chrome B-nam_pro_software +oraz O +inforlinuxji O +o O +aktualizacjach O +i O +wydaniu O +gotowej O +wersji O +. O + +# sent_id = 20 +Podanie O +adresu O +oznacza O +wyrażenie O +zgody O +na O +otrzymywanie O +e O +- O +maili O +zawierajÄ…cych O +inforlinuxje O +tego O +typu O +. O + +# sent_id = 21 +Google B-nam_org_company +bÄ™dzie O +przechowywać O +Twój O +adres O +e O +- O +mail O +przez O +pewien O +czas O +po O +wydaniu O +przeglÄ…darki O +Google B-nam_org_company +Chrome B-nam_pro_software +dla O +systemu O +Linux B-nam_pro_software +, O +a O +nastÄ™pnie O +go O +usunie O +. O + +# sent_id = 22 +WiÄ™cej O +inforlinuxji O +na O +temat O +przechowywania O +danych O +użytkowników O +przez O +Google B-nam_org_company +można O +znaleźć O +w O +zasadach O +ochrony O +prywatnoÅ›ci O +. 
O + +# sent_id = 23 +Subiektywny O +przeglÄ…d O +kaw O +z O +mlekiem O +i O +rogalików O +. O + +# sent_id = 24 +Rogalik O +spÅ‚aszczony O +przez O +toster O +, O +cukier O +z O +wierzchu O +lekko O +przypalony O +, O +w O +stylu O +crema O +catalana O +. O + +# sent_id = 25 +CiepÅ‚y O +, O +podany O +ze O +sztućcami O +. O + +# sent_id = 26 +PociÄ™ty O +w O +paski O +. O + +# sent_id = 27 +Rogalik O +pociÄ™ty O +w O +paski O +, O +na O +zimno O +. O + +# sent_id = 28 +Podany O +ze O +sztućcami O +. O + +# sent_id = 29 +Zjadany O +w O +centrum O +przed O +wyjazdem O +autobusu O +na O +plażę O +. O + +# sent_id = 30 +Rogalik O +zwykÅ‚y O +i O +wymiÄ™toszony O +, O +do O +rÄ™ki O +w O +serwetce O +, O +kawa O +w O +szczycie O +dÅ‚ugiej O +przerwy O +podana O +w O +plastikowym O +kubku O +. O + +# sent_id = 31 +Ale O +za O +to O +caÅ‚y O +zestaw O +za O +marne O +1 O +euro B-nam_oth_currency +. O + +# sent_id = 32 +Stołówka O +uniwersytecka O +przerabia O +kawÄ™ O +i O +rogaliki O +w O +tempie O +kawiarni O +na O +dużym O +dworcu O +. O + +# sent_id = 33 +Kawa O +z O +mlekiem O +ciepÅ‚ym O +lub O +zimnym O +, O +cortado O +( O +maÅ‚o O +mleka O +) O +, O +cortado O +z O +mlekiem O +skondensowanym O +( O +ale O +naprawdÄ™ O +skondensowanym O +, O +cortado O +podaje O +siÄ™ O +w O +maÅ‚ych O +szklaneczkach O +, O +na O +dnie O +gruba O +warstwa O +mleka O +, O +trzeba O +energicznie O +wymieszać O +) O +, O +kawa O +( O +czyli O +solo O +, O +czyli O +espresso O +) O +. O +. O +. O + +# sent_id = 34 +Wszystko O +po O +pięćdziesiÄ…t O +centów O +. O + +# sent_id = 35 +Inna O +uniwersytecka O +stołówka O +, O +100 O +metrów O +dalej O +, O +ale O +ze O +stolikami O +na O +dworze O +. O + +# sent_id = 36 +Ceny O +zupeÅ‚nie O +inne O +- O +kawa O +z O +mlekiem O +i O +rogalik O +za O +1 O +, O +45 O +euro B-nam_oth_currency +, O +różne O +ceny O +na O +różne O +rodzaje O +kaw O +. 
O + +# sent_id = 37 +Ale O +siedzenie O +na O +sÅ‚oÅ„cu O +, O +w O +grudniu O +- O +bezcenne O +. O + +# sent_id = 38 +I O +jeszcze O +Å›rodek O +niedzieli O +- O +Alek B-nam_liv_person +Tarkowski I-nam_liv_person +zwróciÅ‚ O +uwagÄ™ O +na O +przejmowanie O +przez O +banki O +przestrzeni O +publicznej O +w O +Warszawie B-nam_loc_gpe_city +. O + +# sent_id = 39 +W O +niedzielÄ™ O +, O +okoÅ‚o O +godziny O +14 O +zachciaÅ‚o O +nam O +siÄ™ O +wyjść O +na O +kawÄ™ O +, O +a O +dzielnica O +jest O +dopiero O +w O +budowie O +( O +ma O +Å‚Ä…czyć O +wybudowany O +w O +szczerym O +polu O +uniwersytet O +oraz O +miasto O +) O +. O + +# sent_id = 40 +Na O +rondzie O +przy O +głównym O +wjeździe O +na O +teren O +uniwersytetu O +sÄ… O +dwa O +banki O +. O + +# sent_id = 41 +Na O +ulicy O +, O +która O +bÄ™dzie O +prowadzić O +do O +miasta O +, O +sÄ… O +jeszcze O +dwa O +, O +mimo O +że O +nie O +da O +siÄ™ O +niÄ… O +na O +razie O +jeździć O +. O + +# sent_id = 42 +Ale O +kawÄ™ O +( O +bez O +rogalika O +) O +udaÅ‚o O +nam O +siÄ™ O +znaleźć O +w O +jednym O +z O +dziewiÄ™ciu O +barów O +, O +które O +znajdujÄ… O +siÄ™ O +promieniu O +500 O +metrów O +od O +mieszkania O +( O +wiÄ™kszość O +byÅ‚a O +zamkniÄ™ta O +, O +bo O +niedziela O +/ O +sjesta O +, O +jeden O +nam O +siÄ™ O +nie O +podobaÅ‚ O +itp O +. O +) O +. O + +# sent_id = 43 +Wszystkie O +budynki O +( O +z O +których O +znaczna O +część O +nie O +jest O +zamieszkana O +) O +majÄ… O +przeznaczone O +miejsce O +na O +usÅ‚ugi O +/ O +sklepy O +, O +wiÄ™c O +można O +siÄ™ O +spodziewać O +, O +że O +za O +rok O +bÄ™dzie O +już O +można O +wybierać O +spoÅ›ród O +20 O +barów O +, O +rogalików O +na O +ciepÅ‚o O +i O +na O +zimno O +, O +rogalików O +pociÄ™tych O +w O +paseczki O +. O +. O +. O + +# sent_id = 44 +Koniec O +napisów O +. 
O + +# sent_id = 45 +Już O +od O +dawna O +trwaÅ‚a O +walka O +z O +amatorami O +tworzÄ…cymi O +napisy O +do O +filmów O +, O +teraz O +skoÅ„czyÅ‚ O +siÄ™ O +czas O +polemik O +w O +gazecie O +, O +pora O +na O +salÄ™ O +sÄ…dowÄ… O +. O + +# sent_id = 46 +OczywiÅ›cie O +to O +tylko O +jedno O +źródÅ‚o O +napisów O +, O +ale O +" O +sprawa O +jest O +rozwojowa O +" O +. O + +# sent_id = 47 +Åatwiej O +bÄ™dzie O +zapewne O +nauczyć O +siÄ™ O +angielskiego O +, O +niemieckiego O +, O +francuskiego O +, O +hiszpaÅ„skiego O +, O +wÅ‚oskiego O +, O +japoÅ„skiego O +i O +czeskiego O +( O +na O +poczÄ…tek O +powinno O +wystarczyć O +) O +niż O +zmienić O +prawo O +autorskie O +. O + +# sent_id = 48 +Tymczasem O +ze O +wszystkich O +ogniw O +" O +pirackiego O +" O +Å‚aÅ„cucha O +tÅ‚umacze O +napisów O +sÄ… O +pewnie O +najmniej O +groźni O +, O +ale O +za O +to O +najÅ‚atwiejsi O +do O +wyÅ›ledzenia O +. O + +# sent_id = 49 +I O +jak O +zabierze O +siÄ™ O +napisy O +, O +na O +pewno O +spadnie O +liczba O +Å›ciÄ…ganych O +i O +udostÄ™pnianych O +filmów O +. O + +# sent_id = 50 +Na O +pewno O +. O + +# sent_id = 51 +W O +okolicy O +obowiÄ…zkowo O +jest O +grób O +Hamleta B-nam_liv_person +, O +wiatraki O +, O +a O +domy O +kryte O +sÄ… O +strzechÄ… O +lub O +azbestem O +. O + +# sent_id = 52 +Każdy O +nosi O +ze O +sobÄ… O +termos O +, O +z O +prohibicjÄ… O +ma O +on O +jednak O +niewiele O +wspólnego O +, O +w O +Å›rodku O +jest O +kawa O +, O +bez O +której O +przeciÄ™tny O +DuÅ„czyk O +nie O +przetrwaÅ‚ O +by O +nawet O +godziny O +. O + +# sent_id = 53 +Kawa O +jest O +smolista O +, O +raczej O +przyzwoita O +i O +jak O +na O +polskie O +standardy O +raczej O +mocna O +, O +chociaż O +fani O +espresso O +mogÄ… O +poczuć O +siÄ™ O +obrażeni O +takim O +stwierdzeniem O +. 
O + +# sent_id = 54 +KawÄ™ O +podaje O +siÄ™ O +przy O +każdej O +okazji O +, O +również O +w O +porze O +na O +kawÄ™ O +( O +o O +15 O +) O +, O +do O +kawy O +sÄ… O +wówczas O +buÅ‚ki O +z O +serem O +żółtym O +i O +marmoladÄ… O +, O +ewentualnie O +ciasto O +. O + +# sent_id = 55 +Mleka O +do O +kawy O +siÄ™ O +nie O +dolewa O +, O +w O +koÅ„cu O +jest O +w O +serze O +. O + +# sent_id = 56 +Ser O +to O +tylko O +jeden O +element O +deja O +vu O +, O +które O +napadÅ‚o O +mnie O +w O +Danii B-nam_loc_gpe_country +, O +gdzie O +zaczÄ…Å‚ O +em O +czytać O +nazwy O +zgodnie O +z O +wymowÄ… O +holenderskÄ… O +. O + +# sent_id = 57 +Kawa O +zresztÄ… O +też O +podobna O +, O +wioski O +też O +jak O +z O +Might B-nam_pro_software +and I-nam_pro_software +Magic I-nam_pro_software +, O +szosy O +peÅ‚ne O +opli O +astra B-nam_pro_brand +i O +fordów O +focusów B-nam_pro_brand +, O +nawet O +zioÅ‚owa O +wódka O +taka O +sama O +. O + +# sent_id = 58 +I O +oczywiÅ›cie O +ser O +z O +kminkiem O +. O + +# sent_id = 59 +Goudse O +belegen O +met O +komijn O +byÅ‚ O +zawsze O +moim O +holenderskim O +faworytem O +, O +w O +Danii B-nam_loc_gpe_country +sery O +również O +wystÄ™pujÄ… O +w O +różnym O +stopniu O +dojrzaÅ‚oÅ›ci O +, O +a O +dojrzaÅ‚y O +danbo O +. O +. O +. O + +# sent_id = 60 +Wystarczy O +uchylić O +lodówkÄ™ O +, O +żeby O +wiedzieć O +, O +że O +zostaÅ‚ O +jeszcze O +kawaÅ‚ek O +. O + +# sent_id = 61 +Szkoda O +, O +że O +taki O +maÅ‚y O +. O + +# sent_id = 62 +W O +Danii B-nam_loc_gpe_country +trafiÅ‚ O +em O +akurat O +na O +ekspresowe O +żniwa O +, O +caÅ‚y O +lipiec O +padaÅ‚o O +, O +musieli O +nadrabiać O +. O + +# sent_id = 63 +PatrzÄ…c O +na O +gigantyczne O +traktory O +i O +imperialne O +kombajny O +trudno O +zrozumieć O +duÅ„ski O +dystans O +wobec O +Unii B-nam_org_organization +Europejskiej I-nam_org_organization +. 
O + +# sent_id = 64 +Chociaż O +rozmawiaÅ‚ O +em O +z O +mÅ‚odym O +rolnikiem O +, O +który O +ma O +dwadzieÅ›cia O +hektarów O +, O +biegle O +mówi O +po O +angielsku O +i O +na O +wakacje O +jeździ O +do O +Austrii B-nam_loc_gpe_country +na O +narty O +. O + +# sent_id = 65 +Uprawia O +ziemiÄ™ O +, O +bo O +lubi O +, O +uważa O +to O +za O +ciekawÄ… O +pracÄ™ O +i O +nie O +ma O +nic O +do O +UE B-nam_org_organization +. O + +# sent_id = 66 +ZresztÄ… O +duÅ„scy B-nam_adj_country +rolnicy O +, O +nie O +tylko O +mÅ‚odzi O +, O +wyposażeni O +sÄ… O +w O +komputery O +i O +staÅ‚e O +Å‚Ä…cza O +, O +na O +dodatek O +potrafiÄ… O +z O +nich O +korzystać O +. O + +# sent_id = 67 +MajÄ… O +swoje O +strony O +internetowe O +, O +wprawdzie O +na O +etapie O +1 B-nam_oth_tech +. I-nam_oth_tech +0 I-nam_oth_tech +, O +nikt O +mnie O +do O +facebooka O +nie O +zaprosiÅ‚ O +, O +ale O +sieciowe O +umiejÄ™tnoÅ›ci O +sÄ… O +tam O +nieporównywalnie O +wiÄ™ksze O +niż O +w O +Polsce B-nam_loc_gpe_country +. O + +# sent_id = 68 +Poza O +tym O +Dania B-nam_loc_gpe_country +to O +spokojne O +wakacje O +( O +jeÅ›li O +zignorujemy O +to O +, O +że O +nasz O +domek O +wakacyjny O +może O +być O +pokryty O +azbestem O +) O +, O +byÅ‚ O +em O +w O +szczycie O +sezonu O +, O +w O +weekend O +, O +pogoda O +momentami O +niezÅ‚a O +, O +a O +plaże O +i O +deptaki O +niemal O +puste O +. O + +# sent_id = 69 +We O +wszystkich O +wioskach O +osiedla O +domków O +letniskowych O +, O +kemping O +co O +dwa O +kilometry O +, O +ale O +znikÄ…d O +nie O +dobiega O +basowe O +dudnienie O +, O +może O +byÅ‚ O +em O +na O +zbyt O +gÅ‚Ä™bokiej O +prowincji O +, O +a O +może O +rozrywkowi O +Niemcy B-nam_org_nation +, O +DuÅ„czycy B-nam_org_nation +i O +Holendrzy B-nam_org_nation +jeżdżą O +raczej O +do O +Hiszpanii B-nam_loc_gpe_country +. 
O + +# sent_id = 70 +Morza O +szum O +( O +w O +koÅ„cu O +byÅ‚ O +em O +nad O +BaÅ‚tykiem O +) O +, O +tÅ‚uste O +jedzenie O +- O +bo O +oprócz O +serów O +wystÄ™puje O +sporo O +miÄ™sa O +, O +kotletów O +mielonych O +w O +różnych O +postaciach O +, O +boczku O +w O +grubych O +plastrach O +, O +piwo O +, O +które O +niegdyÅ› O +sprowadzaÅ‚o O +siÄ™ O +do O +dwóch O +różnych O +marek O +( O +w O +tym O +tego O +w O +prawdopodobnie O +najlepszej O +butelce O +) O +, O +a O +teraz O +rzekomo O +co O +tydzieÅ„ O +powstaje O +( O +wskrzesza O +siÄ™ O +? O +) O +nowy O +lokalny O +browar O +, O +chociaż O +trzeba O +uważać O +, O +bo O +ceny O +lokalnych O +specjałów O +raczej O +nie O +dla O +nas O +, O +chyba O +że O +w O +przeliczeniu O +na O +procenty O +, O +bo O +wtedy O +niektórym O +piwom O +bliżej O +zdecydowanie O +do O +wina O +, O +a O +smak O +bynajmniej O +nie O +przypomina O +polskich O +" O +mocnych O +" O +i O +powiewajÄ…ce O +wszÄ™dzie O +, O +przed O +domami O +, O +za O +domami O +i O +obok O +domów O +, O +duÅ„skie O +flagi O +. O + +# sent_id = 71 +Czy O +piosenka O +to O +samochód O +? O + +# sent_id = 72 +Problem O +, O +czy O +chronić O +informacje O +jak O +wÅ‚asność O +materialnÄ… O +nie O +jest O +nowy O +. O + +# sent_id = 73 +Dla O +amerykaÅ„skiego B-nam_adj_country +prawnika O +, O +który O +przerabia O +ekonomicznÄ… O +analizÄ™ O +prawa O +na O +studiach O +, O +to O +oczywistość O +. O + +# sent_id = 74 +Polski B-nam_adj_country +prawnik O +na O +studiach O +miaÅ‚ O +może O +wprowadzenie O +do O +mikro O +i O +makro O +, O +wiÄ™c O +przy O +odrobinie O +szczęścia O +odróżni O +inflacjÄ™ O +od O +elastycznoÅ›ci O +popytu O +. O + +# sent_id = 75 +W O +zwiÄ…zku O +z O +tym O +w O +Polsce B-nam_loc_gpe_country +można O +zawsze O +liczyć O +na O +to O +, O +że O +prawnik O +ZAiKSu B-nam_org_organization +powie O +, O +że O +przecież O +to O +oczywiste O +, O +że O +samochód O +i O +zdjÄ™cie O +to O +jest O +dokÅ‚adnie O +to O +samo O +. 
O + +# sent_id = 76 +Bo O +przyszedÅ‚ O +do O +niego O +zapÅ‚akany O +fotograf O +. O + +# sent_id = 77 +Bo O +gdyby O +chciaÅ‚ O +wejść O +do O +czyjegoÅ› O +samochodu O +, O +to O +musiaÅ‚ O +by O +zapytać O +o O +zgodÄ™ O +. O + +# sent_id = 78 +MusiaÅ‚ O +by O +zapytać O +o O +zgodÄ™ O +wÅ‚aÅ›ciciela O +? O + +# sent_id = 79 +A O +dlaczego O +samochód O +ma O +wÅ‚aÅ›ciciela O +? O + +# sent_id = 80 +Dlaczego O +o O +zgodÄ™ O +, O +oprócz O +użytkownika O +, O +nie O +trzeba O +pytać O +wiÄ™c O +producenta O +samochodu O +? O + +# sent_id = 81 +Albo O +koproducentów O +, O +producenta O +klamki O +, O +tapicerki O +? O + +# sent_id = 82 +OczywiÅ›cie O +, O +szybko O +można O +znaleźć O +wyjÄ…tki O +. O + +# sent_id = 83 +Samochód O +wypożyczony O +albo O +samochód O +w O +leasingu O +być O +może O +jest O +bardziej O +podobny O +do O +utworu O +, O +którego O +licencjÄ™ O +kupujemy O +. O + +# sent_id = 84 +Jest O +jednak O +pewna O +zasadnicza O +różnica O +, O +jak O +powiedziaÅ‚ O +Krzysztof B-nam_liv_person +Siewicz I-nam_liv_person +- O +nie O +ma O +zaczarowanego O +ołówka O +, O +którym O +możemy O +ten O +wypożyczony O +samochód O +sobie O +skopiować O +. O + +# sent_id = 85 +Wciąż O +jeden O +samochód O +w O +tym O +samym O +czasie O +może O +pokonać O +jednÄ… O +trasÄ™ O +, O +zmieÅ›ci O +siÄ™ O +w O +nim O +okreÅ›lona O +liczba O +osób O +. O + +# sent_id = 86 +Dlaczego O +producenci O +nie O +chcieli O +by O +zmienić O +modelu O +sprzedaży O +samochodów O +? O + +# sent_id = 87 +Może O +powinni O +zacząć O +lobbować O +za O +takim O +rozwiÄ…zaniem O +( O +oczywiÅ›cie O +, O +jak O +już O +uda O +im O +siÄ™ O +wylobbować O +ratunek O +przed O +kryzysem O +) O +, O +bo O +przecież O +nie O +ma O +nic O +lepszego O +niż O +wÅ‚asność O +dla O +producenta O +i O +ustawowo O +ograniczone O +prawa O +nabywców O +. O + +# sent_id = 88 +Dlaczego O +nie O +wprowadzić O +licencji O +na O +samochody O +zamiast O +wÅ‚asnoÅ›ci O +? 
O + +# sent_id = 89 +Albo O +licencji O +na O +wszystko O +? O + +# sent_id = 90 +Niech O +tylko O +pierwotny O +twórca O +- O +stolarz O +, O +murarz O +, O +Å›lusarz O +bÄ™dzie O +wÅ‚aÅ›cicielem O +, O +a O +użytkownik O +licencjobiorcÄ… O +. O + +# sent_id = 91 +A O +jeÅ›li O +produkcja O +jest O +bardziej O +skomplikowana O +? O + +# sent_id = 92 +Nic O +prostszego O +- O +można O +wprowadzić O +takie O +prawo O +, O +żeby O +współproducentom O +również O +przysÅ‚ugiwaÅ‚o O +wynagrodzenie O +. O + +# sent_id = 93 +Jak O +tego O +pilnować O +? O + +# sent_id = 94 +Czy O +na O +pewno O +nabywca O +krzesÅ‚a O +nie O +korzysta O +z O +niego O +na O +imprezie O +publicznej O +? O + +# sent_id = 95 +Może O +na O +imprezÄ™ O +przyszedÅ‚ O +ktoÅ› O +spoza O +krÄ™gu O +towarzyskiego O +? O + +# sent_id = 96 +Czy O +licencja O +obejmuje O +przewożenie O +autostopowiczów O +? O diff --git a/notebooks/example_data/train.txt b/notebooks/example_data/train.txt new file mode 100644 index 0000000000000000000000000000000000000000..a3cfb2157629e53ae2f69215fa61419fecf76a7b --- /dev/null +++ b/notebooks/example_data/train.txt @@ -0,0 +1,12562 @@ +# sent_id = 619 +Niewybaczalnym O +bÅ‚Ä™dem O +byÅ‚o O +by O +pominiÄ™cie O +bogatej O +gatunkowo O +ichtiofauny O +na O +terenie O +stawów O +. O + +# sent_id = 620 +SkupiajÄ… O +siÄ™ O +tam O +bardzo O +rzadkie O +w O +naszym O +kraju O +gatunki O +takie O +jak O +: O +kieÅ‚b O +biaÅ‚opÅ‚etwy O +( O +Gobio O +albipinnatus O +) O +czy O +koza O +zÅ‚ota O +( O +Cobitis O +aurata O +) O +. O + +# sent_id = 621 +Na O +masowÄ… O +skalÄ™ O +( O +90 O +% O +caÅ‚ej O +produkcji O +na O +tych O +terenach O +) O +hoduje O +siÄ™ O +karpia O +( O +Cyprinus O +carpio O +) O +, O +który O +jest O +szanowany O +ze O +wzglÄ™du O +na O +ekologicznÄ… O +hodowlÄ™ O +nie O +tylko O +w O +Polsce B-nam_loc_gpe_country +, O +ale O +i O +w O +Europie B-nam_loc_land_continent +. 
O + +# sent_id = 622 +W O +stawach O +hodowany O +jest O +także O +lin O +( O +Tinca O +tinca O +) O +, O +sandacz O +( O +Stizostedion O +lucioperca O +) O +czy O +szczupak O +( O +Esox O +Lucius O +) O +. O + +# sent_id = 623 +CiekawostkÄ… O +jest O +wystÄ™powanie O +pstrÄ…ga O +potokowego O +( O +Salmo O +trutta O +fario O +) O +w O +górnej O +części O +rzeki O +SÄ…siecznicy B-nam_loc_hydronym_river +. O + +# sent_id = 624 +Oprócz O +ptaków O +i O +ryb O +wystÄ™pujÄ… O +tam O +także O +cenne O +gatunki O +ssaków O +( O +których O +jest O +ponad O +50 O +) O +: O +m O +. O +in O +. O +gacek O +szary O +( O +Plecotus O +austriacus O +) O +, O +nocek O +wÄ…satek O +( O +Myotis O +mystacinus O +) O +, O +sarna O +( O +Capreolus O +capreolus O +) O +, O +daniel O +( O +Dama O +dama O +) O +, O +piżmak O +( O +Ondatra O +zibethicus O +) O +, O +wydra O +( O +Lutrinae O +) O +, O +a O +nawet O +jenot O +( O +Nyctereutes O +procyonoides O +) O +czy O +norka O +amerykaÅ„ska O +( O +Neovison O +vison O +) O +. O + +# sent_id = 625 +Z O +herpetofauny O +możemy O +wyróżnić O +m O +. O +in O +. O +: O +rzekotkÄ™ O +drzewnÄ… O +( O +Hyla O +arboreta O +) O +, O +traszkÄ™ O +zwyczajnÄ… O +( O +Lissotriton O +vulgaris O +) O +, O +traszkÄ™ O +grzebieniastÄ… O +( O +Triturus O +cristatus O +) O +, O +żabÄ™ O +moczarowÄ… O +( O +Rana O +arvalis O +) O +, O +padalca O +( O +Anguis O +fragilis O +) O +czy O +żmijÄ™ O +zygzakowatÄ… O +( O +Vipera O +berus O +) O +. O + +# sent_id = 626 +Czy O +na O +ptaki O +, O +czy O +na O +ssaki O +warto O +tam O +pojechać O +, O +zobaczyć O +– O +byle O +z O +bezpiecznej O +odlegÅ‚oÅ›ci O +– O +i O +sfotografować O +. O + +# sent_id = 627 +W O +tym O +celu O +wÅ‚aÅ›nie O +powstaÅ‚y O +Å›cieżki O +i O +punkty O +obserwatorskie O +, O +aby O +tereny O +nie O +byÅ‚y O +dewastowane O +przez O +skrywajÄ…cych O +siÄ™ O +potajemnie O +i O +pÅ‚oszÄ…cych O +zwierzaki O +pseudo O +- O +obserwatorów O +. 
O + +# sent_id = 628 +Dostosujmy O +siÄ™ O +zatem O +do O +przepisów O +wybierajÄ…c O +siÄ™ O +na O +teren O +Parku B-nam_loc_land +Krajobrazowego I-nam_loc_land +Doliny I-nam_loc_land +Baryczy I-nam_loc_land +. O + +# sent_id = 629 +Obszar O +jak O +każdy O +naturalny O +jest O +zagrożony O +zanikiem O +i O +wyginiÄ™ciem O +, O +przez O +ostatnie O +lata O +wiele O +gatunków O +ptaków O +straciÅ‚o O +status O +lÄ™gowych O +, O +a O +ssaki O +takie O +jak O +daniele O +( O +Dama O +dama O +) O +czy O +sarny O +( O +Capreolus O +capreolus O +) O +sÄ… O +co O +raz O +częściej O +kÅ‚usowane O +, O +a O +dzik O +( O +Sus O +strofa O +) O +, O +sarna O +( O +Capreolus O +capreolus O +) O +, O +jeleÅ„ O +( O +Cervus O +elaphus O +) O +– O +eliminowane O +przez O +myÅ›liwych O +w O +ramach O +" O +kontroli O +" O +nad O +populacjÄ… O +. O + +# sent_id = 630 +Inne O +uciekajÄ… O +od O +zbliżajÄ…cych O +siÄ™ O +zabudowaÅ„ O +w O +poszukiwaniu O +jedzenia O +. O + +# sent_id = 631 +Warto O +przyczynić O +siÄ™ O +do O +zadbania O +o O +nasze O +najbardziej O +cenne O +tereny O +, O +a O +takimi O +niewÄ…tpliwie O +sÄ… O +rezerwaty O +w O +Parku B-nam_loc_land +Krajobrazowym I-nam_loc_land +Doliny I-nam_loc_land +Baryczy I-nam_loc_land +. O + +# sent_id = 12065 +NawÅ‚oć O +kanadyjska B-nam_adj_country + +# sent_id = 12066 +NawÅ‚oć O +kanadyjska B-nam_adj_country +( O +Solidago O +canadensis O +L O +. O +) O +– O +gatunek O +roÅ›liny O +wieloletniej O +, O +należący O +do O +rodziny O +astrowatych O +. O + +# sent_id = 12067 +Nazwa O +ludowa O +: O +" O +Drzewko O +Matki B-nam_liv_person +Boskiej I-nam_liv_person +" O +. O + +# sent_id = 12068 +Pochodzi O +z O +wschodniej O +części O +Ameryki B-nam_loc_land_continent +Północnej I-nam_loc_land_continent +. O + +# sent_id = 12069 +Jako O +uciekinier O +z O +upraw O +rozprzestrzeniÅ‚ O +siÄ™ O +w O +Europie B-nam_loc_land_continent +. 
O + +# sent_id = 12070 +W O +Polsce B-nam_loc_gpe_country +roÅ›nie O +na O +wiÄ™kszoÅ›ci O +terytorium O +na O +siedliskach O +naturalnych O +i O +synantropijnych O +. O + +# sent_id = 12071 +Status O +gatunku O +w O +polskiej O +florze O +: O +kenofit O +, O +agriofit O +. O + +# sent_id = 12072 +Prawdopodobnie O +rozprzestrzeniÅ‚a O +siÄ™ O +z O +uprawy O +roÅ›lin O +ozdobnych O +, O +doskonale O +aklimatyzuje O +siÄ™ O +i O +zaczyna O +nawet O +wypierać O +gatunki O +rodzime O +. O + +# sent_id = 12073 +SpecjaliÅ›ci O +uważajÄ… O +, O +że O +z O +tego O +powodu O +powinna O +być O +zwalczana O +. O + +# sent_id = 12074 +Morfologia O + +# sent_id = 12075 +; O +Åodyga O +: O +Wysokość O +do O +1 O +, O +5 O +m O +, O +wzniesiona O +, O +sztywna O +, O +pojedyncza O +, O +naga O +lub O +rzadko O +owÅ‚osiona O +, O +cienka O +. O + +# sent_id = 12076 +WewnÄ…trz O +jest O +pusta O +. O + +# sent_id = 12077 +; O +LiÅ›cie O +: O +PodÅ‚ugowatolancetowate O +albo O +lancetowate O +, O +o O +zaostrzonych O +koÅ„cach O +, O +brzegi O +ostro O +piÅ‚kowane O +. O + +# sent_id = 12078 +Po O +liÅ›ciach O +najÅ‚atwiej O +odróżnić O +jÄ… O +od O +podobnego O +gatunku O +– O +nawÅ‚oci O +pospolitej O +. O + +# sent_id = 12079 +Wszystkie O +liÅ›cie O +z O +rzadka O +owÅ‚osione O +, O +zmniejszajÄ…ce O +siÄ™ O +ku O +wierzchoÅ‚kowi O +Å‚odygi O +. O + +# sent_id = 12080 +; O +Kwiaty O +: O +Drobne O +, O +żółte O +, O +zebrane O +w O +koszyczki O +( O +od O +5 O +do O +14 O +kwiatków O +w O +jednym O +koszyczku O +) O +, O +a O +te O +w O +wiechowate O +kwiatostany O +na O +szczycie O +Å‚odygi O +. O + +# sent_id = 12081 +Listki O +okrywy O +kwiatostanu O +stÄ™pione O +, O +bardzo O +nierówne O +, O +lancetowatego O +ksztaÅ‚tu O +. O + +# sent_id = 12082 +; O +Owoc O +: O +NieÅ‚upki O +z O +puchem O +kielichowym O +. O + +# sent_id = 12083 +; O +KÅ‚Ä…cze O +: O +Silnie O +rozgaÅ‚Ä™zione O +. 
O + +# sent_id = 12084 +Biologia O +i O +ekologia O + +# sent_id = 12085 +Siedlisko O +: O +aluwia O +nadrzeczne O +, O +rowy O +melioracyjne O +, O +mokre O +Å‚Ä…ki O +. O + +# sent_id = 12086 +W O +klasyfikacji O +zbiorowisk O +roÅ›linnych O +gatunek O +charakterystyczny O +dla O +zespoÅ‚u O +Ass O +. O + +# sent_id = 12087 +Rudbeckio O +- O +Solidaginetium O +. O + +# sent_id = 12088 +Kwiaty O +przedprÄ…tne O +, O +zapylane O +przez O +motyle O +i O +bÅ‚onkówki O +. O + +# sent_id = 12089 +RoÅ›lina O +miododajna O +. O + +# sent_id = 12090 +Kwitnie O +od O +lipca O +do O +sierpnia O +. O + +# sent_id = 12091 +Nasiona O +rozsiewane O +przez O +wiatr O +. O + +# sent_id = 12092 +RoÅ›lina O +trujÄ…ca O +: O +Podobnie O +jak O +nawÅ‚oć O +pospolita O +jest O +roÅ›linÄ… O +szkodliwÄ… O +dla O +bydÅ‚a O +domowego O +. O + +# sent_id = 12093 +Stosowana O +w O +odpowiednich O +dawkach O +posiada O +też O +podobne O +wÅ‚aÅ›ciwoÅ›ci O +lecznicze O +. O + +# sent_id = 12094 +Zastosowanie O + +# sent_id = 12095 +Jest O +uprawiana O +jako O +roÅ›lina O +ozdobna O +. O + +# sent_id = 12096 +Ciekawostki O + +# sent_id = 12097 +Indianie B-nam_org_nation +Odżibwejowie I-nam_org_nation +robili O +lewatywy O +z O +korzeni O +nawÅ‚oci O +. O + +# sent_id = 12098 +Z O +ziela O +i O +korzeni O +sporzÄ…dzali O +wyciÄ…gi O +, O +które O +używali O +wewnÄ™trznie O +jako O +Å›rodki O +pobudzajÄ…ce O +i O +wzmacniajÄ…ce O +. O + +# sent_id = 12099 +Indianie B-nam_org_nation +Alabama I-nam_org_nation +używali O +herbatki O +z O +nawÅ‚oci O +do O +leczenia O +przeziÄ™bieÅ„ O +, O +zaÅ› O +zewnÄ™trznie O +do O +leczenia O +obolaÅ‚ych O +miejsc O +. O + +# sent_id = 0 +RoboRally B-nam_pro_title +czy O +Wysokie B-nam_pro_title +napiÄ™cie I-nam_pro_title +? O + +# sent_id = 1 +Ponieważ O +nie O +mamy O +ostatnio O +czasu O +grać O +w O +żadne O +gry O +, O +czas O +kupić O +nowÄ… O +. 
O + +# sent_id = 2 +Zawsze O +jest O +to O +jakaÅ› O +dodatkowa O +motywacja O +do O +Å›ciÄ…gniÄ™cia O +znajomych O +. O + +# sent_id = 3 +Roboty B-nam_pro_title +majÄ… O +kilkanaÅ›cie O +lat O +i O +pochodzÄ… O +z O +USA B-nam_loc_gpe_country +, O +Wysokie B-nam_pro_title +napiÄ™cie I-nam_pro_title +jest O +dużo O +mÅ‚odsze O +, O +powstaÅ‚o O +w O +Niemczech B-nam_loc_gpe_country +. O + +# sent_id = 4 +Do O +robotów O +zachÄ™ca O +demo O +, O +w O +którym O +na O +próbÄ™ O +można O +sobie O +robota O +zaprogramować O +, O +do O +napiÄ™cia O +dodatkowa O +plansza O +z O +EuropÄ… B-nam_loc_land_region +Å›rodkowÄ… I-nam_loc_land_region +. O + +# sent_id = 5 +Dodatkowa O +oznacza O +dodatkowy O +wydatek O +. O + +# sent_id = 6 +Można O +go O +uniknąć O +decydujÄ…c O +siÄ™ O +na O +grÄ™ O +w O +USA B-nam_loc_gpe_country +lub O +w O +Niemczech B-nam_loc_gpe_country +( O +dwa O +najwiÄ™ksze O +rynki O +gier O +planszowych O +. O +. O +. O +) O +. O + +# sent_id = 7 +W O +sumie O +skoro O +już O +zbudowali O +Å›my O +w O +Stanach B-nam_loc_gpe_country +linie O +kolejowe O +, O +może O +czas O +na O +elektryfikacjÄ™ O +? O + +# sent_id = 8 +Za O +robotami O +przemawia O +zapowiedź O +kompletnego O +chaosu O +, O +co O +po O +dopracowanych O +do O +ostatniego O +szczegółu O +i O +uporzÄ…dkowanych O +grach O +niemieckich B-nam_adj_country +może O +być O +miÅ‚Ä… O +odmianÄ… O +. O + +# sent_id = 9 +W O +Wysokim B-nam_pro_title +napiÄ™ciu I-nam_pro_title +szans O +na O +robienie O +drugiemu O +co O +tobie O +niemiÅ‚e O +jakby O +mniej O +, O +szczęście O +też O +chyba O +odgrywa O +mniejszÄ… O +rolÄ™ O +. O + +# sent_id = 10 +Zamiast O +laserów O +atmosferÄ™ O +podgrzewajÄ… O +aukcje O +surowców O +, O +choć O +sÄ… O +elektrownie O +, O +których O +problem O +zaopatrzenia O +nie O +dotyczy O +. 
O + +# sent_id = 11 +Nie O +minęło O +wiele O +czasu O +od O +Nagrody B-nam_pro_award +Nobla I-nam_pro_award +, O +gdy O +pani O +Jelinek B-nam_liv_person +znów O +zrobiÅ‚a O +siÄ™ O +trochÄ™ O +zapomniana O +. O + +# sent_id = 12 +ZresztÄ… O +nagrodÄ™ O +też O +jej O +przyznano O +jakoÅ› O +tak O +półgÄ™bkiem O +i O +przy O +akompaniamencie O +szowinistycznych O +wyzwisk O +. O + +# sent_id = 13 +Weźmy O +takÄ… O +PianistkÄ™ B-nam_pro_title_book +- O +nie O +jest O +to O +raczej O +książka O +do O +omówienia O +przy O +rodzinnym O +obiedzie O +u O +wujostwa O +. O + +# sent_id = 14 +Nie O +porusza O +tematów O +górnolotnych O +, O +przy O +których O +dobrze O +upozować O +siÄ™ O +na O +czÅ‚owieka O +wrażliwego O +i O +przenikliwego O +, O +gdzieÅ› O +pomiÄ™dzy O +kaczkÄ… O +z O +jabÅ‚kami O +a O +likierem O +. O + +# sent_id = 15 +Porusza O +tematy O +bolesne O +i O +ciemne O +, O +przy O +czym O +porusza O +je O +uwodzicielsko O +, O +dawkuje O +napiÄ™cie O +i O +sprawnie O +snuje O +historiÄ™ O +. O + +# sent_id = 16 +Tworzy O +ponurÄ… O +i O +zÅ‚ożonÄ… O +strukturÄ™ O +powiÄ…zaÅ„ O +miÄ™dzy O +bohaterami O +, O +którÄ… O +później O +równie O +ponuro O +i O +w O +napiÄ™ciu O +rozplÄ…tuje O +. O + +# sent_id = 17 +I O +co O +to O +w O +ogóle O +ma O +znaczyć O +, O +że O +nie O +można O +siÄ™ O +oderwać O +od O +powieÅ›ci O +o O +przemocy O +psychicznej O +i O +napiÄ™ciu O +seksualnym O +! O + +# sent_id = 18 +Co O +wiÄ™cej O +, O +można O +jeszcze O +uznać O +, O +że O +historia O +ta O +jest O +czymÅ› O +wiÄ™cej O +niż O +historiÄ… O +tych O +konkretnie O +postaci O +, O +że O +pokazuje O +pewne O +uniwersalne O +sytuacje O +( O +jak O +to O +z O +dobrÄ… O +literaturÄ… O +czasem O +bywa O +) O +. O + +# sent_id = 19 +Oj O +, O +biada O +. O + +# sent_id = 20 +To O +może O +obrzydzić O +nam O +deser O +, O +lub O +sprawić O +, O +że O +zamyÅ›limy O +siÄ™ O +tak O +, O +że O +nam O +herbata O +wystygnie O +. 
O + +# sent_id = 21 +Jelinek B-nam_liv_person +jest O +jak O +piÄ™kny O +but O +, O +który O +ciÅ›nie O +. O + +# sent_id = 22 +Zamiast O +trzymać O +takie O +buty O +na O +widoku O +i O +cierpieć O +z O +powodu O +pytaÅ„ O +: O + +# sent_id = 23 +O O +, O +jakie O +piÄ™kne O +, O +czemu O +ich O +nie O +nosisz O +? O + +# sent_id = 24 +- O +lepiej O +wyrzucić O +je O +na O +Å›mietnik O +i O +zapomnieć O +. O + +# sent_id = 25 +Chyba O +, O +że O +przypadkiem O +bÄ™dzie O +tam O +buszowaÅ‚ O +jakiÅ› O +niepoważny O +szwedzki B-nam_adj_country +akademik O +. O + +# sent_id = 26 +W O +polskiej B-nam_adj_country +lidze O +koszykarzy O +rozpoczęła O +siÄ™ O +faza O +play O +- O +off O +, O +co O +oznacza O +, O +że O +mecze O +sÄ… O +czÄ™sto O +, O +nadszedÅ‚ O +czas O +weteranów O +itd O +. O + +# sent_id = 27 +CzÄ™ste O +mecze O +to O +czÄ™ste O +wizyty O +na O +stronie O +ligi O +. O + +# sent_id = 28 +Dlaczego O +wiÄ™c O +na O +tym O +nie O +skorzystać O +i O +nie O +wprowadzić O +nagle O +rejestracji O +użytkowników O +? O + +# sent_id = 29 +Ograniczenie O +dostÄ™pu O +nie O +dotyczy O +wszystkich O +informacji O +na O +stronie O +, O +ale O +mimo O +to O +jest O +ciosem O +poniżej O +pasa O +. O + +# sent_id = 30 +OglÄ…dasz O +serial O +? O + +# sent_id = 31 +Wszystko O +wyjaÅ›ni O +siÄ™ O +w O +ostatnim O +odcinku O +? O + +# sent_id = 32 +To O +pÅ‚ać O +. O + +# sent_id = 33 +OczywiÅ›cie O +rejestracja O +jest O +zupeÅ‚nie O +bezpÅ‚atna O +. O + +# sent_id = 34 +W O +koÅ„cu O +moje O +imiÄ™ O +i O +nazwisko O +, O +telefon O +, O +adres O +domowy O +i O +zgoda O +na O +otrzymywanie O +spamu O +od O +sponsora O +ligi O +nie O +sÄ… O +nic O +warte O +. O + +# sent_id = 35 +Chcesz O +posÅ‚uchać O +mÄ…droÅ›ci O +Radka B-nam_liv_person +Hyżego I-nam_liv_person +, O +to O +poczytaj O +o O +najnowszej O +karcie O +kredytowej O +. 
O + +# sent_id = 36 +Czy O +wypowiedzi O +RadosÅ‚awa B-nam_liv_person +sÄ… O +dla O +mnie O +warte O +wiÄ™cej O +niż O +dla O +banku O +dane O +o O +moich O +dochodach O +( O +te O +akurat O +podaje O +siÄ™ O +opcjonalnie O +) O +? O + +# sent_id = 37 +Czy O +skoro O +i O +tak O +dostajÄ™ O +tyle O +spamu O +( O +zamówionego O +i O +niezamówionego O +) O +, O +to O +czemu O +jeszcze O +jedno O +źródÅ‚o O +miaÅ‚o O +by O +mi O +zaszkodzić O +? O + +# sent_id = 38 +DodajÄ™ O +tutaj O +parÄ™ O +dalszych O +informacji O +o O +mnie O +i O +o O +blogu O +, O +w O +postaci O +Å‚atwej O +do O +czytania O +ankiety O +" O +pytanie O +i O +odpowiedź O +" O +: O +) O +Kim O +jest O +autorka O +tego O +bloga O +, O +gdzie O +mieszka O +, O +z O +kim O +, O +dlaczego O +i O +po O +co O +? O + +# sent_id = 39 +Kto O +wie O +? O + +# sent_id = 40 +Mieszkam O +na O +Plejadach B-nam_loc_astronomical +, O +włóczÄ™ O +siÄ™ O +po O +Å›wiecie O +, O +mąż O +blisko O +mnie O +albo O +daleko O +, O +zależy O +jak O +go O +praca O +pogoni O +, O +i O +czy O +wolno O +mi O +z O +nim O +jechać O +( O +ze O +wzglÄ™du O +na O +zdrowie O +) O +. O + +# sent_id = 41 +Od O +lat O +już O +moje O +raczej O +staÅ‚e O +miejsca O +pobytu O +sÄ… O +poza O +PolskÄ… B-nam_loc_gpe_country +. O + +# sent_id = 42 +A O +po O +za O +tym O +, O +bardzo O +ceniÄ™ O +ochronÄ™ O +sfery O +prywatnej O +. O + +# sent_id = 43 +Wiem O +że O +to O +w O +czasach O +internetowo O +- O +multimedialnych O +prawie O +niemożliwe O +. O + +# sent_id = 44 +Ale O +mimo O +wszystko O +próbujÄ™ O +. O + +# sent_id = 45 +Co O +to O +ma O +znaczyć O +" O +prawie O +wegetarianka O +" O +, O +jest O +to O +wege O +blog O +czy O +nie O +? O + +# sent_id = 46 +To O +jest O +mój O +prywatny O +blog O +o O +tematach O +kuchennych O +, O +bajecznych O +i O +innych O +. O + +# sent_id = 47 +Zwykle O +kuchennych O +i O +do O +tego O +zwiÄ…zanych O +. 
O + +# sent_id = 48 +Bo O +kuchnia O +to O +serce O +domu O +( O +i O +przede O +wszystkim O +osoba O +lub O +osoby O +które O +w O +tej O +kuchni O +gotujÄ… O +) O +, O +a O +dom O +to O +poczÄ…tek O +życia O +czÅ‚owieka O +. O + +# sent_id = 49 +Ja O +sama O +jestem O +praktycznie O +100 O +% O +wege O +- O +wegan O +( O +przede O +wszystkim O +z O +powodów O +zdrowotnych O +, O +ale O +nie O +tylko O +) O +. O + +# sent_id = 50 +Jedynie O +nie O +lubiÄ™ O +fanatyzmu O +, O +wszystko O +jedno O +w O +którym O +kierunku O +( O +wiÄ™cej O +o O +tym O +tutaj O +) O +Mąż O +nie O +jest O +wegetarianinem O +. O + +# sent_id = 51 +Co O +to O +znaczy O +" O +z O +powodów O +zdrowotnych O +" O +, O +o O +jakÄ… O +chorobÄ™ O +chodzi O +? O + +# sent_id = 52 +To O +niestety O +nie O +jest O +tylko O +jedna O +choroba O +, O +to O +mieszanka O +paru O +chorób O +i O +lekarze O +do O +dziÅ› O +siÄ™ O +kłócÄ… O +która O +jest O +która O +, O +gdzie O +i O +dlaczego O +: O +/ O +GłównÄ… O +częściÄ… O +jest O +choroba O +częściowo O +zapalna O +, O +reumatyczna O +i O +anty O +- O +komórkowa O +, O +która O +niszczy O +organizm O +od O +Å›rodka O +podobnie O +do O +raka O +, O +i O +wedÅ‚ug O +" O +nowoczesnych O +" O +lekarzy O +jest O +nieuleczalna O +. O + +# sent_id = 53 +JedynÄ… O +" O +opcjÄ… O +" O +dla O +mnie O +byÅ‚o O +by O +Å‚ykanie O +setek O +agresywnych O +" O +lekarstw O +" O +, O +które O +by O +mnie O +nawet O +nie O +wyleczyÅ‚y O +, O +ani O +bólów O +nie O +zatrzymaÅ‚y O +, O +jedynie O +trzymaÅ‚y O +w O +stanie O +pół O +umarÅ‚ym O +. O + +# sent_id = 54 +Albo O +i O +nawet O +ta O +sama O +chemio O +- O +terapia O +jak O +w O +przypadku O +raka O +: O +/ O +( O +ale O +bez O +żadnej O +szansy O +na O +wyleczenie O +) O +. O + +# sent_id = 55 +Ale O +ja O +przecież O +żyć O +chcÄ™ O +i O +muszÄ™ O +! 
O + +# sent_id = 56 +WiÄ™c O +siÄ™ O +sama O +wzięła O +m O +za O +to O +, O +i O +po O +wielu O +latach O +dosÅ‚ownych O +mÄ™k O +i O +ryzykowania O +wÅ‚asnego O +życia O +znalazÅ‚a O +m O +wiele O +odpowiedzi O +na O +pytania O +, O +na O +które O +nawet O +lekarze O +mi O +nie O +odpowiadali O +: O +) O +. O + +# sent_id = 57 +Ratuje O +mnie O +codziennie O +magia O +przyrody O +, O +i O +jedzenia O +prosto O +od O +przyrody O +pochodzÄ…cego O +: O +) O +. O + +# sent_id = 58 +Czy O +w O +tym O +blogu O +bÄ™dzie O +pisanie O +o O +chorobach O +, O +lekarzach O +i O +szpitalach O +? O + +# sent_id = 59 +Absolutnie O +nie O +: O +/ O +Chyba O +tylko O +gdy O +o O +tym O +napisać O +muszÄ™ O +, O +bo O +to O +zatrzymuje O +blogowanie O +, O +w O +stylu O +" O +mam O +wÅ‚aÅ›nie O +grypÄ™ O +i O +nie O +bÄ™dzie O +mnie O +tutaj O +" O +itp O +. O + +# sent_id = 60 +Mam O +bardzo O +wielkÄ… O +nadziejÄ™ O +że O +nigdy O +tu O +nic O +o O +żadnym O +szpitalu O +nie O +bÄ™dÄ™ O +pisać O +, O +chyba O +że O +by O +Å›my O +wreszcie O +dziecko O +mieli O +, O +ale O +i O +to O +mnie O +nie O +zmusi O +do O +pisania O +o O +szpitalach O +. O + +# sent_id = 61 +Tylko O +spowoduje O +otwarcie O +blogu O +specjalnie O +dla O +malucha O +: O +) O +. O + +# sent_id = 62 +Ja O +ten O +( O +i O +poprzedni O +) O +blog O +zaczęła O +m O +aby O +wÅ‚aÅ›nie O +pomóc O +sobie O +skoÅ„czyć O +z O +jakimikolwiek O +chorobami O +, O +na O +zawsze O +. O + +# sent_id = 63 +Ja O +tu O +piszÄ™ O +o O +dobrych O +rzeczach O +: O +sÅ‚oÅ„cu O +, O +przyrodzie O +, O +dobrej O +kuchni O +, O +poezji O +, O +bajkach O +, O +fantazji O +, O +muzyce O +, O +sztuce O +, O +dzieciach O +, O +humorze O +. O +. O +. O + +# sent_id = 64 +O O +mężu O +mniej O +, O +aby O +chronić O +jego O +sferÄ™ O +prywatnÄ… O +i O +szczególnie O +pracÄ™ O +. 
O + +# sent_id = 65 +Od O +kiedy O +blogujÄ™ O +bardziej O +prywatnie O +i O +ostrożnie O +mąż O +przestaÅ‚ O +siÄ™ O +tak O +gniewać O +, O +i O +nawet O +częściej O +moje O +blogi O +odwiedza O +( O +co O +mnie O +bardzo O +ucieszyÅ‚o O +! O +) O +. O + +# sent_id = 66 +SkÄ…d O +pochodzÄ… O +zdjÄ™cia O +w O +blogu O +? O + +# sent_id = 67 +WiÄ™kszość O +z O +nich O +jest O +z O +Wikimedii B-nam_pro_media_web +, O +z O +Flickr B-nam_pro_media_web +Creative B-nam_pro_title_treaty +Commons I-nam_pro_title_treaty +albo O +z O +Google B-nam_pro_media_web +Obrazy I-nam_pro_media_web +Creative B-nam_pro_title_treaty +Commons I-nam_pro_title_treaty +. O + +# sent_id = 68 +Część O +jest O +moja O +wÅ‚asna O +. O + +# sent_id = 69 +Staram O +siÄ™ O +nie O +dodawać O +tu O +żadnych O +zdjęć O +ani O +obrazów O +bez O +otwartej O +( O +publicznej O +) O +licencji O +. O + +# sent_id = 70 +JeÅ›li O +siÄ™ O +to O +nie O +udaje O +to O +sÄ… O +zwykle O +zdjÄ™cia O +ogólnie O +publikowane O +tysiÄ…ce O +razy O +w O +internecie O +. O + +# sent_id = 71 +Nigdy O +nie O +zabieram O +nikomu O +jego O +prywatnych O +, O +blogowych O +albo O +podobnych O +zdjęć O +, O +i O +nigdy O +nie O +udajÄ™ O +że O +coÅ› O +co O +nie O +moje O +jest O +moje O +! O + +# sent_id = 72 +Mój O +blog O +jest O +prywatny O +, O +nie O +jestem O +ani O +firma O +, O +ani O +biznes O +ani O +nic O +, O +nikomu O +nic O +nie O +niszczÄ™ O +ani O +nie O +zabieram O +, O +ani O +nie O +mam O +żadnych O +master O +planów O +do O +podboju O +wszechÅ›wiata O +: O +) O +. O + +# sent_id = 73 +Co O +to O +jest O +" O +Licencja O +Creative B-nam_pro_title_treaty +Commons I-nam_pro_title_treaty +" O +? O + +# sent_id = 74 +Nie O +ma O +praw O +autorskich O +? O + +# sent_id = 75 +Spokojnie O +, O +sÄ… O +prawa O +autorskie O +- O +podstawowe O +: O +) O +Znaczy O +siÄ™ O +że O +jeżeli O +ja O +zrobiÄ™ O +zdjÄ™cie O +, O +jak O +np O +to O +tutaj O +: O +. O +. 
O +to O +jest O +to O +zdjÄ™cie O +mojego O +autorstwa O +i O +to O +autorstwo O +nie O +znika O +: O +) O +. O + +# sent_id = 76 +Ale O +ja O +po O +prostu O +decydujÄ™ O +że O +to O +zdjÄ™cie O +może O +każdy O +kto O +chce O +używac O +, O +kiedy O +chce O +i O +do O +czego O +chce O +, O +również O +komercjalnie O +. O + +# sent_id = 77 +Na O +tym O +polega O +moja O +licencja O +: O +) O +. O + +# sent_id = 78 +Nie O +zezÅ‚oszczÄ™ O +siÄ™ O +ani O +nic O +jeÅ›li O +ktoÅ› O +nie O +pamiÄ™ta O +mojego O +adresu O +i O +tylko O +wie O +że O +Creative B-nam_pro_title_treaty +Commons I-nam_pro_title_treaty +, O +jedynie O +w O +przypadku O +użycia O +komercjalnego O +proszÄ™ O +o O +zawsze O +dodanie O +linku O +albo O +podpisu O +z O +adresem O +mojego O +bloga O +. O + +# sent_id = 79 +IstniejÄ… O +różne O +wersje O +licencji O +Creative B-nam_pro_title_treaty +Commons I-nam_pro_title_treaty +i O +należy O +trzymać O +siÄ™ O +tego O +co O +autor O +albo O +autorka O +mówi O +. O + +# sent_id = 80 +Co O +siÄ™ O +staÅ‚o O +z O +poprzednim O +blogiem O +" O +Kuchenka B-nam_pro_title +Internetowa I-nam_pro_title +" O +? O + +# sent_id = 81 +TÅ‚umaczÄ™ O +to O +tutaj O +. O + +# sent_id = 82 +Blog O +zostaje O +, O +tyle O +tylko O +że O +jako O +strona O +statyczna O +, O +bez O +dalszych O +wpisów O +. O + +# sent_id = 83 +Wszystkie O +przepisy O +i O +linki O +i O +zdjÄ™cia O +też O +sÄ… O +. O + +# sent_id = 84 +Ale O +główna O +strona O +to O +teraz O +Kuchenne B-nam_pro_title +bajki I-nam_pro_title +: O +) O +( O +czyli O +ta O +w O +której O +wÅ‚aÅ›nie O +jesteÅ› O +) O +. O + +# sent_id = 85 +Czemu O +nagłówek O +bloga O +jest O +taki O +zimowy O +i O +niekulinarny O +? O + +# sent_id = 86 +Bo O +teraz O +jest O +zima O +: O +) O +. O + +# sent_id = 87 +Ja O +zmieniam O +nagłówki O +z O +porami O +roku O +, O +mój O +poprzedni O +( O +nieaktywny O +) O +blog O +ma O +np O +. 
O +nadal O +nagłówek O +jesienny O +, O +bo O +skoÅ„czyÅ‚a O +m O +tam O +pisać O +na O +jesieni O +. O + +# sent_id = 88 +Czyli O +z O +poczÄ…tkiem O +wiosny O +bÄ™dzie O +tu O +nagłówek O +wiosenny O +, O +potem O +letni O +itd O +. O + +# sent_id = 89 +Zamierzam O +również O +nieco O +przystosowywać O +kolory O +bloga O +do O +pór O +roku O +: O +) O +. O + +# sent_id = 90 +A O +niekulinarny O +jest O +dlatego O +, O +bo O +ja O +kocham O +przyrodÄ™ O +nawet O +bardziej O +niż O +kuchniÄ™ O +, O +i O +bardzo O +pragnęła O +m O +mieć O +choć O +trochÄ™ O +przyrody O +w O +blogu O +: O +) O +. O + +# sent_id = 91 +Ben B-nam_liv_person +Vershbow I-nam_liv_person +na O +blogu O +if B-nam_pro_title +: I-nam_pro_title +book I-nam_pro_title +opisuje O +rozwój O +projektu O +Google B-nam_pro_software +Books I-nam_pro_software +, O +który O +umożliwia O +już O +nie O +tylko O +przeglÄ…danie O +zeskanowanych O +w O +ramach O +projektu O +książek O +, O +ale O +też O +przeglÄ…danie O +informacji O +o O +książkach O +, O +których O +skany O +sÄ… O +jeszcze O +niedostÄ™pne O +. O + +# sent_id = 92 +Vershbow B-nam_liv_person +zauważa O +, O +że O +w O +takiej O +postaci O +Google B-nam_pro_software +Books I-nam_pro_software +, O +podobnie O +jak O +katalog O +Amazon B-nam_pro_media_web +, O +stajÄ… O +siÄ™ O +cennym O +źródÅ‚em O +bibliograficznym O +. O + +# sent_id = 93 +Problem O +w O +tym O +, O +że O +nie O +sÄ… O +to O +źródÅ‚a O +otwarte O +. O + +# sent_id = 94 +StÄ…d O +pomysÅ‚ O +, O +by O +poÅ‚Ä…czyć O +razem O +różnorakie O +źródÅ‚a O +– O +biorÄ…c O +choćby O +opisy O +tytułów O +z O +Wikipedii B-nam_pro_media_web +– O +w O +jeden O +“ O +Katalog B-nam_pro_media_web +SpoÅ‚eczny I-nam_pro_media_web +†O +( O +People B-nam_pro_media_web +’ I-nam_pro_media_web +s I-nam_pro_media_web +Card I-nam_pro_media_web +Catalogue I-nam_pro_media_web +) O +. 
O + +# sent_id = 95 +Katalog O +ten O +byÅ‚ O +by O +swobodnie O +dostÄ™pny O +, O +także O +do O +edycji O +, O +a O +w O +nim O +każda O +książka O +posiadaÅ‚a O +by O +wÅ‚asnÄ… O +stronÄ™ O +, O +na O +której O +można O +by O +rozwijać O +jej O +opisy O +, O +toczyć O +dyskusje O +, O +itd O +. O + +# sent_id = 96 +“ O +Jak O +na O +razie O +Google B-nam_org_company +prowadzi O +w O +budowaniu O +nowoczesnego O +systemu O +bibliograficznego O +. O + +# sent_id = 97 +Przez O +to O +katalog O +tytułów O +przyszÅ‚oÅ›ci O +stanie O +siÄ™ O +równoczeÅ›nie O +źródÅ‚em O +znaczÄ…cych O +zysków O +reklamowych O +. O + +# sent_id = 98 +Niech O +Google B-nam_org_company +robi O +swoje O +– O +ale O +czy O +nie O +mogli O +by O +Å›my O +stworzyć O +czegoÅ› O +lepszego O +? O +†O + +# sent_id = 99 +Vershbow B-nam_liv_person +zwraca O +uwagÄ™ O +na O +kluczowÄ… O +kwestiÄ™ O +– O +że O +niezbÄ™dne O +sÄ… O +niekomercyjne O +, O +otwarte O +alternatywy O +dla O +dziaÅ‚aÅ„ O +dygitalizacyjnych O +podejmowanych O +dziÅ› O +przede O +wszystkim O +przez O +komercyjne O +firmy O +. O + +# sent_id = 123 +Otrzymali O +Å›my O +patronat O +PAN B-nam_org_organization + +# sent_id = 124 +Z O +przyjemnoÅ›ciÄ… O +informujemy O +, O +że O +nasz O +projekt O +zostaÅ‚ O +objÄ™ty O +honorowym O +patronatem O +przez O +PolskÄ… B-nam_org_organization +AkademiÄ™ I-nam_org_organization +Nauk I-nam_org_organization +. O + +# sent_id = 125 +Na O +podstawie O +pytaÅ„ O +zgÅ‚aszanych O +przez O +osoby O +, O +z O +którymi O +rozmawiamy O +o O +projekcie O +, O +przygotowali O +Å›my O +zbiór O +pytaÅ„ O +i O +odpowiedzi O +zwiÄ…zanych O +z O +projektem O +“ O +Otwórz B-nam_eve_human +książkÄ™ I-nam_eve_human +†O +. O + +# sent_id = 126 +Dokument O +skierowany O +jest O +do O +autorów O +chcÄ…cych O +udostÄ™pnić O +swoje O +książki O +w O +ramach O +projektu O +, O +jego O +współpracowników O +, O +oraz O +wszystkich O +osób O +zainteresowanych O +udziaÅ‚em O +w O +projekcie O +. 
O + +# sent_id = 127 +19 O +grudnia O +2008 O +roku O +odbyÅ‚o O +siÄ™ O +pierwsze O +nasze O +spotkanie O +ze O +współpracownikami O +projektu O +„ O +Otwórz B-nam_eve_human +książkÄ™ I-nam_eve_human +†O +. O + +# sent_id = 128 +W O +spotkaniu O +wziÄ™li O +udziaÅ‚ O +: O +Marta B-nam_liv_person +Zimniak I-nam_liv_person +- I-nam_liv_person +HaÅ‚ajko I-nam_liv_person +i O +Zofia B-nam_liv_person +Dworakowska I-nam_liv_person +z O +Instytutu B-nam_org_institution +Kultury I-nam_org_institution +Polskiej I-nam_org_institution +na O +UW B-nam_org_organization +, O +Jan B-nam_liv_person +ZajÄ…c I-nam_liv_person +z O +WydziaÅ‚u B-nam_org_institution +Psychologii I-nam_org_institution +UW B-nam_org_organization +, O +PaweÅ‚ B-nam_liv_person +Krzyworzeka I-nam_liv_person +z O +Instytutu B-nam_org_institution +Etnologii I-nam_org_institution +i I-nam_org_institution +Antropologi I-nam_org_institution +Kulturowej I-nam_org_institution +UW B-nam_org_organization +oraz O +MichaÅ‚ B-nam_liv_person +Piotr I-nam_liv_person +PrÄ™gowski I-nam_liv_person +z O +Politechniki B-nam_org_organization +Warszawskiej I-nam_org_organization +. O + +# sent_id = 129 +Na O +spotkaniu O +krótko O +opowiedzieli O +Å›my O +o O +projekcie O +i O +porozmawiali O +Å›my O +o O +istotnych O +kwestiach O +– O +takich O +jak O +prawne O +aspekty O +projektu O +( O +licencje O +Creative B-nam_oth_license +Commons I-nam_oth_license +, O +dozwolony O +użytek O +, O +pola O +eksploatacji O +, O +przejÅ›cie O +praw O +etc O +. 
O +) O +, O +sposób O +prezentacji O +książek O +w O +Internecie B-nam_oth_tech +( O +ksztaÅ‚t O +serwisu O +internetowego O +, O +formaty O +, O +w O +których O +bÄ™dÄ… O +udostÄ™pniane O +książki O +, O +strony O +– O +wizytówki O +autorów O +) O +, O +prawdopodobne O +pytania O +i O +obawy O +ze O +strony O +autorów O +( O +argumenty O +za O +wziÄ™ciem O +udziaÅ‚u O +w O +projekcie O +, O +konsekwencje O +udostÄ™pnienia O +książki O +w O +wersji O +elektronicznej O +, O +problem O +plagiatów O +) O +, O +relacje O +miÄ™dzy O +cyfrowymi O +wersjami O +książek O +a O +obiegiem O +wydawniczym O +, O +możliwość O +wÅ‚Ä…czenia O +instytucji O +, O +z O +którymi O +sÄ… O +zwiÄ…zani O +współpracownicy O +, O +w O +projekt O +jako O +jego O +partnerów O +. O + +# sent_id = 130 +PaweÅ‚ B-nam_liv_person +Krzyworzeka I-nam_liv_person +opowiedziaÅ‚ O +o O +ciekawym O +projekcie O +digitalizacji O +czasopism O +etnograficznych O +, O +który O +prowadzi O +w O +Polskim B-nam_org_organization +Instytucie I-nam_org_organization +Antropologii I-nam_org_organization +. O + +# sent_id = 131 +Celem O +projektu O +„ O +Otwórz B-nam_eve_human +książkÄ™ I-nam_eve_human +†O +jest O +stworzenie O +cyfrowej O +kolekcji O +współczesnych O +książek O +naukowych O +, O +udostÄ™pnionych O +przez O +autorów O +. O + +# sent_id = 132 +W O +jej O +skÅ‚ad O +wejdÄ… O +również O +książki O +niedostÄ™pne O +już O +na O +rynku O +w O +wersji O +drukowanej O +, O +a O +mimo O +to O +posiadajÄ…ce O +wartość O +naukowÄ… O +i O +edukacyjnÄ… O +. O + +# sent_id = 133 +Chcemy O +z O +pomocÄ… O +nowych O +technologii O +zwiÄ™kszyć O +dostÄ™pność O +publikacji O +naukowych O +. O + +# sent_id = 134 +Ich O +niskie O +nakÅ‚ady O +drukiem O +powodujÄ… O +, O +że O +z O +czasem O +korzysta O +siÄ™ O +z O +nich O +rzadziej O +, O +niż O +to O +mogÅ‚o O +by O +i O +powinno O +mieć O +miejsce O +. 
O + +# sent_id = 135 +Do O +współpracy O +w O +projekcie O +zapraszamy O +samodzielnych O +pracowników O +naukowych O +, O +którzy O +posiadajÄ… O +prawa O +autorskie O +do O +opublikowanych O +przez O +siebie O +książek O +. O + +# sent_id = 136 +BÄ™dÄ… O +one O +udostÄ™pnione O +w O +internecie B-nam_oth_tech +z O +poszanowaniem O +praw O +autorów O +i O +na O +zasadach O +przez O +nich O +okreÅ›lonych O +. O + +# sent_id = 137 +W O +tym O +celu O +wykorzystamy O +miÄ™dzy O +innymi O +system O +licencji O +Creative B-nam_oth_license +Commons I-nam_oth_license +, O +opartych O +na O +zasadzie O +„ O +Pewne O +prawa O +zastrzeżone O +†O +. O + +# sent_id = 138 +Projekt O +„ O +Otwórz B-nam_eve_human +książkÄ™ I-nam_eve_human +†O +jest O +prowadzony O +przez O +Interdyscyplinarne B-nam_org_institution +Centrum I-nam_org_institution +Modelowania I-nam_org_institution +na O +Uniwersytecie B-nam_org_organization +Warszawskim I-nam_org_organization +( O +ICM B-nam_org_institution +UW B-nam_org_organization +) O +, O +w O +ramach O +projektów O +Biblioteka B-nam_eve_human +Wirtualna I-nam_eve_human +Nauki I-nam_eve_human +oraz O +Creative B-nam_org_organization +Commons I-nam_org_organization +Polska I-nam_org_organization +. O + +# sent_id = 139 +Po O +drugie O +, O +książki O +z O +naszej O +kolekcji O +, O +które O +sÄ… O +dostÄ™pne O +na O +licencji O +Creative B-nam_oth_license +Commons I-nam_oth_license +umieÅ›cili O +Å›my O +– O +na O +mocy O +licencji O +– O +w O +serwisie O +Scribd B-nam_pro_media_web +. O + +# sent_id = 140 +Pozwala O +on O +publikować O +w O +sieci O +PDFy B-nam_oth_tech +, O +które O +sÄ… O +widoczne O +bezpoÅ›rednio O +w O +przeglÄ…darce O +i O +dajÄ… O +siÄ™ O +Å‚atwo O +i O +elegancko O +umieszczać O +na O +innych O +stronach O +. O + +# sent_id = 141 +Do O +tego O +system O +Scribd B-nam_pro_media_web +umożliwia O +licencjonowanie O +Creative B-nam_oth_license +Commons I-nam_oth_license +. 
O + +# sent_id = 142 +Po O +trzecie O +– O +i O +chyba O +najważniejsze O +– O +uruchomili O +Å›my O +protokół O +OAI B-nam_oth_tech +PMH I-nam_oth_tech +, O +pozwalajÄ…cy O +automatycznie O +katalogować O +z O +zewnÄ…trz O +metadane O +książek O +z O +naszej O +kolekcji O +( O +opisujemy O +je O +w O +modelu O +Dublin B-nam_oth_tech +Core I-nam_oth_tech +) O +. O + +# sent_id = 143 +Oznacza O +to O +miÄ™dzy O +innymi O +, O +że O +treÅ›ci O +naszego O +serwisu O +mogÄ… O +być O +przeszukiwane O +w O +ramach O +wyszukiwarek O +agregujÄ…cych O +zbiory O +różnych O +kolekcji O +. O + +# sent_id = 144 +WykorzystujÄ…c O +OAI B-nam_oth_tech +PMH I-nam_oth_tech +podpiÄ™li O +Å›my O +nasz O +serwis O +do O +Federacji B-nam_fac_system +Bibliotek I-nam_fac_system +Cyfrowych I-nam_fac_system +– O +ogólnopolskiej O +wyszukiwarki O +pozwalajÄ…cej O +przeszukiwać O +naraz O +kilkadziesiÄ…t O +bibliotek O +cyfrowych O +i O +kilkaset O +tysiÄ™cy O +pozycji O +. O + +# sent_id = 145 +KolejnÄ… O +kwestiÄ… O +spornÄ… O +, O +dotyczÄ…cÄ… O +mieszkanka O +, O +jest O +rozstaw O +mebli O +w O +kuchni O +, O +tak O +by O +wygodnie O +móc O +z O +niej O +korzystać O +, O +ale O +przy O +jednoczesnym O +ocaleniu O +istnienia O +drzwi O +kuchennych O +. O + +# sent_id = 146 +OsobiÅ›cie O +wydaje O +mi O +siÄ™ O +to O +nierealne O +i O +optujÄ™ O +za O +wywaleniem O +tych O +drzwi O +niezgody O +w O +cholerÄ™ O +( O +czyli O +do O +piwnicy O +; O +dla O +informatyków O +: O +/ O +dev O +/ O +null O +) O +, O +ale O +mÄ™ska O +część O +decydentów O +jest O +najwidoczniej O +w O +nich O +zakochana O +, O +wiÄ™c O +nie O +wiem O +czy O +mojej O +despotycznoÅ›ci O +wystarczy O +, O +by O +postawić O +na O +swoim O +. O + +# sent_id = 147 +KolejnÄ… O +kolejnÄ… O +kwestiÄ… O +spornÄ… O +jest O +szafka O +pod O +umywalkÄ™ O +. O + +# sent_id = 148 +Posiadamy O +dwie O +opcje O +, O +które O +majÄ… O +swoich O +zwolenników O +i O +przeciwników O +. 
O + +# sent_id = 149 +JednÄ… O +z O +nich O +jest O +odratowanie O +szafki O +z O +tarasu O +, O +która O +wymagaÅ‚a O +by O +zaÅ›lepienia O +dziur O +z O +jednej O +strony O +i O +wydrążenia O +ich O +z O +drugiej O +. O + +# sent_id = 150 +Istnieje O +też O +prawdopodobieÅ„stwo O +niekompatybilnoÅ›ci O +szafki O +ze O +starÄ… O +umywalkÄ… O +. O + +# sent_id = 151 +Natomiast O +drugÄ… O +jest O +kupienie O +nowej O +szafki O +z O +umywalkÄ… O +i O +co O +za O +tym O +idzie O +brak O +problemów O +Å‚atajÄ…co O +- O +dopasowawczych O +. O + +# sent_id = 152 +I O +chyba O +na O +tym O +zakoÅ„czymy O +doniesienia O +z O +linii O +frontu O +. O + +# sent_id = 153 +Pozdrawiamy O +sÄ…siadki O +- O +podglÄ…daczki O +z O +wieżowca O +obok O +. O + +# sent_id = 154 +Obiecujemy O +zainspirować O +Panie O +do O +lepszego O +zaspokajania O +potrzeb O +Waszych O +mężów O +. O + +# sent_id = 155 +OglÄ…dajcie O +nas O +już O +od O +Nowego B-nam_eve_human_holiday +Roku I-nam_eve_human_holiday +. O + +# sent_id = 156 +Jak O +wykorzystywane O +sÄ… O +licencje O +CC B-nam_oth_license + +# sent_id = 157 +JakiÅ› O +czas O +temu O +Unia B-nam_org_organization +Europejska I-nam_org_organization +wsparÅ‚a O +eksperyment O +dotyczÄ…cy O +planowania O +przestrzeni O +zurbanizowanej O +, O +w O +ramach O +którego O +kilka O +europejskich B-nam_adj +miast O +usunęło O +wszystkie O +znaki O +drogowe O +ze O +swoich O +ulic O +. O + +# sent_id = 158 +Na O +razie O +nic O +tutaj O +nie O +ma O +– O +co O +nawet O +daje O +siÄ™ O +zauważyć O +. O + +# sent_id = 159 +DostosowujÄ™ O +spokojnie O +i O +bez O +poÅ›piechu O +ten O +blog O +. O + +# sent_id = 160 +Szablon O +do O +WordPress B-nam_pro_software +’ O +a O +– O +co O +także O +widać O +– O +nie O +jest O +mojego O +autorstwa O +. 
O + +# sent_id = 161 +ZmieniÄ™ O +go O +dopiero O +na O +koÅ„cu O +– O +zresztÄ… O +jeszcze O +nie O +wiem O +, O +czy O +bÄ™dÄ™ O +robiÅ‚a O +szablon O +od O +podstaw O +, O +czy O +przerobiÄ™ O +jakiÅ› O +dostÄ™pny O +w O +sieci O +. O + +# sent_id = 162 +Póki O +co O +dopiero O +poznajÄ… O +możliwoÅ›ci O +WordPress B-nam_pro_software +. O + +# sent_id = 163 +Już O +dawno O +miaÅ‚a O +m O +zamiar O +postawić O +blog O +na O +skrypcie O +WordPress B-nam_pro_software +. O + +# sent_id = 164 +TestowaÅ‚a O +m O +różne O +skrypty O +, O +ale O +WordPress B-nam_pro_software +spodobaÅ‚ O +mi O +siÄ™ O +najbardziej O +. O + +# sent_id = 165 +Moim O +zdaniem O +jego O +popularność O +jest O +w O +peÅ‚ni O +zasÅ‚użona O +. O + +# sent_id = 166 +Co O +prawda O +– O +przy O +okazji O +– O +„ O +mÄ™czÄ… O +†O +na O +innym O +serwerze O +Textpattern B-nam_pro_software +. O + +# sent_id = 167 +Skrypt O +, O +o O +którym O +pisaÅ‚a O +m O +kiedyÅ› O +na O +swoim O +bloggerowym O +blogu O +. O + +# sent_id = 168 +Od O +tamtego O +czasu O +Textpattern B-nam_pro_software +doczekaÅ‚ O +siÄ™ O +polskiego O +tÅ‚umaczenia O +– O +i O +w O +koÅ„cu O +postanowiÅ‚a O +m O +go O +przetestować O +. O + +# sent_id = 169 +Nadal O +twierdzÄ™ O +, O +że O +to O +ciekawy O +skrypt O +– O +ale O +nie O +specjalnie O +odpowiada O +mi O +rozwiÄ…zanie O +dotyczÄ…ce O +szablonów O +. O + +# sent_id = 170 +Co O +prawda O +sama O +struktura O +szablonu O +jest O +przejrzysta O +– O +i O +tworzenie O +nowych O +szablonów O +nie O +sprawia O +problemu O +. O + +# sent_id = 171 +Jednak O +mniej O +przyjemnÄ… O +okolicznoÅ›ciÄ… O +przyrody O +jest O +to O +, O +iż O +w O +Textpattern B-nam_pro_software +nie O +ma O +odrÄ™bnego O +folderu O +z O +szablonami O +. O + +# sent_id = 172 +Każdy O +szablon O +trzeba O +tworzyć O +edytujÄ…c O +podstawowy O +. O + +# sent_id = 173 +To O +wyklucza O +możliwość O +wyboru O +szablonu O +z O +poziomu O +panelu O +administracyjnego O +. 
O + +# sent_id = 174 +TÄ™ O +niedogodność O +częściowo O +likwiduje O +dodatek O +umożliwiajÄ…cy O +eksport O +i O +import O +szablonów O +– O +jednak O +nie O +do O +koÅ„ca O +. O + +# sent_id = 175 +WracajÄ…c O +do O +tego O +blogu O +– O +powstaje O +ponieważ O +coraz O +bardziej O +denerwuje O +mnie O +blogger B-nam_pro_media_web +. O + +# sent_id = 176 +Przeszkadza O +mi O +brak O +kategorii O +i O +podziaÅ‚u O +wpisów O +na O +wstÄ™p O +i O +peÅ‚en O +tekst O +. O + +# sent_id = 177 +Nie O +zrezygnujÄ™ O +z O +Bloggera B-nam_pro_media_web +– O +wÅ‚ożyÅ‚a O +m O +trochÄ™ O +pracy O +w O +pozycjonowanie O +tego O +blogu O +– O +i O +zwyczajnie O +szkoda O +mi O +jej O +efektów O +. O + +# sent_id = 178 +Jednak O +dalsze O +pisanie O +o O +wszystkich O +moich O +zainteresowaniach O +zwiÄ…zanych O +z O +internetem B-nam_oth_tech +na O +Bloggerze B-nam_pro_media_web +mija O +siÄ™ O +z O +celem O +. O + +# sent_id = 179 +Czym O +wiÄ™cej O +wpisów O +– O +tym O +wiÄ™kszy O +baÅ‚agan O +– O +i O +trudniej O +cokolwiek O +znaleść O +. O + +# sent_id = 180 +PostanowiÅ‚a O +m O +wiÄ™c O +na O +Bloggerze B-nam_pro_media_web +pisać O +drobne O +„ O +zajawki O +†O +dotyczÄ…ce O +wpisów O +na O +tym O +blogu O +. O + +# sent_id = 181 +MyÅ›laÅ‚a O +m O +teÄ… O +nad O +innÄ… O +opcjÄ… O +. O + +# sent_id = 182 +Ograniczenia O +tematyki O +blogu O +tylko O +do O +wpisów O +na O +temat O +blogosfery O +. O + +# sent_id = 183 +W O +sumie O +nadal O +nie O +jestem O +pewna O +, O +którÄ… O +opcjÄ™ O +wybraÅ‚ O +– O +zdecydowanie O +nie O +jest O +mojÄ… O +mocnÄ… O +stronÄ… O +. O + +# sent_id = 184 +Na O +razie O +dostosowujÄ™ O +WordPress B-nam_pro_software +’ I-nam_pro_software +a I-nam_pro_software +– O +jak O +skoÅ„czÄ™ O +to O +pomyÅ›lÄ™ O +. O + +# sent_id = 185 +Recenzowany O +gatunek O +: O +Flor B-nam_pro_brand +de I-nam_pro_brand +Obera I-nam_pro_brand +. 
O + +# sent_id = 186 +TroszkÄ™ O +testów O +nie O +zaszkodzi O +, O +a O +że O +ostatnio O +pijam O +przeróżne O +gatunki O +Yerba O +Mate O +postanowiÅ‚ O +em O +stworzyć O +cykl O +, O +majÄ…cy O +na O +celu O +przedstawić O +moje O +subiektywne O +odczucia O +na O +temat O +różniastych O +gatunków O +. O + +# sent_id = 187 +Na O +poczÄ…tek O +chciaÅ‚ O +by O +m O +opisać O +maÅ‚o O +popularny O +w O +naszym O +kraju O +gatunek O +, O +mianowicie O +" O +Flor B-nam_pro_brand +de I-nam_pro_brand +Obera I-nam_pro_brand +" O +. O + +# sent_id = 188 +Do O +wyboru O +tej O +Yerby O +namówiÅ‚a O +mnie O +zawsze O +przyjemna O +Pani O +ze O +Szczypty B-nam_fac_goe +, O +kiedy O +to O +szukaÅ‚ O +em O +mocniejszego O +gatunku O +. O + +# sent_id = 189 +PostanowiÅ‚ O +em O +jÄ… O +zamówić O +w O +Herbatkowo B-nam_pro_media_web +- O +wybieram O +zawsze O +najtaÅ„szy O +sklep O +, O +także O +nie O +traktujcie O +tego O +jako O +reklamÄ™ O +, O +jeden O +gatunek O +jest O +tam O +droższy O +inny O +- O +jak O +ten O +- O +taÅ„szy O +. O + +# sent_id = 190 +ZamówiÅ‚ O +em O +w O +piÄ…tek O +, O +powinien O +em O +dostać O +w O +poniedziaÅ‚ek O +, O +jednak O +ze O +wzglÄ™du O +na O +chochliki O +komputerowe O +powstaÅ‚o O +jednodniowe O +opóźnienie O +( O +za O +co O +dostaÅ‚ O +em O +gratisowÄ… O +przesyÅ‚kÄ™ O +. O +) O + +# sent_id = 191 +Kurier O +dostarczyÅ‚ O +mi O +jÄ… O +we O +wtorek O +, O +po O +otworzeniu O +, O +niestety O +kiepskiej O +jakoÅ›ci O +papierowego O +opakowania O +wydobyÅ‚ O +siÄ™ O +przyjemny O +zapach O +, O +lekko O +gorzkawy O +, O +jednak O +charakterystyczny O +dla O +argentyÅ„skich O +gatunków O +. O + +# sent_id = 192 +Bardzo O +maÅ‚a O +ilość O +pyÅ‚u O +zrobiÅ‚a O +na O +mnie O +dobre O +wrażenie O +. O + +# sent_id = 193 +WiÄ™cej O +listków O +niż O +gaÅ‚Ä…zek O +daje O +kolejny O +plus O +. O + +# sent_id = 194 +PoczÄ…tek O +Å›wietny O +. 
O + +# sent_id = 195 +3 O +/ O +4 O +matero O +zostaÅ‚o O +napeÅ‚nione O +, O +La B-nam_pro_brand +Mulata I-nam_pro_brand +AL I-nam_pro_brand +17 I-nam_pro_brand +" O +wkrÄ™cona O +" O +w O +susz O +, O +woda O +w O +temperaturze O +niższej O +niż O +standardowo O +- O +75 O +st O +. O +C O +( O +tak O +miaÅ‚em O +polecane O +i O +to O +siÄ™ O +sprawdziÅ‚o O +) O +gotowa O +wiÄ™c O +czas O +na O +pierwsze O +zalanie O +. O + +# sent_id = 196 +ÅšwiÄ™ty B-nam_liv_person +Tomasz I-nam_liv_person +ciÄ…gnie O +bardzo O +szybko O +, O +wiÄ™c O +kolejne O +zalanie O +po O +krótkiej O +chwili O +. O + +# sent_id = 197 +Pierwszy O +siorb O +i O +. O +. O +. O +miÅ‚e O +zaskoczenie O +, O +mimo O +zapowiadanej O +dużej O +mocy O +, O +czuć O +dużo O +Yerby O +, O +maÅ‚o O +goryczy O +, O +lekko O +- O +kwaÅ›ny O +smak O +. O + +# sent_id = 198 +Nie O +chcÄ…c O +używać O +porównaÅ„ O +do O +innych O +gatunków O +i O +nie O +majÄ…c O +takiej O +potrzeby O +stwierdzam O +oryginalny O +smak O +. O + +# sent_id = 199 +Po O +szóstym O +zalaniu O +nadal O +czuć O +porzÄ…dny O +smak O +Yerby O +, O +bez O +wiÄ™kszej O +straty O +, O +do O +tego O +mocne O +pobudzenie O +. O + +# sent_id = 200 +PodsumowujÄ…c O +- O +Gatunek O +dla O +tych O +, O +którzy O +nie O +lubiÄ… O +kwaÅ›nych O +, O +cierpkich O +smaków O +, O +natomiast O +uwielbiajÄ… O +duże O +, O +a O +do O +tego O +dÅ‚ugie O +pobudzenie O +. O + +# sent_id = 201 +Yerba O +ta O +stawia O +na O +nogi O +szybko O +i O +przyjemnie O +, O +pobudza O +ciaÅ‚o O +, O +uspokaja O +ducha O +. O + +# sent_id = 202 +Opakowanie O +: O +3 O +/ O +10 O +- O +jak O +zwykle O +Å›liski O +papier O +, O +pÄ™kajÄ…cy O +od O +spodu O +. O + +# sent_id = 203 +Zapach O +: O +9 O +/ O +10 O +- O +lekko O +gorzkawy O +, O +ogólnie O +przyjemny O +i O +delikatny O +. O + +# sent_id = 204 +Smak O +: O +9 O +/ O +10 O +- O +lekko O +kwaÅ›ny O +, O +czuć O +dużo O +Yerby O +. 
O + +# sent_id = 205 +Pobudzenie O +: O +10 O +/ O +10 O +- O +jedna O +z O +najmocniejszych O +jakie O +piÅ‚ O +em O +i O +chyba O +jakie O +sÄ… O +. O + +# sent_id = 206 +WytrzymaÅ‚ość O +: O +8 O +/ O +10 O +- O +po O +szóstym O +zalaniu O +bez O +wiÄ™kszej O +utraty O +smaku O +. O + +# sent_id = 207 +Cena O +- O +jakość O +: O +10 O +/ O +10 O +- O +Jak O +na O +tÄ… O +jakość O +cena O +( O +od O +20PLN O +/ O +0 O +, O +5kg O +) O +jest O +bardzo O +niska O +. O + +# sent_id = 208 +TworzÄ… O +siÄ™ O +w O +okresach O +przelotów O +, O +konstruujÄ…c O +przeróżne O +figury O +na O +niebie O +. O + +# sent_id = 209 +Niekiedy O +jest O +to O +najpopularniejszy O +klucz O +, O +innym O +razem O +znane O +laikom O +tylko O +z O +popularnego O +powiedzenia O +szyk O +" O +gÄ™siego O +" O +. O + +# sent_id = 210 +Naukowcy O +od O +lat O +badajÄ… O +fenomen O +tego O +zjawiska O +. O + +# sent_id = 211 +Najbardziej O +skrajne O +opinie O +mówiÅ‚y O +o O +przypadku O +, O +a O +drugie O +o O +zamontowanym O +systemie O +nawigacji O +GPS B-nam_oth_tech +. O + +# sent_id = 212 +Jak O +jest O +naprawdÄ™ O +z O +ptasimi O +formacjami O +? O + +# sent_id = 213 +Dlaczego O +i O +kiedy O +wÄ™drujÄ… O +? O + +# sent_id = 214 +Podstawowym O +powodem O +jest O +oczywiÅ›cie O +zmiana O +pór O +roku O +. O + +# sent_id = 215 +Szczególnie O +jest O +to O +zauważane O +w O +krajach O +w O +klimacie O +umiarkowanym O +, O +na O +półkuli O +północnej O +. O + +# sent_id = 216 +Można O +wtedy O +odróżnić O +porÄ™ O +ciepÅ‚Ä… O +( O +okres O +od O +poczÄ…tku O +marca O +do O +poÅ‚owy O +października O +) O +i O +porÄ™ O +mroźnÄ… O +( O +listopad O +- O +luty O +) O +. O + +# sent_id = 217 +Warunkuje O +to O +dostÄ™p O +do O +pożywienia O +, O +dlatego O +ptaki O +sÄ… O +zmuszone O +przenosić O +siÄ™ O +zimÄ… O +w O +tereny O +ciepÅ‚e O +, O +na O +półkulÄ™ O +poÅ‚udniowÄ… O +. 
O + +# sent_id = 218 +Dla O +przykÅ‚adu O +weźmy O +jeden O +popularnych O +polskich O +gatunków O +– O +obecnÄ… O +niemal O +w O +każdym O +regionie O +kaczkÄ™ O +krzyżówkÄ™ O +. O + +# sent_id = 219 +Jest O +to O +gatunek O +wÄ™drowny O +, O +jednak O +kilkanaÅ›cie O +procent O +osobników O +z O +naszego O +kraju O +pozostaje O +u O +nas O +nawet O +w O +okresie O +zimowym O +. O + +# sent_id = 220 +Ulubionymi O +zimowiskami O +sÄ… O +dla O +nich O +regiony O +poÅ‚udniowo O +- O +zachodniej O +i O +północnej O +Polski B-nam_loc_gpe_country +przy O +niezamarzajÄ…cych O +rzekach O +. O + +# sent_id = 221 +PozostaÅ‚e O +osobniki O +odlatujÄ… O +znad O +WisÅ‚y B-nam_loc_hydronym_river +miÄ™dzy O +wrzeÅ›niem O +, O +a O +grudniem O +. O + +# sent_id = 222 +PowracajÄ… O +do O +nas O +od O +lutego O +do O +kwietnia O +. O + +# sent_id = 223 +SÄ… O +także O +gatunki O +, O +którym O +wystarczy O +przemieszczenie O +siÄ™ O +o O +kilkaset O +kilometrów O +np O +. O +z O +północy O +Polski B-nam_loc_gpe_country +na O +poÅ‚udnie O +, O +gdzie O +różnica O +temperatur O +może O +wynieść O +kilka O +stopni O +C O +. O +, O +dla O +nich O +jednak O +to O +wystarczy O +. O + +# sent_id = 224 +Tak O +jest O +z O +myszoÅ‚owem O +, O +który O +wÄ™drujÄ…c O +np O +. O +z O +Norwegii B-nam_loc_gpe_country +znajdzie O +cieplejsze O +rejony O +już O +w O +Polsce B-nam_loc_gpe_country +powiÄ™kszajÄ…c O +tym O +samym O +populacjÄ™ O +tego O +gatunku O +w O +naszym O +kraju O +. O + +# sent_id = 225 +Wtedy O +powstaje O +walka O +o O +pokarm O +i O +myszoÅ‚owy O +muszÄ… O +szukać O +pożywienia O +nawet O +na O +północnych O +częściach O +Afryki B-nam_loc_land_continent +. O + +# sent_id = 226 +Co O +ciekawe O +sÄ… O +w O +stanie O +pokonać O +podczas O +przelotu O +ponad O +dziesięć O +tysiÄ™cy O +kilometrów O +. O + +# sent_id = 227 +Gatunek O +ten O +nie O +ukÅ‚ada O +siÄ™ O +w O +formacje O +, O +lecÄ…c O +w O +parach O +. 
O + +# sent_id = 228 +Dlaczego O +wiÄ™c O +ptaki O +poÅ›wiÄ™cajÄ… O +tyle O +czasu O +na O +odloty O +i O +później O +majÄ… O +chęć O +powrócić O +na O +letnisko O +? O + +# sent_id = 229 +Odpowiedź O +jest O +prosta O +– O +pokarm O +. O + +# sent_id = 230 +Na O +północnej O +części O +globu O +sezon O +letni O +oznacza O +wielkÄ… O +stołówkÄ™ O +, O +od O +owadów O +do O +gryzoni O +– O +wszystkie O +gatunki O +stajÄ… O +siÄ™ O +liczniejsze O +, O +co O +daje O +pole O +do O +popisu O +dla O +wygÅ‚odniaÅ‚ych O +ptaków O +, O +przebywajÄ…cych O +w O +okresie O +lÄ™gowym O +, O +bÄ…dź O +już O +dokarmiajÄ…cych O +mÅ‚ode O +. O + +# sent_id = 231 +Można O +napisać O +" O +co O +gatunek O +to O +obyczaj O +" O +– O +każdy O +z O +nich O +charakteryzuje O +siÄ™ O +innym O +okresem O +odlotu O +oraz O +innym O +sposobem O +zimowania O +. O + +# sent_id = 232 +Dr O +WiesÅ‚aw B-nam_liv_person +Nowicki I-nam_liv_person +dÅ‚ugo O +staÅ‚ O +w O +obronie O +polskiej O +przyrody O +i O +walczyÅ‚ O +z O +różnymi O +samorzÄ…dami O +w O +Polsce B-nam_loc_gpe_country +. O + +# sent_id = 233 +Przez O +wiele O +lat O +blokowaÅ‚ O +on O +budowÄ™ O +mostu O +północnego O +w O +Warszawie B-nam_loc_gpe_city +. O + +# sent_id = 234 +Jednak O +, O +kiedy O +ulegÅ‚ O +- O +postanowiÅ‚ O +skutecznie O +doradzić O +jak O +usuwać O +drzewa O +zgodnie O +z O +naturÄ… O +i O +ustaliÅ‚ O +miejsca O +wystawienia O +budek O +lÄ™gowych O +dla O +ptaków O +itd O +. O + +# sent_id = 235 +DostaÅ‚ O +za O +to O +wynagrodzenie O +i O +co O +? O + +# sent_id = 236 +" O +PrzepiÄ™kny O +artykuÅ‚ O +" O +w O +naszej O +naczelnej O +, O +narodowej O +GAZECIE O +. O + +# sent_id = 237 +ZostaÅ‚ O +skrytykowany O +za O +pobór O +wynagrodzenia O +, O +zwany O +przez O +redaktorów O +jako O +" O +haracz O +" O +. O + +# sent_id = 238 +Każdy O +pracuje O +i O +stara O +siÄ™ O +jak O +może O +, O +jeÅ›li O +dostaÅ‚ O +za O +to O +pieniÄ…dze O +to O +chwaÅ‚a O +mu O +za O +to O +co O +robiÅ‚ O +. 
O + +# sent_id = 239 +Gorzej O +jakby O +dostaÅ‚ O +pieniÄ…dze O +, O +a O +nic O +nie O +zrobiÅ‚ O +. O + +# sent_id = 240 +Inni O +siedzÄ… O +za O +biurkiem O +, O +zbijajÄ… O +przysÅ‚owiowe O +bÄ…ki O +i O +biorÄ… O +wynagrodzenie O +. O + +# sent_id = 241 +On O +przynajmniej O +spróbowaÅ‚ O +walczyć O +z O +systemem O +. O + +# sent_id = 242 +Lepsze O +otrzymywanie O +pieniÄ™dzy O +za O +" O +doradzanie O +" O +jak O +to O +napisaÅ‚a O +wypiórcza O +, O +niż O +za O +kÅ‚amstwa O +i O +oszczerstwa O +jakie O +sÄ… O +wypisywane O +na O +Å‚amach O +w O +/ O +w O +gazety O +. O + +# sent_id = 243 +Pozdrawiam O +serdecznie O +i O +życzÄ™ O +wszystkim O +optymizmu O +. O + +# sent_id = 244 +Do O +tej O +pory O +myÅ›laÅ‚ O +em O +, O +że O +gwoździem O +do O +trumny O +promocji O +Krakowa B-nam_loc_gpe_city +byÅ‚ O +szaliczek O +, O +ale O +ostatnio O +zobaczyÅ‚ O +em O +reklamÄ™ O +piguÅ‚ki O +z O +hasÅ‚em O +" O +72 O +godziny O +po O +. O +. O +. O +Krakowie B-nam_loc_gpe_city +" O +. O + +# sent_id = 245 +Ostatni O +weekend O +spÄ™dzili O +Å›my O +wÅ‚aÅ›nie O +w O +Krakowie B-nam_loc_gpe_city +, O +gdzie O +mieli O +Å›my O +nadziejÄ™ O +przekonać O +siÄ™ O +, O +że O +WrocÅ‚aw B-nam_loc_gpe_city +wciąż O +jest O +miastem O +pustym O +i O +cichym O +. O + +# sent_id = 246 +W O +poszukiwaniu O +boozy O +Brits B-nam_org_nation +obeszli O +Å›my O +rynek B-nam_fac_square +i O +planty B-nam_fac_park +, O +ale O +o O +dziwo O +trafiÅ‚a O +siÄ™ O +tylko O +jedna O +grupa O +. O + +# sent_id = 247 +Czyli O +tyle O +ile O +w O +piÄ…tkowy O +wieczór O +można O +spotkać O +we O +WrocÅ‚awiu B-nam_loc_gpe_city +. O + +# sent_id = 248 +Może O +jednak O +byÅ‚o O +za O +wczeÅ›nie O +. O + +# sent_id = 249 +Może O +nie O +byÅ‚o O +na O +ten O +weekend O +tanich O +biletów O +. 
O + +# sent_id = 250 +CiÄ…gle O +zastanawia O +mnie O +czemu O +Kraków B-nam_loc_gpe_city +, O +miasto O +, O +które O +wygraÅ‚o O +na O +loterii O +brak O +zniszczeÅ„ O +wojennych O +i O +jeszcze O +zamek O +na O +wzgórzu O +, O +wybraÅ‚ O +turystykÄ™ O +imprezowÄ… O +. O + +# sent_id = 251 +Chociaż O +może O +billboardy O +z O +nierozpakowanÄ… O +czekoladkÄ… O +miaÅ‚y O +odnosić O +siÄ™ O +do O +nocy O +muzeów O +, O +podobnie O +jak O +ostatnia O +akcja O +ze O +zmÄ™czonymi O +pomnikami O +? O + +# sent_id = 252 +Inna O +sprawa O +, O +że O +w O +zabawie O +nie O +uczestniczyÅ‚ O +Aleksander B-nam_liv_person +Fredro I-nam_liv_person +, O +co O +być O +może O +Å›wiadczy O +o O +tym O +, O +że O +WrocÅ‚aw B-nam_loc_gpe_city +nie O +dorobiÅ‚ O +siÄ™ O +rozpoznawalnego O +symbolu O +. O + +# sent_id = 253 +Tymczasem O +każdy O +zna O +wierszyk O +o O +starym O +pierniku O +, O +kto O +nie O +pamiÄ™ta O +magazynu O +morze O +( O +zaczynajÄ…cego O +siÄ™ O +piosenkÄ… O +o O +piratach O +) O +, O +syrenka O +straszy O +też O +od O +dawna O +. O + +# sent_id = 254 +PoznaÅ„ B-nam_loc_gpe_city +litoÅ›ciwie O +też O +pominiÄ™to O +, O +bo O +kto O +chciaÅ‚ O +by O +wybrać O +siÄ™ O +do O +miasta O +, O +do O +którego O +imprezować O +jeżdżą O +kozioÅ‚ki O +? O + +# sent_id = 255 +OkazaÅ‚o O +siÄ™ O +, O +że O +rowerzyÅ›ci O +jednak O +robiÄ… O +zakupy O +. O + +# sent_id = 256 +Å»eby O +siÄ™ O +zbytnio O +nie O +rozochocili O +, O +stojaki O +pod O +centrum B-nam_fac_goe +arkady I-nam_fac_goe +sÄ… O +maÅ‚e O +, O +w O +koÅ„cu O +parking O +też O +mieÅ›ci O +marne O +kilkaset O +samochodów O +, O +a O +ilu O +może O +być O +rowerzystów O +? O + +# sent_id = 257 +Dwudziestu O +? O + +# sent_id = 258 +OczywiÅ›cie O +to O +miÅ‚e O +, O +że O +już O +po O +dwóch O +miesiÄ…cach O +od O +otwarcia O +stojaki O +siÄ™ O +pojawiÅ‚y O +, O +chociaż O +pod O +konkurencjÄ… O +na O +placu B-nam_fac_square +Grunwaldzkim I-nam_fac_square +byÅ‚y O +już O +od O +otwarcia O +. 
O + +# sent_id = 259 +Na O +obrazku O +można O +dodatkowo O +podziwiać O +uprzejmość O +kierowcy O +, O +który O +zaparkowaÅ‚ O +na O +koÅ„cu O +Å›cieżki O +rowerowej O +, O +a O +mógÅ‚ O +przecież O +stanąć O +pod O +stojakiem O +na O +rowery O +. O + +# sent_id = 260 +Tymczasem O +w O +poszukiwaniu O +wszystkich O +wrocÅ‚awskich O +rowerzystów O +wystarczy O +wybrać O +siÄ™ O +kawaÅ‚ek O +dalej O +. O + +# sent_id = 261 +Za O +centrum O +handlowym O +znajduje O +siÄ™ O +stary O +dom O +towarowy O +, O +w O +którym O +mieÅ›ci O +siÄ™ O +obecnie O +HP B-nam_org_company +, O +a O +wkrótce O +znajdzie O +siÄ™ O +tam O +także O +kolejna O +galeria O +, O +mimo O +że O +tuż O +obok O +jest O +już O +jeszcze O +inna O +galeria O +, O +znana O +ostatnio O +z O +tego O +, O +że O +radny O +pobiÅ‚ O +tam O +policjanta O +. O + +# sent_id = 262 +Pracownicy O +HP B-nam_org_company +udajÄ… O +, O +że O +sÄ… O +w O +Amsterdamie B-nam_loc_gpe_city +czy O +innej O +Kopenhadze B-nam_loc_gpe_city +: O +Zaraz O +za O +rowerami O +znajdujÄ… O +siÄ™ O +wizualizacje O +nowego O +domu O +towarowego O +, O +zielona O +trawa O +, O +szkÅ‚o O +i O +beton O +. O + +# sent_id = 263 +Ludzie O +Å‚ażą O +, O +samochody O +jeżdżą O +( O +tu O +spory O +optymizm O +twórców O +widać O +- O +na O +pierwszym O +obrazku O +trzy O +samochody O +: O +BMW B-nam_org_company +, O +saab B-nam_org_company +i O +porsche B-nam_pro_brand +, O +na O +kolejnych O +zresztÄ… O +dość O +podobnie O +) O +, O +nawet O +autobus O +siÄ™ O +pojawiÅ‚ O +. O + +# sent_id = 264 +Å»adnych O +rowerzystów O +, O +żadnych O +stojaków O +. O + +# sent_id = 265 +Dobra O +wiadomość O +dla O +pracowników O +HP B-nam_org_company +: O +szykujÄ… O +siÄ™ O +podwyżki O +. O + +# sent_id = 266 +Sam O +by O +m O +chciaÅ‚ O +, O +żeby O +byÅ‚o O +różowo O +i O +żeby O +w O +zwiÄ…zku O +z O +euro B-nam_eve_human_sport +powstaÅ‚y O +drogi O +, O +linie O +kolejowe O +, O +lotniska O +i O +kina O +studyjne O +. 
O + +# sent_id = 267 +Euro B-nam_eve_human_sport +to O +piÄ™kny O +pretekst O +i O +być O +może O +nawet O +Komisja B-nam_org_institution +Europejska I-nam_org_institution +zmiÄ™kÅ‚a O +by O +i O +dorzuciÅ‚a O +kilka O +miliardów O +albo O +chociaż O +uproÅ›ciÅ‚a O +procedury O +. O + +# sent_id = 268 +Minęło O +kilka O +miesiÄ™cy O +od O +przyznania O +Polsce B-nam_loc_gpe_country +imprezy O +i O +nic O +siÄ™ O +jednak O +nie O +dzieje O +. O + +# sent_id = 269 +OdwoÅ‚ywane O +sÄ… O +kolejne O +plany O +( O +autostrad O +, O +szybkich O +pociÄ…gów O +) O +, O +gazety O +i O +blogi O +lamentujÄ… O +. O + +# sent_id = 270 +Czym O +biedny O +kibic O +dojedzie O +na O +mecz O +? O + +# sent_id = 271 +Jak O +siÄ™ O +przemieÅ›ci O +? O + +# sent_id = 272 +Tymczasem O +organizatorzy O +ewidentnie O +nic O +sobie O +z O +tego O +nie O +robiÄ… O +. O + +# sent_id = 273 +I O +nie O +bÄ™dÄ… O +. O + +# sent_id = 274 +Nie O +muszÄ… O +. O + +# sent_id = 275 +Wizja O +wielkich O +stadionów O +i O +setek O +tysiÄ™cy O +kibiców O +wÄ™drujÄ…cych O +po O +Polsce B-nam_loc_gpe_country +wzdÅ‚uż O +i O +wszerz O +zostaÅ‚a O +stworzona O +przez O +media O +, O +a O +organizatorzy O +wiedzÄ… O +, O +że O +ratuje O +ich O +system O +sprzedaży O +biletów O +. O + +# sent_id = 276 +WedÅ‚ug O +tego O +systemu O +poÅ‚owÄ™ O +miejsc O +na O +stadionach O +dostajÄ… O +sponsorzy O +. O + +# sent_id = 277 +Ci O +organizujÄ… O +konkursy O +, O +ale O +przede O +wszystkim O +wysyÅ‚ajÄ… O +swoich O +pracowników O +. O + +# sent_id = 278 +Reszta O +biletów O +jest O +w O +wiÄ™kszoÅ›ci O +losowana O +( O +gospodarz O +dostaje O +nikÅ‚Ä… O +pulÄ™ O +) O +i O +szansa O +, O +że O +ten O +sam O +kibic O +wylosuje O +bilet O +w O +Poznaniu B-nam_loc_gpe_city +i O +we O +WrocÅ‚awiu B-nam_loc_gpe_city +jest O +raczej O +maÅ‚a O +. O + +# sent_id = 279 +W O +ten O +sposób O +potrzebne O +sÄ… O +po O +pierwsze O +stadiony O +, O +a O +po O +drugie O +lotniska O +. 
O + +# sent_id = 280 +OczywiÅ›cie O +- O +też O +nie O +jest O +tak O +Å‚atwo O +w O +Polsce B-nam_loc_gpe_country +je O +zbudować O +, O +ale O +dużo O +Å‚atwiej O +niż O +zbudować O +stadiony O +, O +lotniska O +i O +tysiÄ…ce O +kilometrów O +dróg O +. O + +# sent_id = 281 +Zdecydowana O +wiÄ™kszość O +kibiców O +przyjedzie O +na O +jeden O +mecz O +, O +co O +najmniej O +poÅ‚owa O +( O +bo O +gospodarz O +też O +część O +biletów O +rozdaje O +) O +przyjedzie O +nie O +za O +wÅ‚asne O +pieniÄ…dze O +w O +ramach O +wycieczek O +korporacyjno O +- O +szkolnych O +, O +a O +na O +takich O +czas O +pÅ‚ynie O +inaczej O +i O +45 O +godzin O +w O +podróży O +też O +ma O +swoje O +zalety O +. O +. O +. O + +# sent_id = 282 +Pozostaje O +ewentualnie O +niewielka O +grupa O +kibiców O +, O +którzy O +kibicujÄ… O +samej O +imprezie O +- O +lubiÄ… O +po O +prostu O +malować O +sobie O +twarze O +i O +pić O +ciepÅ‚e O +piwo O +przed O +telebimem O +albo O +wybijać O +szyby O +w O +sklepach O +. O + +# sent_id = 283 +Srebrni O +siatkarze O +i O +srebrni O +piÅ‚karze O +rÄ™czni O +grali O +Å›wietnie O +przez O +caÅ‚e O +turnieje O +( O +jedni O +bez O +, O +drudzy O +z O +jednÄ… O +porażkÄ… O +) O +, O +żeby O +gÅ‚adko O +przegrać O +w O +finaÅ‚ach O +. O + +# sent_id = 284 +Jedni O +i O +drudzy O +zapewniali O +, O +że O +sÄ… O +gotowi O +wygrać O +, O +że O +jeÅ›li O +nie O +teraz O +to O +kiedy O +, O +a O +Niemców B-nam_org_nation +mogÄ… O +zlać O +z O +marszu O +. O + +# sent_id = 285 +Najwyraźniej O +jednak O +zapomnieli O +o O +dwóch O +równych O +skokach O +i O +zasÅ‚użyli O +, O +żeby O +maskotkÄ… O +drużyny O +zostaÅ‚ O +Robert B-nam_liv_person +Mateja I-nam_liv_person +( O +uczeÅ„ O +Masahiko B-nam_liv_person +Harady I-nam_liv_person +) O +. 
O + +# sent_id = 286 +To O +też O +przypomina O +mi O +być O +może O +Steca B-nam_liv_person +, O +być O +może O +Zczuba B-nam_pro_media_web +, O +że O +polski B-nam_adj_country +piÅ‚karz O +stara O +siÄ™ O +do O +pierwszej O +beemki O +. O + +# sent_id = 287 +Nie O +bramki O +, O +beemki O +. O + +# sent_id = 288 +Srebrny O +medal O +jest O +Å‚adny O +, O +Polacy B-nam_org_nation +lubiÄ… O +gadać O +, O +zawsze O +mogÄ… O +powiedzieć O +, O +że O +Niemcy B-nam_org_nation +specjalnie O +zesÅ‚ali O +ich O +do O +Hamburga B-nam_loc_gpe_city +, O +miasta O +, O +które O +daÅ‚o O +swojÄ… O +nazwÄ™ O +frankfurterkom O +, O +żeby O +do O +zÅ‚ota O +Renu B-nam_loc_hydronym_river +mieli O +daleko O +. O + +# sent_id = 289 +OczywiÅ›cie O +na O +szczęście O +ani O +piÅ‚karze O +rÄ™czni O +, O +ani O +siatkarze O +aż O +tak O +gÅ‚upio O +siÄ™ O +nie O +tÅ‚umaczyli O +, O +to O +tylko O +telewizja O +publiczna O +, O +w O +koÅ„cu O +po O +tylu O +latach O +transmisji O +ze O +skoków O +trudno O +siÄ™ O +odzwyczaić O +. O + +# sent_id = 290 +Nic O +siÄ™ O +nie O +staÅ‚o O +panie O +trenerze O +, O +powiedziaÅ‚ O +buldog O +. O + +# sent_id = 291 +Z O +jednej O +strony O +medale O +w O +sportach O +zespoÅ‚owych O +i O +wÅ›ród O +seniorów O +to O +miÅ‚a O +odmiana O +, O +z O +caÅ‚ym O +szacunkiem O +dla O +siatkarek O +i O +koszykarek O +. O + +# sent_id = 292 +NietÅ‚umaczenie O +siÄ™ O +gÅ‚upio O +z O +porażki O +też O +raczej O +po O +stronie O +korzyÅ›ci O +. O + +# sent_id = 293 +Jednak O +dlaczego O +w O +kluczowych O +momentach O +siÄ™ O +zawodzi O +? O + +# sent_id = 294 +Po O +co O +heroiczne O +boje O +z O +RosjÄ… B-nam_loc_gpe_country +i O +DaniÄ… B-nam_loc_gpe_country +? O + +# sent_id = 295 +Czy O +polscy B-nam_adj_country +zawodnicy O +sÄ… O +zbyt O +skromni O +? O + +# sent_id = 296 +JakiÅ› O +kompleks O +nie O +pozwala O +im O +uznać O +, O +że O +sÄ… O +godni O +zÅ‚ota O +? 
O + +# sent_id = 297 +Holendrzy B-nam_org_nation +popijajÄ…cy O +mlekiem O +buÅ‚kÄ™ O +z O +salami O +nigdy O +nie O +przestali O +mnie O +dziwić O +, O +chociaż O +może O +to O +tylko O +wariacja O +na O +temat O +baraniny O +z O +jogurtem O +czy O +woÅ‚owiny O +z O +parmezanem O +. O + +# sent_id = 298 +W O +koÅ„cu O +Kalwin B-nam_liv_person +popijaÅ‚ O +jajko O +kieliszkiem O +wina O +, O +a O +Holendrzy B-nam_org_nation +wszystko O +potrafiÄ… O +zreformować O +. O + +# sent_id = 299 +Winiarze O +zresztÄ… O +nie O +byli O +w O +Holandii B-nam_loc_gpe_country +mile O +widziani O +, O +a O +tymczasem O +to O +wÅ‚aÅ›nie O +w O +depresji O +winnica O +wydaÅ‚a O +plon O +, O +w O +wyniku O +ponadÅ›wiatowego O +spisku O +, O +w O +którym O +epizodyczne O +role O +odegrali O +Fidel B-nam_liv_person +Castro I-nam_liv_person +, O +GawriÅ‚o B-nam_liv_person +Princip I-nam_liv_person +i O +pewien O +zakonnik O +, O +który O +dodatkowo O +potwierdza O +nabiaÅ‚owe O +podÅ‚oże O +caÅ‚ej O +sprawy O +. O + +# sent_id = 300 +Bohaterowie O +Å›wiadomi O +swojej O +wartoÅ›ci O +, O +znawcy O +wszechÅ›wiata O +i O +wszechrzeczy O +, O +taÅ„czÄ… O +jak O +im O +zagrajÄ… O +na O +wiolonczeli O +i O +innych O +strunach O +. O + +# sent_id = 301 +UczestniczÄ… O +w O +miÅ‚osnym O +trójkÄ…cie O +, O +który O +okazuje O +siÄ™ O +raczej O +trapezem O +, O +na O +dodatek O +z O +obeliskiem O +poÅ›rodku O +. O + +# sent_id = 302 +ZostajÄ… O +ojcami O +, O +chociaż O +przede O +wszystkim O +swoimi O +, O +nawet O +jeżeli O +sÄ… O +tylko O +ich O +caÅ‚kowitym O +przeciwieÅ„stwem O +. 
O + +# sent_id = 303 +W O +poszukiwaniu O +Å›rodka O +, O +bo O +tylko O +Å›rodka O +nam O +brakuje O +, O +jeÅ›li O +historia O +ma O +toczyć O +siÄ™ O +koÅ‚em O +, O +musi O +mieć O +Å›rodek O +, O +którego O +nie O +majÄ… O +przecież O +rzeczy O +, O +które O +nie O +majÄ… O +koÅ„ca O +, O +takie O +jak O +wszechÅ›wiat O +, O +trzeba O +przedostać O +siÄ™ O +przez O +grafiki O +Eschera B-nam_liv_person +, O +ukryte O +za O +renesansowÄ… O +fasadÄ… O +. O + +# sent_id = 304 +WczeÅ›niej O +jednak O +trzeba O +odbyć O +podróż O +w O +towarzystwie O +Francisa B-nam_liv_person +Bacona I-nam_liv_person +, O +który O +może O +siÄ™ O +nawet O +okazać O +Å›rodkiem O +tej O +opowieÅ›ci O +. O + +# sent_id = 305 +O O +ile O +" O +Odkrycie B-nam_pro_title +nieba I-nam_pro_title +" O +ma O +koniec O +, O +a O +Harry B-nam_liv_person +Mulisch I-nam_liv_person +nie O +napisaÅ‚ O +dzieÅ‚ O +Szekspira B-nam_liv_person +. O + +# sent_id = 306 +Cyniczny O +student O +otworzyÅ‚ O +portal O +z O +mapÄ… O +nalotów O +, O +o O +którym O +czytaÅ‚ O +em O +kiedyÅ› O +, O +ale O +dopiero O +absurdalny O +tekst O +z O +czÄ™stochowskiej O +wyborczej O +zwróciÅ‚ O +na O +Å„ O +mojÄ… O +uwagÄ™ O +. O + +# sent_id = 307 +Strona O +ma O +promować O +używanie O +legalnego O +oprogramowania O +. O + +# sent_id = 308 +TrochÄ™ O +jak O +mapa O +fotoradarów O +lub O +znaki O +z O +czarnym O +punktem O +, O +które O +w O +tym O +kontekÅ›cie O +powinny O +siÄ™ O +oczywiÅ›cie O +znajdować O +w O +wiÄ™kszych O +skupiskach O +politechnicznych O +akademików O +. O + +# sent_id = 309 +O O +ile O +mapa O +fotoradarów O +ma O +sens O +: O +tam O +gdzie O +fotoradar O +kierowca O +zwalnia O +i O +być O +może O +jest O +bezpieczniej O +, O +podobnie O +dziaÅ‚ajÄ… O +czarne O +punkty O +. O + +# sent_id = 310 +Ale O +mapa O +nalotów O +jako O +promocja O +legalnego O +oprogramowania O +? O + +# sent_id = 311 +Chyba O +w O +formie O +budzenia O +paranoi O +? 
O + +# sent_id = 312 +Bo O +jeÅ›li O +na O +ulicy O +Sienkiewicza B-nam_fac_road +, O +róg O +Ukrytej B-nam_fac_road +zgÅ‚oszono O +nalot O +, O +to O +co O +zrobi O +pirat O +? O + +# sent_id = 313 +Przejdzie O +z O +odtwarzaczem O +naÅ‚adowanym O +nielegalnymi O +empetrójkami O +na O +drugÄ… O +stronÄ™ O +ulicy O +? O + +# sent_id = 314 +Przebojem O +jest O +jednak O +to O +zgÅ‚oszenie O +. O + +# sent_id = 315 +Potwierdzone O +, O +bo O +notka O +z O +serwisu O +policji O +. O + +# sent_id = 316 +Ale O +policja O +nie O +podaÅ‚a O +ulicy O +, O +wiÄ™c O +zmyÅ›lny O +zgÅ‚aszajÄ…cy O +zaznaczyÅ‚ O +siedzibÄ™ O +komendy O +wojewódzkiej O +. O + +# sent_id = 317 +Najciemniej O +jest O +pod O +latarniÄ… O +? O + +# sent_id = 318 +Trzeba O +byÅ‚o O +tak O +Billowi B-nam_liv_person +dokÅ‚adać O +? O + +# sent_id = 319 +Zemsta O +nadeszÅ‚a O +szybko O +, O +choć O +nie O +niespodziewanie O +. O + +# sent_id = 320 +Pierwsza O +rocznica O +wydania O +Visty B-nam_pro_software +w O +rÄ™ce O +konsumentów O +już O +niedÅ‚ugo O +, O +a O +ja O +w O +ramach O +prezentu O +postanowiÅ‚ O +em O +sprawić O +sobie O +egzemplarz O +. O + +# sent_id = 321 +Już O +po O +trzech O +dniach O +mam O +dziaÅ‚ajÄ…cy O +system O +. O + +# sent_id = 322 +WydawaÅ‚o O +by O +siÄ™ O +, O +że O +po O +instalacji O +kilku O +dystrybucji O +Linuksa B-nam_pro_software +na O +różnych O +komputerach O +, O +instalacja O +Windowsa B-nam_pro_software +nie O +bÄ™dzie O +sprawiaÅ‚a O +wiÄ™kszych O +problemów O +. O + +# sent_id = 323 +SpodziewaÅ‚ O +em O +siÄ™ O +, O +że O +nie O +bÄ™dzie O +siÄ™ O +ViÅ›cie B-nam_pro_software +podobaÅ‚o O +, O +że O +dysk O +zarażony O +jest O +Linuksem B-nam_pro_software +, O +wiÄ™c O +dostaÅ‚a O +wÅ‚asny O +. O + +# sent_id = 324 +InstalowaÅ‚ O +em O +w O +ciemno O +, O +bo O +program O +do O +sprawdzenia O +, O +czy O +komputer O +speÅ‚nia O +wymagania O +Visty B-nam_pro_software +, O +dziaÅ‚a O +tylko O +pod O +Windows B-nam_pro_software +. 
O + +# sent_id = 325 +Po O +uruchomieniu O +pÅ‚yty O +instalacyjnej O +, O +po O +chwili O +pojawia O +siÄ™ O +kolorowy O +ekran O +i O +kursor O +. O + +# sent_id = 326 +I O +nic O +siÄ™ O +nie O +dzieje O +. O + +# sent_id = 327 +Na O +ekranie O +żadnych O +informacji O +, O +najmniejszego O +choćby O +" O +czekaj O +" O +( O +które O +później O +za O +to O +wystÄ™puje O +obficie O +) O +. O + +# sent_id = 328 +Można O +siÄ™ O +domyÅ›lać O +, O +co O +siÄ™ O +dzieje O +. O + +# sent_id = 329 +Vista B-nam_pro_software +formatuje O +dysk O +z O +Linuksem B-nam_pro_software +? O + +# sent_id = 330 +Vista B-nam_pro_software +próbuje O +poÅ‚Ä…czyć O +siÄ™ O +z O +Redmond B-nam_loc_gpe_city +i O +przesÅ‚ać O +tam O +moje O +dane O +osobowe O +? O + +# sent_id = 331 +Vista B-nam_pro_software +sprawdza O +numery O +seryjne O +wszystkich O +podzespołów O +? O + +# sent_id = 332 +Wciąż O +nic O +siÄ™ O +nie O +dzieje O +, O +akurat O +dotarÅ‚ O +" O +Lód B-nam_pro_title +" O +Dukaja B-nam_liv_person +, O +może O +zdążę O +przeczytać O +, O +zanim O +wreszcie O +system O +zacznie O +siÄ™ O +instalować O +? O + +# sent_id = 333 +Nareszcie O +- O +można O +wybrać O +dysk O +dla O +instalacji O +. O + +# sent_id = 334 +Wybieram O +, O +ale O +" O +System O +Windows B-nam_pro_software +nie O +może O +znaleźć O +woluminu O +systemowego O +speÅ‚niajÄ…cego O +kryteria O +instalacji O +" O +. O + +# sent_id = 335 +Pomaga O +to O +, O +czego O +siÄ™ O +spodziewaÅ‚ O +em O +- O +fizyczne O +odÅ‚Ä…czenie O +dysku O +linuksowego O +. O + +# sent_id = 336 +Instalator O +idzie O +dalej O +, O +dochodzi O +do O +" O +koÅ„czenia O +instalacji O +" O +, O +które O +trwa O +nieznoÅ›nie O +dÅ‚ugo O +, O +tak O +, O +już O +wiem O +, O +że O +ważne O +jest O +, O +jak O +siÄ™ O +koÅ„czy O +, O +byle O +dziÅ› O +, O +resetuje O +siÄ™ O +i O +siÄ™ O +nie O +wÅ‚Ä…cza O +. O + +# sent_id = 337 +Okazuje O +siÄ™ O +, O +że O +reset O +nie O +byÅ‚ O +jednak O +w O +planach O +. 
O + +# sent_id = 338 +Instalacji O +nie O +można O +naprawić O +ani O +przywrócić O +, O +zaczynam O +od O +nowa O +. O + +# sent_id = 339 +" O +Lód B-nam_pro_title +" O +leży O +i O +kusi O +, O +może O +zdążę O +. O + +# sent_id = 340 +Licencja O +przewiduje O +, O +że O +pomoc O +techniczna O +przysÅ‚uguje O +mi O +od O +producenta O +i O +montera O +sprzÄ™tu O +. O + +# sent_id = 341 +Hmm O +. O + +# sent_id = 342 +To O +chyba O +ja O +. O + +# sent_id = 343 +Mam O +szczęście O +, O +trafiÅ‚ O +em O +na O +kogoÅ› O +, O +do O +kogo O +dodzwoniÄ™ O +siÄ™ O +za O +pierwszym O +razem O +i O +nie O +bÄ™dÄ™ O +musiaÅ‚ O +sÅ‚uchać O +przez O +pół O +godziny O +Stinga B-nam_liv_person +w O +czasie O +Å‚Ä…czenia O +z O +wÅ‚aÅ›ciwym O +dziaÅ‚em O +. O + +# sent_id = 344 +Rada O +pierwsza O +- O +rekompilacja O +jÄ…dra O +. O + +# sent_id = 345 +Nie O +? O + +# sent_id = 346 +To O +może O +wyjść O +i O +wejść O +? O + +# sent_id = 347 +WychodzÄ™ O +i O +wchodzÄ™ O +od O +nowa O +. O + +# sent_id = 348 +Nie O +pomaga O +. O + +# sent_id = 349 +Nie O +pomaga O +z O +przyÅ‚Ä…czonym O +ponownie O +dyskiem O +, O +nie O +pomaga O +z O +wyjÄ™tÄ… O +kartÄ… O +wifi B-nam_oth_tech +, O +nie O +pomaga O +z O +przyÅ‚Ä…czonym O +kablem O +sieciowym O +( O +żeby O +aktualizacje O +mogÅ‚y O +siÄ™ O +zaktualizować O +) O +, O +nie O +pomaga O +z O +odÅ‚Ä…czonym O +w O +BIOSie B-nam_pro_software +ethernetem B-nam_oth_tech +( O +tak O +niby O +sugeruje O +strona O +producenta O +pÅ‚yty O +głównej O +) O +. O + +# sent_id = 350 +Przychodzi O +czas O +rozwiÄ…zaÅ„ O +desperackich O +, O +mija O +już O +okoÅ‚o O +8 O +godzin O +od O +rozpoczÄ™cia O +zabawy O +, O +" O +Lód B-nam_pro_title +" O +siÄ™ O +już O +czyta O +siÄ™ O +, O +na O +jakimÅ› O +forum O +przeczytaÅ‚o O +siÄ™ O +, O +że O +należy O +zamienić O +miejscami O +koÅ›ci O +pamiÄ™ci O +RAM B-nam_oth_tech +. 
O + +# sent_id = 351 +PiÅ‚ O +em O +wczoraj O +herbatÄ™ O +z O +fusami O +i O +proszÄ™ O +- O +fusy O +uÅ‚ożyÅ‚y O +siÄ™ O +znaczÄ…co O +, O +pozwalajÄ…c O +na O +przeprowadzenie O +dokÅ‚adnej O +analizy O +globalnej O +sytuacji O +gospodarczej O +. O + +# sent_id = 352 +Zainteresowanych O +metodami O +badawczymi O +odsyÅ‚am O +do O +źródeÅ‚ O +. O + +# sent_id = 353 +Niestety O +, O +nie O +udaÅ‚o O +mi O +siÄ™ O +ustalić O +kolejnoÅ›ci O +chronologicznej O +tych O +wszystkich O +tragicznych O +zdarzeÅ„ O +, O +do O +tego O +trzeba O +byÅ‚o O +by O +zrobić O +mocnÄ… O +zalewanÄ… O +kawÄ™ O +. O + +# sent_id = 354 +Jeszcze O +nie O +jestem O +gotów O +na O +takie O +poÅ›wiÄ™cenie O +dla O +ludzkoÅ›ci O +. O + +# sent_id = 355 +Przedstawiam O +wiÄ™c O +zdarzenia O +w O +kolejnoÅ›ci O +przypadkowej O +. O + +# sent_id = 356 +Wielki O +pożar O +zniszczy O +chiÅ„ski B-nam_adj_country +bank O +centralny O +, O +gdzie O +spÅ‚onÄ… O +żywcem O +miliardy O +dolarów B-nam_oth_currency +w O +dolarach B-nam_oth_currency +i O +obligacjach O +. O + +# sent_id = 357 +PociÄ…gnie O +to O +za O +sobÄ… O +wielki O +krach O +na O +tamtejszej O +gieÅ‚dzie O +i O +odwoÅ‚anie O +Expo B-nam_eve_human +w O +Szanghaju B-nam_loc_gpe_city +( O +WrocÅ‚aw B-nam_loc_gpe_city +bÄ™dzie O +staraÅ‚ O +siÄ™ O +naprÄ™dce O +zorganizować O +wystawÄ™ O +w O +Hali B-nam_fac_goe +Stulecia I-nam_fac_goe +, O +ale O +BIE B-nam_org_organization +zdecyduje O +, O +że O +lepszym O +miejscem O +jest O +boisko O +do O +krykieta O +w O +Saint B-nam_loc_gpe_city +John's I-nam_loc_gpe_city +) O +. O + +# sent_id = 358 +PrawdopodobnÄ… O +przyczynÄ… O +pożaru O +bÄ™dzie O +chęć O +zniszczenia O +dowodów O +na O +doping O +chiÅ„skich B-nam_adj_country +sportowców O +podczas O +igrzysk O +w O +Pekinie B-nam_loc_gpe_city +. 
O + +# sent_id = 359 +Krach O +na O +chiÅ„skiej B-nam_adj_country +gieÅ‚dzie O +pociÄ…gnie O +za O +sobÄ… O +krach O +w O +USA B-nam_loc_gpe_country +, O +wyginÄ… O +najpierw O +najsÅ‚absze O +firmy O +internetowe B-nam_adj +i O +telekomunikacyjne O +, O +róbcie O +kopie O +zapasowe O +danych O +z O +Facebooka B-nam_pro_media_web +! O + +# sent_id = 360 +RosnÄ…ce O +ceny O +energii O +( O +ropa O +po O +206 O +, O +50 O +$ B-nam_oth_currency +za O +baryÅ‚kÄ™ O +, O +byÅ‚o O +to O +wyraźnie O +widoczne O +w O +uÅ‚ożeniu O +kawaÅ‚ka O +Å‚odyżki O +na O +połówce O +liÅ›cia O +) O +doprowadzÄ… O +do O +szybkiego O +wzrostu O +cen O +przechowywania O +danych O +, O +co O +uderzy O +bezpoÅ›rednio O +w O +Google B-nam_org_company +, O +Yahoo B-nam_org_company +i O +inne O +duże O +firmy O +, O +których O +nie O +dobiÅ‚ O +jeszcze O +krach O +na O +gieÅ‚dzie O +. O + +# sent_id = 361 +Fundacja O +paÅ„stwa O +Gatesów B-nam_liv_person +przeznaczy O +ogromne O +Å›rodki O +na O +ratowanie O +internetu B-nam_oth_tech +, O +ale O +niewiele O +to O +pomoże O +- O +konieczne O +bÄ™dzie O +wprowadzenie O +opÅ‚at O +. O + +# sent_id = 362 +Koniec O +z O +trzymaniem O +caÅ‚ej O +poczty O +na O +serwerach O +, O +koniec O +z O +WikipediÄ… B-nam_pro_media_web +, O +iTunes B-nam_pro_software +, O +Second B-nam_pro_media_web +Life I-nam_pro_media_web +, O +Last.fm B-nam_pro_media_web +i O +inne O +pożeracze O +przepustowoÅ›ci O +tylko O +dla O +prawdziwych O +bogaczy O +, O +kampanie O +reklamowe O +we O +flashu O +również O +nie O +dla O +każdego O +. O + +# sent_id = 363 +Internet B-nam_oth_tech +schudnie O +, O +w O +wiÄ™kszoÅ›ci O +wróci O +do O +formatu O +tekstowego O +z O +poczÄ…tku O +lat O +dziewięćdziesiÄ…tych O +, O +a O +za O +wszystkie O +wodotryski O +trzeba O +bÄ™dzie O +sÅ‚ono O +pÅ‚acić O +. 
O + +# sent_id = 364 +Handel O +internetowy O +wróci O +do O +źródeÅ‚ O +- O +na O +łóżka O +polowe O +i O +do O +szczÄ™k O +, O +najwiÄ™cej O +bÄ™dzie O +można O +zaoszczÄ™dzić O +na O +elektrycznoÅ›ci O +. O + +# sent_id = 365 +Coraz O +wiÄ™kszy O +koszt O +alternatywny O +czasu O +wolnego O +i O +coraz O +droższe O +przechowywanie O +danych O +, O +przepustowość O +i O +nawet O +wÅ‚Ä…czanie O +wÅ‚asnego O +komputera O +na O +caÅ‚y O +dzieÅ„ O +spowodujÄ… O +, O +że O +dramatycznie O +zmaleje O +liczba O +ochotników O +w O +projektach O +wolnego O +oprogramowania O +i O +wolnej O +kultury O +. O + +# sent_id = 366 +Creative B-nam_org_organization +Commons I-nam_org_organization +zejdzie O +do O +podziemia O +, O +gdzie O +dziÄ™ki O +energii O +pozyskiwanej O +z O +kiszonej O +kapusty O +bÄ™dzie O +wciąż O +promować O +wolne O +( O +i O +niewolne O +) O +licencje O +, O +w O +tym O +nowÄ… O +licencjÄ™ O +Attribution B-nam_oth_license +- I-nam_oth_license +ShareAlike I-nam_oth_license +- I-nam_oth_license +LowWatt I-nam_oth_license +. O + +# sent_id = 385 +Punktem O +wyjÅ›cia O +dla O +Benklera B-nam_liv_person +jest O +analiza O +ekonomiczna O +, O +ale O +to O +z O +wyksztaÅ‚cenia O +prawnik O +, O +wiÄ™c O +nic O +dziwnego O +, O +że O +szuka O +nastÄ™pnie O +drugiego O +dna O +. O + +# sent_id = 386 +Potem O +trzeciego O +, O +czwartego O +. O +. O +. O + +# sent_id = 387 +" O +Bogactwo B-nam_pro_title +sieci I-nam_pro_title +" O +, O +instrukcja O +do O +rozdziałów O +1 O +- O +6 O +. O + +# sent_id = 388 +RozdziaÅ‚ O +1 O +- O +Wprowadzenie O +CaÅ‚a O +książka O +w O +piguÅ‚ce O +. O + +# sent_id = 389 +Przed O +zażyciem O +skonsultuj O +siÄ™ O +z O +lekarzem O +lub O +farmaceutÄ… O +. O + +# sent_id = 390 +JeÅ›li O +przeÅ‚kniesz O +bez O +trudu O +- O +to O +książka O +dla O +Ciebie O +, O +nawet O +najbardziej O +zawiÅ‚e O +rozważania O +przeczytasz O +od O +razu O +. 
O + +# sent_id = 391 +JeÅ›li O +nie O +- O +wciąż O +możesz O +dać O +reszcie O +rozdziałów O +szansÄ™ O +, O +a O +wprowadzenie O +przeczytać O +sobie O +na O +koÅ„cu O +. O + +# sent_id = 392 +Chociaż O +to O +wÅ‚aÅ›nie O +tu O +Benkler B-nam_liv_person +wyjaÅ›nia O +, O +że O +nie O +jest O +anarchokomunistÄ… O +. O + +# sent_id = 393 +RozdziaÅ‚ O +2 O +- O +Wybrane O +cechy O +ekonomiczne O +produkcji O +informacji O +oraz O +innowacyjnoÅ›ci O +. O + +# sent_id = 394 +WÅ‚aÅ›ciwie O +dlaczego O +informacja O +nie O +powinna O +być O +przedmiotem O +wÅ‚asnoÅ›ci O +tak O +samo O +jak O +samochód O +? O + +# sent_id = 395 +Czy O +prawo O +powinno O +uwzglÄ™dniać O +różne O +strategie O +produkcji O +i O +wymiany O +informacji O +? O + +# sent_id = 396 +Dlaczego O +oparcie O +siÄ™ O +na O +prawach O +wyÅ‚Ä…cznych O +faworyzuje O +jeden O +model O +, O +który O +Benkler B-nam_liv_person +niewinnie O +nazywa O +MyszkÄ… B-nam_liv_character +Miki I-nam_liv_character +? O + +# sent_id = 397 +RozdziaÅ‚ O +3 O +- O +Produkcja O +partnerska O +i O +dzielenie O +siÄ™ O +Istota O +rozważaÅ„ O +Benklera B-nam_liv_person +. O + +# sent_id = 398 +Może O +o O +kilka O +zdaÅ„ O +za O +dużo O +, O +ale O +jeÅ›li O +pominie O +siÄ™ O +rozdziaÅ‚ O +o O +produkcji O +partnerskiej O +, O +to O +można O +już O +zacząć O +siÄ™ O +zastanawiać O +, O +czy O +jak O +na O +przycisk O +do O +papieru O +ksiÄ…zka O +nie O +jest O +jednak O +zbyt O +nieporÄ™czna O +. O + +# sent_id = 399 +RozdziaÅ‚ O +4 O +- O +Ekonomika O +produkcji O +spoÅ‚ecznej O +Produkcja O +partnerska O +to O +piÄ™kna O +teoria O +, O +ale O +czy O +to O +przypadkiem O +nie O +Adam B-nam_liv_person +Smith I-nam_liv_person +przewraca O +siÄ™ O +w O +grobie O +? O + +# sent_id = 400 +Chyba O +jednak O +nie O +, O +Benkler B-nam_liv_person +konsekwentnie O +dowodzi O +, O +że O +produkcja O +spoÅ‚eczna O +, O +to O +nie O +to O +samo O +co O +gospodarka O +uspoÅ‚eczniona O +. 
O + +# sent_id = 401 +To O +wiÄ™cej O +liberalizmu O +w O +liberalizmie O +niż O +na O +Wall B-nam_fac_road +Street I-nam_fac_road +. O + +# sent_id = 402 +RozdziaÅ‚ O +5 O +- O +Wolność O +osobista O +- O +autonomia O +, O +informacja O +i O +prawo O +. O + +# sent_id = 403 +Masz O +problemy O +z O +zasypianiem O +? O + +# sent_id = 404 +Dość O +rozwlekÅ‚e O +tÅ‚umaczenia O +prostej O +prawdy O +, O +że O +duży O +wybór O +jest O +dobry O +. O + +# sent_id = 405 +Owszem O +, O +ciekawe O +, O +ale O +nastÄ™pne O +rozdziaÅ‚y O +sÄ… O +ciekawsze O +. O + +# sent_id = 406 +RozdziaÅ‚ O +6 O +- O +Wolność O +polityczna O +- O +część O +1 O +; O +Problem O +ze O +Å›rodkami O +masowego O +przekazu O +. O + +# sent_id = 407 +Rozważania O +o O +tym O +, O +jak O +dziaÅ‚ajÄ… O +media O +i O +dlaczego O +, O +a O +w O +nagrodÄ™ O +bardzo O +ciekawa O +historia O +radia O +i O +tabelka O +, O +która O +wyjaÅ›nia O +jak O +to O +siÄ™ O +dzieje O +, O +że O +wiÄ™kszość O +stacji O +telewizyjnych O +pokazuje O +seriale O +. O + +# sent_id = 415 +Microsoft B-nam_org_company +po O +raz O +kolejny O +udaje O +, O +że O +ugiÄ…Å‚ O +siÄ™ O +pod O +naciskiem O +Komisji B-nam_org_institution +Europejskiej I-nam_org_institution +i O +po O +raz O +kolejny O +ogÅ‚asza O +zasady O +interoperacyjnoÅ›ci O +obiecujÄ…c O +wolność O +, O +równość O +i O +braterstwo O +. O + +# sent_id = 416 +W O +" O +zasadach O +interoperacyjnoÅ›ci O +" O +sÅ‚owo O +" O +otwarte O +" O +powtórzone O +zostaÅ‚o O +czterdzieÅ›ci O +razy O +. O + +# sent_id = 417 +Jednak O +" O +otwartość O +" O +wedÅ‚ug O +Microsoftu B-nam_org_company +oznacza O +po O +pierwsze O +, O +że O +za O +korzystanie O +z O +otwartych O +protokołów O +trzeba O +pÅ‚acić O +. O + +# sent_id = 418 +Gest O +polega O +na O +tym O +, O +że O +opÅ‚ata O +nie O +bÄ™dzie O +nikogo O +dyskryminować O +, O +nazwijmy O +to O +równoÅ›ciÄ… O +. 
O + +# sent_id = 419 +Nie O +ma O +co O +siÄ™ O +czepiać O +, O +że O +patenty O +na O +oprogramowanie O +jako O +takie O +w O +Europie B-nam_loc_land_continent +nie O +powinny O +obowiÄ…zywać O +- O +Amerykanie B-nam_org_nation +pÅ‚acÄ… O +, O +wiÄ™c O +Europejczycy B-nam_org_nation +też O +powinni O +. O + +# sent_id = 420 +Niech O +to O +bÄ™dzie O +braterstwo O +. O + +# sent_id = 421 +Po O +drugie O +" O +otwartość O +" O +( O +protokołów O +) O +oznacza O +, O +że O +programiÅ›ci O +tworzÄ…cy O +wolne O +oprogramowanie O +mogÄ… O +owszem O +z O +niej O +korzystać O +, O +ale O +tylko O +do O +celów O +niekomercyjnych O +, O +na O +podstawie O +jednostronnego O +oÅ›wiadczenia O +MS B-nam_org_company +, O +że O +nie O +zostanÄ… O +pozwani O +. O + +# sent_id = 422 +No O +i O +mamy O +wolność O +. O + +# sent_id = 423 +A O +gdyby O +tak O +pójść O +w O +drugÄ… O +stronÄ™ O +i O +zmusić O +MS B-nam_org_company +do O +zamkniÄ™cia O +wszystkich O +API O +i O +protokołów O +, O +dać O +im O +możliwość O +zrobienia O +kompletnego O +systemu O +z O +programami O +do O +wszystkiego O +- O +z O +Officem B-nam_pro_software +od O +razu O +wbudowanym O +w O +Windows B-nam_pro_software +, O +nieusuwalnym O +Internet B-nam_pro_software +Explorerem I-nam_pro_software +, O +odtwarzaczem O +multimedialnym O +grajÄ…cym O +wyÅ‚Ä…cznie O +microsoftowy O +DRM O +. O +. O +. O + +# sent_id = 424 +Chcesz O +grać O +? O + +# sent_id = 425 +Kup O +konsolÄ™ O +, O +a O +najlepiej O +trzy O +. O + +# sent_id = 426 +Chcesz O +Photoshopa B-nam_pro_software +? O + +# sent_id = 427 +Kup O +maka O +. O + +# sent_id = 428 +Chcesz O +system O +do O +sprawdzania O +poczty O +i O +profilu O +na O +naszej B-nam_pro_media_web +klasie I-nam_pro_media_web +? O + +# sent_id = 429 +Jest O +Ubuntu B-nam_pro_software +. 
O + +# sent_id = 430 +Może O +zamiast O +otwierać O +oprogramowanie O +wszystkich O +urzÄ…dzeÅ„ O +, O +żeby O +byÅ‚y O +podobne O +do O +pecetów O +( O +droga O +, O +którÄ… O +wybraÅ‚a O +FSF B-nam_org_organization +wydajÄ…c O +GPLv3 B-nam_oth_license +) O +, O +należy O +rynek O +pecetów O +upodobnić O +do O +rynku O +komórek O +? O + +# sent_id = 431 +W O +koÅ„cu O +już O +teraz O +kupujÄ…c O +dostÄ™p O +do O +internetu B-nam_oth_tech +można O +dostać O +na O +kredyt O +laptopa O +. O + +# sent_id = 432 +Czy O +to O +aż O +tak O +wielka O +różnica O +, O +jaki O +system O +jest O +na O +nim O +preinstalowany O +? O + +# sent_id = 433 +W O +dzieÅ„ B-nam_eve_human_holiday +matki I-nam_eve_human_holiday +program B-nam_pro_media_radio +trzeci I-nam_pro_media_radio +polskiego B-nam_org_company +radia I-nam_org_company +zapowiedziaÅ‚ O +- O +przygotowany O +specjalnie O +na O +tÄ™ O +okazjÄ™ O +( O +sic O +! O +) O +- O +reportaż O +o O +zagrożeniach O +czyhajÄ…cych O +na O +mÅ‚odzież O +w O +dzisiejszych O +czasach O +. O + +# sent_id = 434 +Redaktor O +jednym O +tchem O +wymieniÅ‚ O +" O +alkohol O +, O +narkotyki O +i O +internet B-nam_oth_tech +" O +. O + +# sent_id = 435 +Reportaż O +jakoÅ› O +mi O +umknÄ…Å‚ O +, O +zostaÅ‚a O +w O +gÅ‚owie O +zapowiedź O +, O +która O +wraca O +przy O +okazji O +rzÄ…dowych O +planów O +rozdawania O +laptopów O +gimnazjalistom O +. 
O + +# sent_id = 436 +RzÄ…dowe O +plany O +też O +nie O +sÄ… O +szczególnie O +nowe O +, O +przypomniaÅ‚y O +mi O +siÄ™ O +w O +zwiÄ…zku O +z O +RybczyÅ„skim B-nam_liv_person +( O +wkrótce O +) O +, O +a O +wczoraj O +nawet O +trafiÅ‚ O +siÄ™ O +w O +Wyborczej B-nam_pro_media_periodic +artykuÅ‚ O +na O +ten O +temat O +, O +w O +którym O +dziennikarz O +zaczyna O +od O +odkrywczego O +pytania O +, O +dlaczego O +Intelowi B-nam_org_company +zależy O +na O +sprzedaży O +wiÄ™kszej O +liczby O +komputerów O +, O +a O +pani O +z O +Intela B-nam_org_company +opowiada O +ciekawe O +anegdoty O +o O +podobnych O +programach O +w O +innych O +krajach O +( O +mimo O +pierwszego O +pytania O +warto O +przeczytać O +) O +. O + +# sent_id = 437 +Na O +dodatek O +w O +ostatniej O +swojej O +wypowiedzi O +wypowiada O +kluczowÄ… O +kwestiÄ™ O +" O +a O +co O +potem O +? O +" O + +# sent_id = 438 +PomijajÄ…c O +chwilowo O +kwestiÄ™ O +tego O +, O +co O +znajdzie O +siÄ™ O +w O +rozdawanych O +komputerach O +, O +zastanawiam O +siÄ™ O +, O +w O +jaki O +sposób O +te O +laptopy O +bÄ™dÄ… O +Å‚Ä…czyć O +siÄ™ O +z O +internetem B-nam_oth_tech +. O + +# sent_id = 439 +Rozumiem O +, O +że O +zasypywanie O +cyfrowych O +podziałów O +wymaga O +dofinansowania O +Intela B-nam_org_company +/ O +AMD B-nam_org_company +i O +Microsoftu B-nam_org_company +( O +który O +bez O +mrugniÄ™cia O +może O +wyposażyć O +każdÄ… O +ilość O +komputerów O +dla O +uczniów O +w O +oprogramowanie O +za O +symbolicznÄ… O +opÅ‚atÄ… O +) O +, O +ale O +czyżby O +również O +ostatecznie O +paÅ„stwo O +miaÅ‚o O +by O +sfinansować O +infrastrukturÄ™ O +3G B-nam_oth_tech +dla O +firm O +telekomunikacyjnych O +? O + +# sent_id = 440 +Czy O +może O +poza O +miastami O +gimnazjaliÅ›ci O +mieli O +by O +zadowolić O +siÄ™ O +dostÄ™pem O +przez O +GPRS B-nam_oth_tech +/ O +EDGE B-nam_oth_tech +? O + +# sent_id = 441 +To O +może O +równie O +dobrze O +od O +razu O +rozdać O +im O +Playstation B-nam_pro_brand +? 
O + +# sent_id = 442 +W O +ramach O +programu O +pojawiajÄ… O +siÄ™ O +hasÅ‚a O +o O +" O +doprowadzaniu O +szerokopasmowego O +internetu B-nam_oth_tech +" O +, O +czy O +to O +zapowiedź O +Å›wiatÅ‚owodów O +traktowanych O +na O +równi O +z O +kanalizacjÄ… O +? O + +# sent_id = 443 +Nawet O +jeżeli O +zapomnieć O +o O +polskich B-nam_adj_country +realiach O +i O +uznać O +, O +że O +do O +każdej O +gminnej O +szkoÅ‚y O +dociÄ…gniÄ™ty O +zostanie O +Å›wiatÅ‚owód O +, O +co O +z O +ostatniÄ… O +milÄ… O +? O + +# sent_id = 444 +Czy O +szkoÅ‚a O +ma O +stać O +siÄ™ O +podstawÄ… O +dla O +gminnej O +sieci O +WiFi B-nam_oth_tech +? O + +# sent_id = 445 +Czy O +za O +darmo O +bÄ™dÄ… O +mogli O +z O +niej O +korzystać O +jedynie O +gimnazjaliÅ›ci O +w O +swoich O +laptopach O +? O + +# sent_id = 446 +Można O +siÄ™gnąć O +do O +utopijnych O +wizji O +wszechobecnych O +sieci O +bezprzewodowych O +Benklera B-nam_liv_person +, O +który O +liczy O +na O +to O +, O +że O +domyÅ›lnym O +ustawieniem O +ruterów O +pozostanÄ… O +sieci O +otwarte O +, O +bez O +żadnych O +wepów O +i O +wapów O +, O +dziÄ™ki O +czemu O +każdy O +zawsze O +i O +wszÄ™dzie O +bÄ™dzie O +mógÅ‚ O +skorzystać O +z O +zasiÄ™gu O +. O + +# sent_id = 447 +Benkler B-nam_liv_person +starannie O +omija O +problem O +gapowicza O +- O +w O +przypadku O +rzÄ…dowego O +programu O +ominiÄ™cie O +problemu O +można O +wymusić O +rozdajÄ…c O +oprócz O +laptopów O +bezprzewodowe O +rutery O +, O +w O +których O +żadnych O +zabezpieczeÅ„ O +nie O +można O +wÅ‚Ä…czyć O +( O +o O +matko O +! O +) O +. O + +# sent_id = 448 +A O +może O +pójść O +jeszcze O +dalej O +i O +zaplanować O +rzeczywiÅ›cie O +nowatorskie O +oprogramowanie O +i O +odpowiednie O +urzÄ…dzenia O +w O +laptopach O +, O +które O +odtwarzaÅ‚y O +by O +windowsowego O +buga O +, O +sÅ‚ynne O +" O +Free B-nam_eve_human +Public I-nam_eve_human +WiFi I-nam_eve_human +" O +. 
O + +# sent_id = 449 +SzkoÅ‚a O +byÅ‚a O +by O +źródÅ‚em O +pierwszego O +sygnaÅ‚u O +, O +który O +rozprzestrzeniaÅ‚ O +by O +siÄ™ O +dziÄ™ki O +laptopom O +dziaÅ‚ajÄ…cym O +w O +trybie O +ad O +hoc O +( O +czy O +też O +" O +komputer O +- O +komputer O +" O +) O +. O + +# sent_id = 450 +W O +koÅ„cu O +nastÄ…piÅ‚o O +oficjalne O +kulinarne O +rozpoczÄ™cie O +sezonu O +wiosennego O +. O + +# sent_id = 451 +Flamandzkie O +radioaktywne O +pomidory O +i O +nadmuchiwane O +rzodkiewki O +nas O +nie O +zmyliÅ‚y O +, O +czekali O +Å›my O +na O +poczÄ…tek O +maja O +, O +który O +kojarzy O +nam O +siÄ™ O +z O +jednym O +. O + +# sent_id = 452 +KieÅ‚basÄ… O +z O +grilla O +. O + +# sent_id = 453 +KarkówkÄ… O +. O + +# sent_id = 454 +DÅ‚ugim O +weekendem O +. O + +# sent_id = 455 +Kilka O +dni O +temu O +po O +raz O +pierwszy O +w O +naszym O +sklepie O +mignęły O +nam O +jakieÅ› O +wysuszone O +, O +w O +zaporowej O +cenie O +, O +nie O +dali O +Å›my O +siÄ™ O +nabrać O +. O + +# sent_id = 456 +Czekali O +Å›my O +. O + +# sent_id = 457 +Wreszcie O +pojawiÅ‚y O +siÄ™ O +warzywniaku O +. O + +# sent_id = 458 +BiaÅ‚e O +, O +w O +dwóch O +rozmiarach O +. O + +# sent_id = 459 +Grube O +i O +cienkie O +. O + +# sent_id = 460 +Wiosna O +w O +kuchni O +rozpoczÄ™ta O +. O + +# sent_id = 461 +Na O +debiut O +wybrali O +Å›my O +od O +razu O +dwie O +paczki O +( O +w O +koÅ„cu O +sezon O +jednak O +krótki O +) O +. O + +# sent_id = 462 +Å»eby O +podkreÅ›lić O +rangÄ™ O +wydarzenia O +zrobili O +Å›my O +suflety O +z O +kozim O +serem O +. O + +# sent_id = 463 +Sufletów O +nie O +robili O +Å›my O +nigdy O +wczeÅ›niej O +, O +ale O +skoro O +dorobili O +Å›my O +siÄ™ O +foremek O +, O +należaÅ‚o O +siÄ™ O +kiedyÅ› O +odważyć O +. O + +# sent_id = 464 +Nie O +byÅ‚ O +to O +na O +pewno O +ostatni O +raz O +. O + +# sent_id = 465 +Zestaw O +okazaÅ‚ O +siÄ™ O +w O +ogóle O +udany O +, O +może O +byÅ‚o O +by O +ciekawiej O +z O +zielonymi O +zamiast O +biaÅ‚ych O +. 
O + +# sent_id = 466 +Szparagami O +oczywiÅ›cie O +. O + +# sent_id = 467 +IdÄ™ O +do O +sklepu O +. O + +# sent_id = 468 +SÄ…dy O +wciąż O +nie O +rozumiejÄ… O +koncepcji O +linków O +, O +poza O +tym O +orzecznictwo O +w O +Polsce B-nam_loc_gpe_country +ma O +zastosowanie O +w O +konkretnych O +sprawach O +. O + +# sent_id = 469 +Gdyby O +polegać O +na O +wyrokach O +z O +caÅ‚ego O +Å›wiata O +w O +sprawie O +linków O +, O +internetu B-nam_oth_tech +już O +dawno O +by O +nie O +byÅ‚o O +. O + +# sent_id = 470 +Pewnym O +uÅ‚atwieniem O +w O +dyskusji O +byÅ‚o O +by O +odróżnienie O +linków O +do O +youtube B-nam_pro_media_web +( O +które O +na O +przykÅ‚ad O +ja O +mam O +u O +siebie O +, O +głównie O +dlatego O +, O +że O +flash O +pod O +linuksem B-nam_pro_software +to O +wciąż O +nie O +jest O +miÅ‚e O +doÅ›wiadczenie O +mimo O +wersji O +stabilnej O +i O +oficjalnej O +, O +na O +moim O +starym O +notebooku O +można O +grillować O +kieÅ‚baski O +już O +w O +drugiej O +minucie O +filmu O +) O +od O +" O +embedów O +" O +, O +które O +jednak O +sÄ… O +bliżej O +rozpowszechniania O +. O + +# sent_id = 471 +MiÅ‚oÅ›nikom O +takich O +terminów O +jak O +" O +pola O +eksploatacji O +" O +i O +" O +gÅ‚Ä™bokie O +linki O +" O +polecam O +archiwum O +listy O +CC B-nam_org_organization +, O +gdzie O +odbyÅ‚a O +siÄ™ O +miÅ‚a O +dyskusja O +na O +ten O +temat O +. O + +# sent_id = 472 +Na O +youtube B-nam_pro_media_web +poszkodowanym O +przysÅ‚ugujÄ… O +wedÅ‚ug O +prawa O +amerykaÅ„skiego O +caÅ‚kiem O +sprawne O +procedury O +żądania O +usuniÄ™cia O +treÅ›ci O +, O +może O +nie O +tak O +dobre O +jak O +wizyta O +policji O +o O +szóstej O +rano O +, O +ale O +równie O +skuteczne O +- O +na O +hasÅ‚o O +DMCA B-nam_pro_title_document +niejeden O +twardziel O +sformatowaÅ‚ O +dysk O +twardy O +. 
O + +# sent_id = 473 +Z O +ciekawych O +naciÄ…ganych O +analogii O +jest O +jeszcze O +sprawa O +chÅ‚opaka O +, O +który O +linkowaÅ‚ O +do O +teledysku O +depeche B-nam_org_group_band +mode I-nam_org_group_band +. O + +# sent_id = 474 +PodaÅ‚ O +link O +, O +a O +potem O +poddaÅ‚ O +siÄ™ O +dobrowolnie O +karze O +, O +co O +automatycznie O +oznacza O +, O +że O +przyznaÅ‚ O +siÄ™ O +do O +winy O +, O +a O +winÄ™ O +tÄ™ O +zaklepaÅ‚ O +sÄ…d O +. O + +# sent_id = 475 +Czyli O +jest O +precedens O +, O +oczywiÅ›cie O +w O +kraju O +bez O +precedensów O +. O + +# sent_id = 476 +Z O +drugiej O +strony O +też O +mi O +oczywiÅ›cie O +przychodzi O +do O +gÅ‚owy O +art O +. O +29 O +ust O +. O +1 O +prawa B-nam_pro_title_document +autorskiego I-nam_pro_title_document +, O +w O +koÅ„cu O +" O +prawami O +gatunku O +twórczoÅ›ci O +blog O +" O +jest O +wÅ‚Ä…czanie O +filmów O +z O +youtube B-nam_pro_media_web +, O +szczególnie O +gdy O +siÄ™ O +o O +nich O +pisze O +. O + +# sent_id = 516 +BrnÄ…c O +dalej O +w O +brodatÄ… O +metaforÄ™ O +drogi O +, O +nowÄ… O +propozycjÄ™ O +UKE B-nam_org_institution +trudno O +nawet O +porównać O +z O +typowÄ… O +polskÄ… O +drogÄ… O +krajowÄ… O +, O +peÅ‚nÄ… O +kolein O +, O +źle O +wyprofilowanych O +zakrÄ™tów O +i O +nieoznakowanych O +skrzyżowaÅ„ O +. O + +# sent_id = 517 +To O +raczej O +droga O +polna O +, O +na O +niektórych O +odcinkach O +wyasfaltowana O +, O +ale O +też O +niechlujnie O +. O + +# sent_id = 518 +PomysÅ‚ O +UKE B-nam_org_institution +zakÅ‚ada O +, O +że O +darmowy O +bezprzewodowy O +internet B-nam_oth_tech +- O +do O +którego O +, O +podobnie O +jak O +w O +projekcie O +FCC B-nam_org_institution +w O +USA B-nam_loc_gpe_country +- O +zmuszÄ… O +operatora O +warunki O +koncesji O +na O +czÄ™stotliwość O +, O +bÄ™dzie O +ograniczony O +w O +każdy O +możliwy O +sposób O +: O +256kb O +na O +sekundÄ™ O +, O +500MB O +danych O +na O +miesiÄ…c O +i O +jedna O +sesja O +maksymalnie O +na O +30 O +minut O +. 
O + +# sent_id = 519 +Nawet O +na O +korzystanie O +z O +naszej B-nam_pro_media_web +klasy I-nam_pro_media_web +to O +za O +maÅ‚o O +. O + +# sent_id = 520 +To O +kolejny O +projekt O +na O +froncie O +walki O +z O +cyfrowym O +wykluczeniem O +, O +który O +nawet O +w O +statystykach O +nie O +bÄ™dzie O +widoczny O +. O + +# sent_id = 521 +DostÄ™p O +szerokopasmowy O +jako O +wskaźnik O +zaawansowania O +cywilizacyjnego O +przestaje O +bowiem O +wystarczać O +. O + +# sent_id = 522 +Unia B-nam_org_organization +Europejska I-nam_org_organization +- O +do O +niedawna O +zwracajÄ…ca O +szczególnÄ… O +uwagÄ™ O +na O +zasiÄ™g O +szerokopasmowego O +internetu B-nam_oth_tech +, O +który O +pozwalaÅ‚ O +twierdzić O +, O +że O +w O +rozwoju O +spoÅ‚eczeÅ„stwa O +informacyjnego O +wyprzedzamy O +( O +przynajmniej O +w O +rozmiarze O +EU B-nam_org_organization_sub +- I-nam_org_organization_sub +15 I-nam_org_organization_sub +) O +USA B-nam_loc_gpe_country +- O +poszerza O +perspektywÄ™ O +i O +przyjmuje O +wskaźnik O +zÅ‚ożony O +- O +Broadband B-nam_oth_tech +Performance I-nam_oth_tech +Index I-nam_oth_tech +( O +BPI B-nam_oth_tech +) O +. O + +# sent_id = 523 +Sam O +ogólny O +zasiÄ™g O +dostÄ™pu O +szerokopasmowego O +nie O +jest O +nawet O +skÅ‚adnikiem O +BPI B-nam_oth_tech +. O + +# sent_id = 524 +Liczy O +siÄ™ O +dostÄ™p O +na O +obszarach O +wiejskich O +i O +to O +tylko O +przez O +kabel O +( O +czyli O +caÅ‚e O +darmowe O +wifi B-nam_oth_tech +na O +razie O +siÄ™ O +nie O +zaÅ‚apie O +) O +, O +a O +ze O +starego O +wskaźnika O +zostaÅ‚y O +elementy O +jakoÅ›ciowe O +- O +prÄ™dkość O +, O +cena O +i O +korzystanie O +z O +usÅ‚ug O +. 
O + +# sent_id = 525 +Co O +ciekawe O +- O +bierze O +siÄ™ O +pod O +uwagÄ™ O +ceny O +wyÅ‚Ä…cznie O +w O +zakresach O +1 O +- O +2 O +Mbit O +oraz O +2 O +- O +8 O +Mbit O +( O +w O +EU B-nam_org_organization_sub +- I-nam_org_organization_sub +15 I-nam_org_organization_sub +niski O +standard O +to O +1 O +, O +5 O +Mbit O +) O +, O +czyli O +presja O +na O +operatorów O +, O +żeby O +obniżyli O +ceny O +w O +przedziale O +poniżej O +1 O +Mbit O +, O +którÄ… O +swoim O +pomysÅ‚em O +ma O +zamiar O +wywierać O +UKE B-nam_org_institution +, O +niewiele O +pomoże O +. O + +# sent_id = 526 +A O +tymczasem O +w O +kategorii O +" O +cena O +" O +Polska B-nam_loc_gpe_country +wypada O +w O +zasadzie O +najgorzej O +w O +caÅ‚ej O +UE B-nam_org_organization +( O +porównywalnie O +źle O +jedynie O +SÅ‚owacja B-nam_loc_gpe_country +) O +. O + +# sent_id = 527 +Internet B-nam_oth_tech +wedÅ‚ug O +UKE B-nam_org_institution +nie O +pomoże O +też O +w O +rozwoju O +zaawansowanych O +usÅ‚ug O +. O + +# sent_id = 528 +500MB O +pójdzie O +na O +samo O +pobieranie O +aktualizacji O +systemu O +operacyjnego O +, O +baz O +danych O +wirusów O +( O +w O +każdym O +razie O +na O +niektórych O +systemach O +) O +, O +sprawdzanie O +prognozy O +pogody O +na O +portalu O +i O +czytanie O +poczty O +przez O +www B-nam_oth_tech +. O + +# sent_id = 529 +Może O +i O +nie O +bÄ™dzie O +darmowych O +torrentów O +, O +ale O +nie O +bÄ™dzie O +też O +Å›ciÄ…gania O +wolnego O +oprogramowania O +, O +tekstów O +naukowych O +udostÄ™pnianych O +w O +sposób O +otwarty O +, O +ani O +kupowania O +jakichkolwiek O +cyfrowych O +treÅ›ci O +. O + +# sent_id = 530 +UKE B-nam_org_institution +ma O +zamiar O +zaoferować O +wiÄ™c O +wykluczonym O +internet B-nam_oth_tech +( O +inne O +ministerstwo O +da O +im O +laptopy O +? O +) O +, O +który O +pozostawi O +ich O +w O +tym O +samym O +miejscu O +, O +bo O +granica O +cyfrowego O +wykluczenia O +siÄ™ O +przesunie O +. 
O + +# sent_id = 531 +Tymczasem O +wciąż O +nowoczesnych O +usÅ‚ug O +w O +polskim O +internecie B-nam_oth_tech +jest O +maÅ‚o O +- O +e O +- O +administracja O +prawie O +nie O +istnieje O +( O +tu O +wskaźniki O +siÄ™ O +pewnie O +pogorszÄ… O +, O +gdy O +wygasnÄ… O +certyfikaty O +rozdawane O +przez O +ZUS B-nam_org_institution +, O +a O +mikroprzedsiÄ™biorcy O +zamiast O +pÅ‚acić O +za O +bezpieczny O +podpis O +elektroniczny O +stanÄ… O +w O +kolejce O +na O +poczcie O +) O +, O +uniwersytety O +bardzo O +sÅ‚abo O +radzÄ… O +sobie O +z O +obecnoÅ›ciÄ… O +w O +sieci O +, O +a O +telewizja O +plÄ…cze O +siÄ™ O +w O +DRMach O +. O + +# sent_id = 545 +W O +odstÄ™pie O +kilku O +dni O +pojawiÅ‚y O +siÄ™ O +dwie O +zupeÅ‚nie O +sprzeczne O +diagnozy O +wpÅ‚ywu O +kryzysu O +na O +wolne O +oprogramowanie O +i O +wolnÄ… O +kulturÄ™ O +. O + +# sent_id = 546 +W O +narożniku O +czerwonym O +, O +po O +stronie O +popytu O +, O +szef O +Red B-nam_org_company +Hata I-nam_org_company +Jim B-nam_liv_person +Whitehurst I-nam_liv_person +widzi O +te O +wszystkie O +zamówienia O +na O +oprogramowanie O +bez O +opÅ‚at O +licencyjnych O +. O + +# sent_id = 547 +W O +narożniku O +czarnym O +, O +po O +stronie O +podaży O +, O +dyżurny O +krytyk O +Web B-nam_oth_tech +2 I-nam_oth_tech +. I-nam_oth_tech +0 I-nam_oth_tech +, O +Andrew B-nam_liv_person +Keen I-nam_liv_person +, O +z O +wizjÄ… O +bezrobotnych O +pożerajÄ…cych O +ostatnie O +myszki O +. O + +# sent_id = 548 +W O +skrócie O +: O +popyt O +na O +" O +darmowe O +" O +towary O +siÄ™ O +zwiÄ™kszy O +, O +bo O +trzeba O +ciąć O +koszty O +( O +tak O +mówi O +Whitehurst B-nam_liv_person +) O +, O +a O +podaż O +pracowników O +chÄ™tnych O +do O +poÅ›wiÄ™cania O +swojego O +czasu O +siÄ™ O +zmniejszy O +, O +bo O +ludzie O +poznajÄ… O +wartość O +swojej O +pracy O +( O +tak O +mówi O +Keen B-nam_liv_person +) O +. 
O + +# sent_id = 549 +Czyli O +Red B-nam_org_company +Hat I-nam_org_company +bÄ™dzie O +miaÅ‚ O +wiÄ™cej O +zamówieÅ„ O +, O +ale O +nikt O +mu O +za O +darmo O +już O +nie O +pomoże O +. O + +# sent_id = 550 +Keen B-nam_liv_person +zaczyna O +po O +swojemu O +od O +bÅ‚Ä™dnego O +zaÅ‚ożenia O +- O +że O +darmowa O +praca O +w O +ramach O +wolnego O +oprogramowania O +, O +wolnej O +kultury O +i O +Web B-nam_oth_tech +2 I-nam_oth_tech +. I-nam_oth_tech +0 I-nam_oth_tech +pozbawiona O +jest O +wartoÅ›ci O +, O +bo O +nikt O +nie O +pÅ‚aci O +za O +niÄ… O +pieniÄ™dzmi O +, O +a O +ludzie O +pracujÄ… O +bez O +sensu O +za O +darmo O +, O +bo O +w O +czasach O +dobrobytu O +nisko O +ceniÄ… O +pracÄ™ O +i O +pieniÄ…dze O +( O +jest O +to O +jakieÅ› O +alternatywne O +wytÅ‚umaczenie O +dla O +recesji O +- O +ludziom O +przewraca O +siÄ™ O +w O +gÅ‚owach O +i O +kiepsko O +pracujÄ… O +) O +, O +wiÄ™c O +sÄ… O +skÅ‚onni O +pisać O +hasÅ‚a O +do O +Wikipedii B-nam_pro_media_web +. O + +# sent_id = 551 +I O +można O +byÅ‚o O +by O +tutaj O +wesoÅ‚o O +popastwić O +siÄ™ O +nad O +jednowymiarowym O +punktem O +widzenia O +Keena B-nam_liv_person +, O +zÅ‚oÅ›liwa O +notka O +na O +poniedziaÅ‚ek O +, O +w O +koÅ„cu O +tylko O +Bill B-nam_liv_person +Gates I-nam_liv_person +to O +lepszy O +temat O +do O +żartów O +. O + +# sent_id = 552 +Jednak O +im O +dÅ‚użej O +pisaÅ‚ O +em O +notkÄ™ O +, O +tym O +bardziej O +szÅ‚a O +ona O +do O +kosza O +. O + +# sent_id = 553 +I O +poszÅ‚a O +. O + +# sent_id = 554 +Nie O +to O +, O +żeby O +m O +miaÅ‚ O +teraz O +siÄ™ O +z O +diagnozÄ… O +Keena B-nam_liv_person +zgodzić O +, O +produkcja O +partnerska O +nie O +zniknie O +, O +ale O +- O +jeÅ›li O +rzeczywiÅ›cie O +dojdzie O +do O +gÅ‚Ä™bokiej O +recesji O +- O +coÅ› O +musi O +siÄ™ O +zmienić O +. O + +# sent_id = 555 +Produkcja O +partnerska O +opiera O +siÄ™ O +na O +kilku O +elementach O +, O +uproszczony O +podziaÅ‚ O +może O +być O +taki O +: O + +# sent_id = 556 +1 O +. 
O + +# sent_id = 557 +Nadmiar O +mocy O +obliczeniowych O +i O +pojemnoÅ›ci O +sieci O +w O +wielu O +krajach O +. O + +# sent_id = 558 +Komputer O +wÅ‚Ä…czony O +bez O +przerwy O +i O +bez O +przerwy O +przesyÅ‚ajÄ…cy O +dane O +w O +internecie B-nam_oth_tech +jest O +normÄ… O +: O +przy O +zaÅ‚ożeniu O +, O +że O +nie O +jest O +to O +obciążeniem O +dla O +budżetu O +użytkownika O +. O + +# sent_id = 559 +Niby O +drobiazg O +, O +ale O +projekty O +, O +które O +najbardziej O +korzystajÄ… O +z O +tego O +nadmiaru O +( O +wszelkie O +WielkieObliczenia O +@ O +home O +albo O +torrenty O +) O +polegajÄ… O +głównie O +na O +iloÅ›ci O +i O +każde O +jej O +zmniejszenie O +bÄ™dzie O +bolesne O +. O + +# sent_id = 560 +2 O +. O + +# sent_id = 561 +OpÅ‚acalność O +w O +porównaniu O +z O +innymi O +formami O +organizacji O +i O +ochrony O +praw O +wÅ‚asnoÅ›ci O +( O +rynek O +, O +firma O +) O +. O + +# sent_id = 562 +W O +przypadku O +gdy O +Å‚atwo O +znaleźć O +dochody O +z O +innych O +źródeÅ‚ O +, O +budowanie O +typowej O +struktury O +organizacyjnej O +albo O +poszukiwanie O +ochrony O +( O +w O +prawie O +wÅ‚asnoÅ›ci O +intelektualnej O +) O +może O +być O +rzeczywiÅ›cie O +maÅ‚o O +opÅ‚acalne O +w O +porównaniu O +z O +produkcjÄ… O +partnerskÄ… O +. O + +# sent_id = 563 +Jednak O +nietrudno O +sobie O +wyobrazić O +, O +że O +dla O +wielu O +uczestników O +produkcji O +partnerskiej O +ten O +rachunek O +wyjdzie O +zupeÅ‚nie O +inaczej O +w O +warunkach O +walki O +o O +przetrwanie O +. O + +# sent_id = 564 +Być O +może O +jeszcze O +za O +wczeÅ›nie O +obstawiać O +, O +kiedy O +Bill B-nam_liv_person +Gates I-nam_liv_person +wróci O +do O +Microsoftu B-nam_org_company +, O +skoro O +oficjalnie O +ma O +odejść O +dopiero O +w O +lipcu O +. O + +# sent_id = 565 +William B-nam_liv_person +Hill I-nam_liv_person +z O +Londynu B-nam_loc_gpe_city +takich O +zakÅ‚adów O +jeszcze O +nie O +przyjmuje O +. 
O + +# sent_id = 566 +Na O +razie O +Gates B-nam_liv_person +zaliczyÅ‚ O +ostatnie O +wystÄ…pienie O +na O +CES B-nam_eve_human +. O + +# sent_id = 567 +W O +ostatnich O +latach O +wystÄ…pienia O +Gatesa B-nam_liv_person +pozostawaÅ‚y O +w O +cieniu O +przemówieÅ„ O +Steve'a B-nam_liv_person +Jobsa I-nam_liv_person +. O + +# sent_id = 568 +Steve B-nam_liv_person +wystÄ…pi O +dopiero O +za O +tydzieÅ„ O +( O +15 O +stycznia O +) O +, O +ale O +nawet O +jeżeli O +ogÅ‚osi O +, O +że O +odchodzi O +, O +nie O +bÄ™dzie O +to O +wielka O +sensacja O +. O + +# sent_id = 569 +Raz O +już O +to O +zrobiÅ‚ O +. O + +# sent_id = 570 +I O +wróciÅ‚ O +. O + +# sent_id = 571 +Michael B-nam_liv_person +Jordan I-nam_liv_person +też O +odszedÅ‚ O +i O +wróciÅ‚ O +, O +żeby O +po O +raz O +kolejny O +upokorzyć O +konkurencjÄ™ O +. O + +# sent_id = 572 +TrochÄ™ O +siÄ™ O +może O +zapÄ™dziÅ‚ O +em O +. O + +# sent_id = 573 +Gates B-nam_liv_person +jako O +Jordan B-nam_liv_person +oprogramowania O +? O + +# sent_id = 574 +W O +takim O +razie O +kim O +byÅ‚ O +by O +Linus B-nam_liv_person +Torvalds I-nam_liv_person +? O + +# sent_id = 575 +Karlem B-nam_liv_person +Malone I-nam_liv_person +? O + +# sent_id = 576 +W O +tegorocznym O +wystÄ…pieniu O +Bill B-nam_liv_person +Gates I-nam_liv_person +zaprezentowaÅ‚ O +film O +ze O +swojego O +ostatniego O +dnia O +w O +pracy O +. O + +# sent_id = 577 +NaoglÄ…daÅ‚ O +siÄ™ O +chyba O +za O +dużo O +" O +The B-nam_pro_title +Office I-nam_pro_title +" O +( O +warto O +też O +odwiedzić O +stronÄ™ O +firmy O +) O +. O + +# sent_id = 578 +Nie O +dziwiÄ™ O +siÄ™ O +, O +ale O +nie O +oto O +chodzi O +. O + +# sent_id = 579 +Bill B-nam_liv_person +Gates I-nam_liv_person +zaprezentowaÅ‚ O +typowy O +film O +z O +youtube B-nam_pro_media_web +, O +typowy O +przykÅ‚ad O +kultury O +remiksu O +, O +typowy O +mashup O +. 
O + +# sent_id = 580 +ByÅ‚y O +wypowiedzi O +znanych O +gwiazd O +, O +zmontowane O +fragmenty O +filmów O +i O +trochÄ™ O +wygÅ‚upów O +wÅ‚asnych O +. O + +# sent_id = 581 +Z O +tÄ… O +różnicÄ… O +, O +że O +Gates B-nam_liv_person +nic O +nie O +remiksowaÅ‚ O +ani O +nie O +mashupowaÅ‚ O +. O + +# sent_id = 582 +Nie O +spÄ™dziÅ‚ O +kilku O +godzin O +szukajÄ…c O +Baracka B-nam_liv_person +Obamy I-nam_liv_person +ani O +Hillary B-nam_liv_person +Clinton I-nam_liv_person +mówiÄ…cych O +pasujÄ…ce O +kwestie O +. O + +# sent_id = 583 +( O +SwojÄ… O +drogÄ… O +, O +gdyby O +to O +byÅ‚ O +prawdziwy O +remiks O +, O +mógÅ‚ O +by O +próbować O +zgÅ‚osić O +siÄ™ O +też O +do O +Mitta B-nam_liv_person +Romneya I-nam_liv_person +czy O +Freda B-nam_liv_person +Thompsona I-nam_liv_person +) O +. O + +# sent_id = 584 +Nie O +szukaÅ‚ O +przypadkowych O +wycinków O +dla O +pozostaÅ‚ych O +postaci O +. O + +# sent_id = 585 +Paradoksalnie O +, O +wiarygodnoÅ›ci O +nie O +uzyskaÅ‚ O +dużo O +wiÄ™kszej O +niż O +kiepsko O +podÅ‚ożony O +gÅ‚os O +do O +montowanego O +na O +siÅ‚Ä™ O +ruchu O +warg O +, O +ale O +może O +o O +dramaturgiÄ™ O +reklamy O +proszku O +do O +prania O +tu O +chodziÅ‚o O +( O +wyróżniajÄ…cÄ… O +siÄ™ O +rolÄ™ O +odegraÅ‚ O +jedynie O +Barack B-nam_liv_person +Obama I-nam_liv_person +) O +. O + +# sent_id = 586 +Doskonale O +podsumowaÅ‚ O +filozofiÄ™ O +Microsoftu B-nam_org_company +- O +zrobimy O +to O +samo O +, O +co O +inni O +robili O +przed O +nami O +, O +tylko O +za O +wiÄ™ksze O +pieniÄ…dze O +. O + +# sent_id = 587 +Można O +wprawdzie O +powiedzieć O +, O +że O +zawsze O +porównanie O +dwóch O +krajów O +trafi O +na O +różne O +problemy O +, O +na O +nieporównywalne O +elementy O +. O + +# sent_id = 588 +Jednak O +można O +pewnie O +wybrać O +sobie O +porównanie O +lepiej O +albo O +gorzej O +. O + +# sent_id = 589 +Irlandia B-nam_loc_gpe_country +i O +Finlandia B-nam_loc_gpe_country +należą O +do O +kategorii O +" O +zdecydowanie O +gorzej O +" O +. 
O + +# sent_id = 590 +Kilka O +powodów O +: O +Liczba O +ludnoÅ›ci O +jest O +niewielka O +, O +inna O +skala O +wszelkich O +typowych O +problemów O +paÅ„stwa O +dobrobytu O +( O +emerytury O +, O +sÅ‚użba O +zdrowia O +) O +. O + +# sent_id = 591 +Nie O +dość O +, O +że O +ludzi O +maÅ‚o O +, O +to O +jeszcze O +kraje O +leżą O +na O +uboczu O +, O +tranzyt O +ich O +nie O +potrzebuje O +, O +wiÄ™c O +nie O +trzeba O +ponosić O +takich O +nakÅ‚adów O +na O +infrastrukturÄ™ O +( O +drogi O +, O +koleje O +) O +. O + +# sent_id = 592 +Nie O +byÅ‚o O +za O +wiele O +starego O +przemysÅ‚u O +( O +ciężkiego O +, O +lekkiego O +) O +, O +wiÄ™c O +można O +oszczÄ™dzić O +na O +poważnych O +reformach O +strukturalnych O +i O +skupić O +siÄ™ O +na O +innych O +polach O +dziaÅ‚ania O +. O + +# sent_id = 593 +Irlandia B-nam_loc_gpe_country +miaÅ‚a O +szczęście O +- O +należaÅ‚a O +do O +EWG B-nam_org_organization +, O +byÅ‚a O +biednym O +krajem O +, O +w O +którym O +wszyscy O +mówiÄ… O +w O +jÄ™zyku O +zbliżonym O +do O +angielskiego O +, O +a O +akurat O +na O +fali O +nowej O +gospodarki O +pÅ‚ynęły O +inwestycje O +technologiczne O +( O +Microsoft B-nam_org_company +- O +1985 O +, O +Intel B-nam_org_company +- O +1989 O +, O +Dell B-nam_org_company +- O +1990 O +i O +już O +w O +innych O +czasach O +Google B-nam_org_company +- O +2003 O +) O +. O + +# sent_id = 594 +Irlandia B-nam_loc_gpe_country +szczęściu O +wprawdzie O +pomogÅ‚a O +, O +obniżajÄ…c O +podatki O +i O +utrzymujÄ…c O +wspólnym O +wysiÅ‚kiem O +niższe O +wynagrodzenia O +( O +przynajmniej O +na O +poczÄ…tku O +) O +. O + +# sent_id = 595 +Może O +jednak O +te O +inwestycje O +stracić O +równie O +szybko O +, O +jak O +je O +zyskaÅ‚a O +. O + +# sent_id = 596 +Pytanie O +, O +czy O +jest O +silna O +na O +tyle O +, O +żeby O +poradzić O +sobie O +bez O +nich O +. 
O + +# sent_id = 597 +I O +ten O +proces O +radzenia O +sobie O +bez O +korporacji O +, O +które O +wytwarzajÄ… O +wiÄ™kszość O +B O ++ O +R O +, O +sporÄ… O +część O +PKB B-nam_oth +i O +eksportu O +, O +bÄ™dzie O +chyba O +dla O +Polski B-nam_loc_gpe_country +najciekawszy O +. O + +# sent_id = 598 +Chociaż O +na O +pewno O +nie O +uda O +siÄ™ O +Polsce B-nam_loc_gpe_country +zdobyć O +aż O +tylu O +inwestycji O +- O +konkurencja O +jest O +dość O +duża O +- O +w O +samej O +Unii B-nam_org_organization +z O +PolskÄ… B-nam_loc_gpe_country +konkuruje O +co O +najmniej O +8 O +krajów O +( O +Czechy B-nam_loc_gpe_country +, O +SÅ‚owacja B-nam_loc_gpe_country +, O +WÄ™gry B-nam_loc_gpe_country +, O +BuÅ‚garia B-nam_loc_gpe_country +, O +Rumunia B-nam_loc_gpe_country +, O +Litwa B-nam_loc_gpe_country +, O +Åotwa B-nam_loc_gpe_country +, O +Estonia B-nam_loc_gpe_country +) O +. O + +# sent_id = 599 +Finlandia B-nam_loc_gpe_country +startowaÅ‚a O +już O +w O +latach O +osiemdziesiÄ…tych O +z O +wysokiego O +puÅ‚apu O +( O +wiÄ™kszy O +PKB B-nam_oth +per O +capita O +niż O +w O +Irlandii B-nam_loc_gpe_country +w O +latach O +dziewięćdziesiÄ…tych O +) O +, O +wiÄ™c O +miaÅ‚a O +kapitaÅ‚ O +niezbÄ™dny O +do O +inwestycji O +w O +edukacjÄ™ O +i O +tworzenie O +wÅ‚asnej O +gospodarki O +opartej O +na O +wiedzy O +bez O +pomocy O +zagranicznych O +inwestycji O +. O + +# sent_id = 600 +Ani O +Irlandia B-nam_loc_gpe_country +( O +w O +1990 O +) O +, O +ani O +Polska B-nam_loc_gpe_country +( O +w O +2000 O +) O +takiego O +kapitaÅ‚u O +nie O +miaÅ‚y O +. O + +# sent_id = 601 +Dlaczego O +nie O +porównuje O +siÄ™ O +Polski B-nam_loc_gpe_country +z O +ArabiÄ… B-nam_loc_gpe_country +SaudyjskÄ… I-nam_loc_gpe_country +albo O +NorwegiÄ… B-nam_loc_gpe_country +? O + +# sent_id = 602 +" O +Oni O +ropÄ™ O +majÄ… O +" O +- O +każdy O +powie O +i O +utnie O +dyskusjÄ™ O +. 
O + +# sent_id = 603 +Duży O +kapitaÅ‚ O +to O +równie O +ważny O +zasób O +, O +ale O +może O +trudniejszy O +do O +wyobrażenia O +. O + +# sent_id = 604 +No O +dobrze O +, O +ale O +czy O +to O +znaczy O +, O +że O +nie O +warto O +siÄ™ O +starać O +? O + +# sent_id = 605 +Warto O +. O + +# sent_id = 606 +Finlandia B-nam_loc_gpe_country +powinna O +być O +przykÅ‚adem O +choćby O +na O +to O +, O +że O +lepiej O +czasem O +zaryzykować O +rozwój O +nowych O +technologii O +zamiast O +Å‚adować O +fortuny O +w O +bezpieczne O +przetrwalniki O +( O +Krauze B-nam_liv_person +i O +Kulczyk B-nam_liv_person +- O +ropa O +, O +Czarnecki B-nam_liv_person +- O +nieruchomoÅ›ci O +) O +. O + +# sent_id = 607 +JednoczeÅ›nie O +jednak O +, O +jak O +widzÄ™ O +bezrefleksyjne O +porównywanie O +wskaźników O +makro O +Polski B-nam_loc_gpe_country +z O +FinlandiÄ… B-nam_loc_gpe_country +i O +IrlandiÄ… B-nam_loc_gpe_country +, O +nóż O +mi O +siÄ™ O +w O +kieszeni O +otwiera O +. O + +# sent_id = 608 +Bardzo O +chciaÅ‚a O +m O +wziąć O +udziaÅ‚ O +w O +" O +Cytrusowym B-nam_eve_human +Tygodniu I-nam_eve_human +" O +, O +ale O +oczywiÅ›cie O +, O +byÅ‚a O +m O +nadal O +tak O +osÅ‚abiona O +że O +ledwo O +siÄ™ O +mogÅ‚a O +m O +ruszać O +, O +o O +gotowaniu O +, O +pieczeniu O +, O +i O +jeszcze O +mniej O +o O +blogowaniu O +nie O +byÅ‚o O +mowy O +: O +p O +Ale O +przynajmniej O +jeden O +przepis O +na O +cytrusy O +podam O +, O +mimo O +że O +na O +udziaÅ‚ O +w O +akcji O +za O +późno O +, O +to O +i O +tak O +" O +poakcjowo O +" O +bez O +udziaÅ‚u O +też O +można O +. O + +# sent_id = 609 +Ta O +lemoniada O +zawsze O +przypomina O +mi O +bajkÄ™ O +Perraulta B-nam_liv_person +, O +nie O +pamiÄ™tam O +która O +to O +byÅ‚a O +, O +ale O +wystÄ™powaÅ‚y O +tam O +krasnoludki O +które O +mieszkaÅ‚y O +pod O +korzeniami O +drzew O +, O +i O +tam O +wÅ‚aÅ›nie O +pichciÅ‚y O +i O +gotowaÅ‚y O +, O +i O +miÄ™dzy O +innymi O +robiÅ‚y O +pysznÄ… O +lemoniadÄ™ O +. 
O + +# sent_id = 610 +I O +o O +tych O +bÄ…belkach O +w O +lemoniadzie O +ja O +najlepiej O +zapamiÄ™taÅ‚a O +m O +z O +caÅ‚ej O +tej O +bajki O +: O +o O +) O +( O +zdjÄ™cia O +mam O +tylko O +gdy O +mi O +siÄ™ O +udaje O +ucelować O +w O +czas O +i O +sÅ‚oÅ„ce O +, O +bez O +sÅ‚oÅ„ca O +i O +bez O +czasu O +zdjÄ™cia O +" O +wychodzÄ… O +" O +takie O +że O +: O +p O +) O +ZÅ‚ocista O +lemoniada O +SkÅ‚adniki O +( O +na O +okoÅ‚o O +1 O +litr O +napoju O +) O +: O +1 O +litr O +wody O +mineralnej O +albo O +gazowanej O +( O +można O +też O +normalnÄ… O +przegotowanÄ… O +zimnÄ… O +wodÄ™ O +) O +3 O +- O +4 O +duże O +Å‚yżki O +stoÅ‚owe O +syropu O +z O +agawy O +( O +albo O +wiÄ™cej O +, O +do O +smaku O +) O +3 O +- O +4 O +duże O +Å‚yżki O +stoÅ‚owe O +Å›wieżego O +soku O +z O +cytryny O +( O +bez O +miąższu O +i O +pestek O +) O +Wyciskamy O +sok O +cytrynowy O +, O +odcedzamy O +miąższ O +i O +pestki O +, O +mieszamy O +z O +syropem O +z O +agawy O +, O +zalewamy O +wodÄ… O +mineralnÄ… O +albo O +gazowanÄ… O +. O + +# sent_id = 611 +Mieszamy O +, O +sprawdzamy O +czy O +nam O +odpowiada O +, O +w O +razie O +czego O +dodajemy O +wiÄ™cej O +syropu O +lub O +cytryny O +. O + +# sent_id = 612 +Trzyma O +siÄ™ O +w O +lodówce O +przez O +okoÅ‚o O +tydzieÅ„ O +. O + +# sent_id = 613 +Co O +prawda O +lemoniada O +nie O +brzmi O +jak O +przepis O +na O +zimÄ™ O +, O +ale O +tak O +naprawdÄ™ O +to O +u O +nas O +czasem O +kaloryfery O +w O +zimie O +sÄ… O +tak O +mocno O +rozgrzane O +, O +że O +jest O +aż O +za O +ciepÅ‚o O +. 
O + +# sent_id = 614 +A O +regulować O +nie O +ma O +jak O +, O +najwyżej O +caÅ‚kowicie O +wyÅ‚Ä…czyć O +, O +a O +to O +nawet O +czasem O +nie O +pomaga O +i O +dalej O +grzeje O +: O +p O +WiÄ™c O +czÄ™sto O +zdarza O +siÄ™ O +że O +marzÄ™ O +o O +chÅ‚odnych O +napojach O +, O +a O +szczególnie O +lemoniadzie O +" O +jak O +z O +bajki O +" O +: O +) O + +# sent_id = 615 +16 O +lat O +temu O +Polska B-nam_loc_gpe_country +wyszÅ‚a O +z O +okresu O +PRL-u B-nam_loc_gpe_country +jako O +paÅ„stwo O +odbudowane O +, O +ale O +sÅ‚abe O +, O +drÄ™czone O +rozÅ‚amami O +spoÅ‚ecznymi O +, O +które O +mimo O +usilnych O +staraÅ„ O +socjalistycznego O +reżimu O +, O +choć O +nieco O +przytuszowane O +, O +nigdy O +nie O +zostaÅ‚y O +zażegnane O +… O + +# sent_id = 616 +Epoka O +przemian O +gospodarczych O +uderzyÅ‚a O +w O +nasz O +kraj O +rozwiewajÄ…c O +wszelkie O +miraże O +. O + +# sent_id = 617 +Tu O +nie O +byÅ‚o O +już O +miejsca O +na O +jedynie O +sÅ‚usznÄ… O +politykÄ™ O +partii O +, O +tu O +realia O +wzięły O +górÄ™ O +nad O +socjalistycznymi O +mrzonkami O +, O +których O +nigdy O +zresztÄ… O +nie O +zrealizowano O +. O + +# sent_id = 618 +Wszystko O +to O +sprawiÅ‚o O +, O +że O +w O +Polsce B-nam_loc_gpe_country +daÅ‚y O +o O +sobie O +znać O +dotychczas O +czÄ™sto O +ignorowane O +i O +marginalizowane O +zjawiska O +– O +bezrobocie O +, O +bezdomność O +, O +skrajna O +bieda O +i O +ubóstwo O +. O + +# sent_id = 632 +Autorzy O +miesiÄ™cznika O +. B-nam_pro_media_periodic +psd I-nam_pro_media_periodic +Photoshop I-nam_pro_media_periodic +postanowili O +uproÅ›cić O +czytelnikom O +drogÄ™ O +do O +nabycia O +magazynu O +. O + +# sent_id = 633 +UdostÄ™pnili O +oni O +za O +darmo O +magazyn O +w O +formacie O +pdf B-nam_oth_tech +jak O +i O +pliki O +źródÅ‚owe O +do O +pobrania O +w O +Internecie B-nam_oth_tech +. O + +# sent_id = 634 +Do O +tej O +pory O +można O +byÅ‚o O +kupić O +jedynie O +wersjÄ™ O +papierowÄ… O +z O +pÅ‚ytÄ… O +. 
O + +# sent_id = 635 +Warto O +dodać O +, O +że O +magazyn O +jest O +po O +raz O +ostatni O +w O +sklepach O +i O +salonikach O +prasowych O +- O +od O +czerwca O +dostÄ™pna O +już O +bÄ™dzie O +jedynie O +wersja O +elektroniczna O +. O + +# sent_id = 636 +W O +niedalekiej O +przyszÅ‚oÅ›ci O +dostÄ™pne O +bÄ™dÄ… O +także O +archiwa O +. O + +# sent_id = 637 +Czy O +caÅ‚y O +czas O +za O +darmo O +? O + +# sent_id = 638 +Tego O +nie O +udaÅ‚o O +siÄ™ O +ustalić O +. O + +# sent_id = 639 +EmanujÄ…ca O +pozytywnyi O +emocjami O +, O +ciepÅ‚em O +i O +optymizmem O +reklama O +zachÄ™cajÄ…ca O +do O +odwiedzania O +Islandii B-nam_loc_gpe_country +przydaÅ‚a O +by O +siÄ™ O +Polsce B-nam_loc_gpe_country +. O +. O +. O + +# sent_id = 640 +Za O +" O +x O +" O +lat O +może O +dogonimy O +PR-owców O +z O +wyspy O + +# sent_id = 641 +W O +swoich O +recenzjach O +muzycznych O +Wojciech B-nam_liv_person +Mann I-nam_liv_person +raz O +po O +raz O +opisuje O +niszowych O +artystów O +okoÅ‚obluesowych O +ze O +Stanów B-nam_loc_gpe_city +, O +piszÄ…c O +zawsze O +coÅ› O +w O +stylu O +" O +skÄ…d O +oni O +siÄ™ O +tam O +biorÄ… O +" O +. O + +# sent_id = 642 +CiÄ…gle O +nowi O +i O +ciÄ…gle O +dobrzy O +. O + +# sent_id = 643 +Nic O +tylko O +powtórzyć O +za O +Wojciechem B-nam_liv_person +: O +skÄ…d O +oni O +siÄ™ O +tam O +biorÄ… O +? O +! O + +# sent_id = 644 +Chuck B-nam_liv_person +Palahniuk I-nam_liv_person +nie O +jest O +debiutantem O +, O +ale O +skÄ…d O +oni O +. O +. O +. O + +# sent_id = 645 +Znowu O +cholernie O +dobra O +proza O +zza O +wielkiej O +wody O +. O + +# sent_id = 646 +Postindustrialna O +ballada O +peÅ‚na O +absurdalnych O +, O +arcydziwacznych O +, O +choć O +przecież O +rzeczywistych O +, O +rekwizytów O +. O + +# sent_id = 647 +Melancholia O +i O +niespeÅ‚nienie O +. O + +# sent_id = 648 +Smutek O +Hoppera B-nam_liv_person +, O +inspiracja O +dla O +Cunninghama B-nam_liv_person +. 
O + +# sent_id = 649 +Historia O +wychowanka O +radykalnej O +sekty O +, O +który O +żyje O +w O +odosobnieniu O +, O +w O +kolonii O +współwyznawców O +. O + +# sent_id = 650 +Gdy O +koÅ„czy O +szkoÅ‚Ä™ O +, O +jako O +niepierworodny O +syn O +zostaje O +wysÅ‚any O +w O +Å›wiat O +aby O +zarabiaÅ‚ O +na O +sektÄ™ O +. O + +# sent_id = 651 +Pozostali O +w O +domach O +współwyznawcy O +po O +jakimÅ› O +czasie O +, O +zgodnie O +z O +najwyższym O +obowiÄ…zkiem O +swojej O +religii O +, O +popeÅ‚niajÄ… O +masowe O +samobójstwo O +. O + +# sent_id = 652 +Jest O +to O +automatyczny O +nakaz O +dla O +wszystkich O +, O +którzy O +pracujÄ… O +poza O +koloniÄ… O +, O +aby O +czym O +prÄ™dzej O +doÅ‚Ä…czyli O +do O +nich O +w O +zaÅ›wiatach O +. O + +# sent_id = 653 +Od O +poczÄ…tku O +wiadomo O +, O +jak O +siÄ™ O +skoÅ„czy O +ta O +historia O +o O +kimÅ› O +, O +kto O +przez O +lata O +opiekowaÅ‚ O +siÄ™ O +domem O +i O +ogrodem O +obcych O +ludzi O +. O + +# sent_id = 654 +W O +ogrodzie O +sadzaÅ‚ O +sztuczne O +kwiaty O +ukradzione O +z O +krypty O +, O +spryskiwaÅ‚ O +je O +farbÄ… O +i O +perfumami O +. O + +# sent_id = 655 +Nikt O +siÄ™ O +nigdy O +nie O +zorientowaÅ‚ O +. O + +# sent_id = 656 +CzÅ‚owiek O +ten O +najbardziej O +na O +Å›wiecie O +byÅ‚ O +przywiÄ…zany O +do O +swojej O +zÅ‚otej O +rybki O +, O +która O +nazywaÅ‚a O +siÄ™ O +numer O +sześćset O +czterdzieÅ›ci O +jeden O +. O + +# sent_id = 657 +SkoÅ„czyÅ‚ O +jako O +pasażer O +lotu O +dwa B-nam_eve_human +tysiÄ…ce I-nam_eve_human +trzydzieÅ›ci I-nam_eve_human +dziewięć I-nam_eve_human +opowiadajÄ…cy O +czarnej O +skrzynce O +historiÄ™ O +swojego O +życia O +. O + +# sent_id = 658 +Yochai B-nam_liv_person +Benkler I-nam_liv_person +stworzyÅ‚ O +koncepcjÄ™ O +commons O +- O +based O +peer O +production O +. 
O + +# sent_id = 659 +KorzystajÄ…c O +ze O +sposobu O +rozumowania O +Ronalda B-nam_liv_person +Coase'a I-nam_liv_person +twierdzi O +on O +, O +że O +w O +okreÅ›lonych O +okolicznoÅ›ciach O +( O +spowodowanych O +przez O +koszty O +transakcji O +i O +prawa O +wÅ‚asnoÅ›ci O +) O +powstaje O +trzecia O +droga O +w O +organizacji O +produkcja O +, O +inna O +niż O +rynek O +i O +przedsiÄ™biorstwo O +. O + +# sent_id = 660 +Innymi O +sÅ‚owy O +, O +jeÅ›li O +opÅ‚aca O +siÄ™ O +napisać O +WikipediÄ™ B-nam_pro_media_web +albo O +Linuksa B-nam_pro_software +, O +bo O +koszty O +współpracy O +sÄ… O +odpowiednio O +niskie O +, O +a O +nie O +sÄ… O +nam O +potrzebne O +prawa O +wÅ‚asnoÅ›ci O +w O +tradycyjnym O +rozumieniu O +, O +wkracza O +peer O +production O +, O +która O +jako O +metoda O +produkcji O +jest O +do O +realizacji O +tych O +celów O +bardziej O +opÅ‚acalna O +i O +efektywna O +. O + +# sent_id = 661 +Mam O +jednak O +czasem O +wrażenie O +, O +że O +mówiÄ…c O +o O +tym O +sposobie O +produkcji O +, O +opacznie O +pojmuje O +siÄ™ O +jej O +oddzielenie O +od O +rynku O +. O + +# sent_id = 662 +DziaÅ‚ania O +wewnÄ…trz O +przedsiÄ™biorstwa O +( O +które O +oddzielone O +jest O +od O +rynku O +wedÅ‚ug O +tej O +samej O +metody O +) O +sÄ… O +tak O +samo O +pozarynkowe O +, O +jak O +dziaÅ‚ania O +w O +ramach O +peer O +production O +. O + +# sent_id = 663 +Jednak O +na O +pewnym O +poziomie O +te O +struktury O +organizacyjne O +w O +koÅ„cu O +zastosujÄ… O +rynkowÄ… O +metodÄ™ O +zawierania O +transakcji O +( O +trudno O +mi O +sobie O +wyobrazić O +, O +żeby O +przedsiÄ™biorstwa O +mogÅ‚y O +dzwonić O +do O +przypadkowych O +ludzi O +i O +żądać O +stawienia O +siÄ™ O +do O +pracy O +) O +i O +w O +ten O +sposób O +jednak O +nie O +funkcjonujÄ… O +poza O +rynkiem O +. 
O + +# sent_id = 664 +Tymczasem O +" O +pozarynkowe O +" O +oznacza O +czÄ™sto O +" O +funkcjonujÄ…ce O +poza O +jakÄ…kolwiek O +wymianÄ… O +rynkowÄ… O +" O +, O +chociaż O +wedÅ‚ug O +Benklera B-nam_liv_person +powinno O +oznaczać O +" O +zorganizowane O +na O +podstawach O +innych O +niż O +mechanizm O +cenowy O +" O +. O + +# sent_id = 665 +Peer O +production O +Benklera B-nam_liv_person +to O +sposób O +wewnÄ™trznej O +organizacji O +, O +w O +której O +o O +decyzjach O +i O +transakcjach O +nie O +decydujÄ… O +ceny O +. O + +# sent_id = 666 +Commons O +based O +odnosi O +siÄ™ O +do O +kosztu O +ustanowienia O +praw O +wÅ‚asnoÅ›ci O +i O +sugeruje O +, O +że O +możliwe O +jest O +funkcjonowanie O +systemów O +wymiany O +bez O +" O +wÅ‚asnoÅ›ci O +" O +. O + +# sent_id = 667 +Jednak O +organizacja O +produkujÄ…ca O +wedÅ‚ug O +reguÅ‚ O +commons O +- O +based O +peer O +production O +dziaÅ‚a O +na O +rynku O +, O +podobnie O +jak O +przedsiÄ™biorstwa O +. O + +# sent_id = 668 +Stosowanie O +nazwy O +" O +pozarynkowa O +" O +mogÅ‚o O +by O +sugerować O +, O +że O +nie O +ma O +stycznoÅ›ci O +miÄ™dzy O +tworzeniem O +Linuksa B-nam_pro_software +i O +rynkiem O +, O +co O +jest O +oczywistym O +nieporozumieniem O +. O + +# sent_id = 669 +Zdarza O +siÄ™ O +jednak O +, O +że O +peer O +production O +jest O +traktowane O +jako O +zjawisko O +przeciwstawne O +rynkowi O +: O +jest O +zÅ‚y O +rynek O +i O +dobre O +commons O +. O + +# sent_id = 670 +StÄ…d O +też O +czÄ™ste O +przyszywanie O +marksistowskich O +Å‚atek O +, O +nawet O +bez O +koniecznoÅ›ci O +wskazywania O +Stallmana B-nam_liv_person +i O +Moglena B-nam_liv_person +jako O +źródÅ‚a O +. O + +# sent_id = 671 +Próba O +zastÄ…pienia O +wszystkich O +sposobów O +produkcji O +peer O +production O +jest O +równie O +absurdalna O +jak O +zastÄ…pienie O +wszystkiego O +przedsiÄ™biorstwem O +. 
O + +# sent_id = 672 +Dystansowanie O +siÄ™ O +od O +problemów O +z O +funkcjonowaniem O +rynku O +to O +jedno O +, O +a O +wyÅ‚Ä…czenie O +siÄ™ O +z O +niego O +to O +drugie O +. O + +# sent_id = 690 +MaÅ‚ysz B-nam_liv_person +musi O +siÄ™ O +teraz O +postarać O +o O +prawdziwy O +cud O +, O +bo O +Puchar B-nam_eve_human_sport +Åšwiata I-nam_eve_human_sport +to O +zdecydowanie O +za O +maÅ‚o O +, O +żeby O +przebić O +wyczyn O +Roberta B-nam_liv_person +Kubicy I-nam_liv_person +. O + +# sent_id = 691 +PrÄ™dkość O +na O +progu O +miaÅ‚ O +MaÅ‚ysz B-nam_liv_person +zawsze O +niewielkÄ… O +, O +a O +tu O +jeszcze O +taki O +rywal O +, O +który O +wybija O +siÄ™ O +majÄ…c O +na O +liczniku O +250 O +kilometrów O +na O +godzinÄ™ O +. O + +# sent_id = 692 +Noty O +za O +styl O +caÅ‚kiem O +niezÅ‚e O +, O +Paulina B-nam_liv_person +Ligocka I-nam_liv_person +by O +siÄ™ O +miÄ™dzy O +dwiema O +bandami O +lepiej O +nie O +zachowaÅ‚a O +. O + +# sent_id = 693 +Sokół B-nam_liv_person +z O +Krakowa B-nam_loc_gpe_city +wypadÅ‚ O +na O +ekranie O +lepiej O +nawet O +od O +Jana B-nam_liv_person +Mazocha I-nam_liv_person +i O +wreszcie O +przyćmiÅ‚ O +Lewisa B-nam_liv_person +Hamiltona I-nam_liv_person +, O +którego O +bez O +wÄ…tpienia O +przypadkowe O +zwyciÄ™stwo O +przeszÅ‚o O +niemal O +bez O +echa O +. O + +# sent_id = 694 +Niezniszczalny O +Krakus B-nam_org_nation +staÅ‚ O +siÄ™ O +prawdziwym O +bohaterem O +FormuÅ‚y B-nam_eve_human_sport +1 I-nam_eve_human_sport +, O +w O +której O +robi O +siÄ™ O +wszystko O +, O +żeby O +wypadków O +byÅ‚o O +jak O +najmniej O +, O +byle O +jakieÅ› O +byÅ‚y O +. 
O diff --git a/pyproject.toml b/pyproject.toml index da51df8c03270e10b7287719b9248b746e57e61a..c17c0dfde2563b7b666e9cfe07bbaae50009ff05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools"] [project] name = "combo" -version = "3.2.0" +version = "3.2.1" authors = [ {name = "Maja Jablonska", email = "maja.jablonska@ipipan.waw.pl"} ] diff --git a/requirements.txt b/requirements.txt index 10849b37d2a7a0abcb300a208a100aad9700feef..bd8c1763b4bcc46d7753b50621d59b1fbe44d4b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,5 +19,7 @@ pytest~=7.2.2 transformers~=4.27.3 typing_extensions~=4.5.0 sacremoses~=0.0.53 +seqeval~=1.2.2 spacy~=3.7.2 -urllib3~=1.26.6 \ No newline at end of file +urllib3~=1.26.6 +wandb~=0.15.0 \ No newline at end of file