Select Git revision
symboldictionary.h
symboldictionary.h 5.15 KiB
/*
Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
Part of the libcorpus2 project
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the LICENSE and COPYING files for more details.
*/
#ifndef LIBCORPUS2_SYMBOLDICTIONARY_H
#define LIBCORPUS2_SYMBOLDICTIONARY_H
#include <libcorpus2/typedefs.h>
#include <vector>
#include <string>
#include <boost/range.hpp>
#include <map>
#include <iostream>
namespace Corpus2 {
/**
* A template for string-index dictionaries offering lookups in both
* directions.
*
* A dictionary is created from a set of strings, which are then mapped to
* indices that can be used to refer to them.
*
* The class is templated on the index type, which should be some integer
* type. The number of indices used should be less than the maximum value
* of that type. There should be no empty strings and no duplicate
* strings.
*/
template <typename IndexT>
class SymbolDictionary
{
public:
/// Construct an empty dictionary.
SymbolDictionary();
/**
* Load data into the dictionary.
*
* @param data the strings to load into the dictionary
*
* NOTE(review): presumably, unlike load_sorted_data(), the strings need
* not be sorted beforehand -- confirm against the definition.
*/
void load_data(const std::vector<std::string>& data);
/**
* Load data into the dictionary. The strings in the vector are
* assumed to be already sorted.
*
* @param data the strings to load, assumed to be already sorted
*
* NOTE(review): the expected sort order is not visible from this
* declaration -- confirm against the definition.
*/
void load_sorted_data(const std::vector<std::string>& data);
/**
* Check if an index is valid in this dictionary.
*
* @param idx the index to check
* @return true if the index is valid in this dictionary, false otherwise
*/
bool is_id_valid(IndexT idx) const;
/**
* Getter for the size of this dictionary.
*
* @return the size of this dictionary (presumably the number of stored
*         strings -- confirm against the definition)
*/
size_t size() const;
/**
* Get the index for a given string identifier, const char* version.
*
* Essentially a wrapper for the range overload, needed to avoid
* confusion when a static char array is passed there.
*
* @param c the string identifier to look up (presumably a NUL-terminated
*          C string -- confirm against the definition)
* @return the index for the given string identifier
*
* NOTE(review): the result for an identifier that is not in the
* dictionary is not visible from this declaration.
*/
IndexT get_id(const char* c) const;
/**
* Get the index for a given string identifier, range version.
*