Skip to content
Snippets Groups Projects
Select Git revision
  • 9dfe1051536de15d2f05f80defe1902b89b10f44
  • master default protected
  • develop protected
  • feat_remove_attr
  • python2.7
  • python3.8
6 results

symboldictionary.h

Blame
  • symboldictionary.h 5.15 KiB
    /*
        Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
        Part of the libcorpus2 project
    
        This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option)
    any later version.
    
        This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE. 
    
        See the LICENSE and COPYING files for more details.
    */
    
    #ifndef LIBCORPUS2_SYMBOLDICTIONARY_H
    #define LIBCORPUS2_SYMBOLDICTIONARY_H
    
    #include <libcorpus2/typedefs.h>
    #include <vector>
    #include <string>
    #include <boost/range.hpp>
    #include <map>
    #include <iostream>
    
    namespace Corpus2 {
    
    /**
     * A template for string-index dictionaries offering lookups in both
     * directions (string to index and index to string).
     *
     * A dictionary is created from a set of strings, each of which is then
     * mapped to an index that can be used to refer to it.
     *
     * The class is templated on the index type, which should be some form of
     * an integer. The number of indices used should be less than the maximum
     * value of that type. The input should contain no empty strings and no
     * duplicate strings.
     */
    template <typename IndexT>
    class SymbolDictionary
    {
    public:
    	/// Empty dictionary constructor
    	SymbolDictionary();
    
    	/// Load data into the dictionary
    	void load_data(const std::vector<std::string>& data);
    
    	/// Load data into the dictionary. The strings in the vector are
    	/// assumed to be already sorted.
    	void load_sorted_data(const std::vector<std::string>& data);
    
    	/// Check if an index is valid in this dictionary
    	bool is_id_valid(IndexT idx) const;
    
    	/// Getter for the size of this dictionary
    	size_t size() const;
    
    	/**
    	 * Get the index for a given string identifier, const char* version.
    	 *
    	 * Essentially a wrapper for the range overload, needed to avoid
    	 * confusion when a static char array is passed there.
    	 */
    	IndexT get_id(const char* c) const;
    
    	/**
    	 * Get the index for a given string identifier, range version.
    	 *