Skip to content
Snippets Groups Projects
  • ilor's avatar
    Initial version of the basic functionality split from libmaca. · e862fc89
    ilor authored
    Libcorpus2 now contains the base strructures (tokens etc), their I/O and related classes.
    Libpwrutils (name TBD) contains basic building blocks common to all tagger-related libraries
    Tagset-tool only needs libcorpus, so was brough here as well.
    e862fc89
tokensource.h 3.52 KiB
/*
    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski

    This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

    This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. 

    See the LICENSE and COPYING files for more details.
*/

#ifndef PWRNLP_TOKENSOURCE_H
#define PWRNLP_TOKENSOURCE_H

#include <boost/function.hpp>
#include <boost/range.hpp>

#include <iosfwd>
#include <vector>

namespace PwrNlp {

/**
 * A simple interface for classes that can be used as token sources, for
 * example token processing layers. This is actually a template since it
 * might be used with different types of Tokens, and avoiding code
 * duplication is good.
 */
template<typename T>
class TokenSourceTemplate
{
public:
	/// Constructor
	TokenSourceTemplate()
	{
	}

	/// Destructor
	virtual ~TokenSourceTemplate()
	{
	}

	/**
	 * Get the next token, or NULL if there are no more tokens.
	 * Caller takes ownership of the token.
	 */
	virtual T* get_next_token() = 0;

	/**
	 * Tokenization helper. Tokenizes the entire input, calling the sink
	 * functor or each token. The sink takes ownership of the token.
	 */
	void tokenize(boost::function<void (T*)> sink) {
		while (T* t = get_next_token()) {
			sink(t);
		}
	}
};

namespace detail {
	// Helper template class to get type T when all we have is a type T*
	// This bit does nothing by default...
	template<typename T>
	struct deptr
	{
	};

	// ...and this has a typedef for T when the template is used with T*
	template<typename T>
	struct deptr<T*>
	{
		typedef T type;
	};
} /* end ns detail */

/**
 * Generic TokenSource wrapper for containers of tagger Token pointers,
 * e.g. a std::vector<Token*> or a boost::range of Token* iterators.
 *
 * The container should not be modified as long as it is being accesed
 * by a RangeSource wrapper.
 *
 * There is some mild template magic here to make it work with the
 * templated TokenSource. Basically when given a container of some Token*,
 * we want to end up with a TokenSource<Token>, hence the deptr.
 */
template<typename T>
class RangeSource : public TokenSourceTemplate<
		typename detail::deptr<typename T::value_type>::type>
{
public:
	/// convenience typedef
	typedef typename T::const_iterator const_iterator;

	/// Constructor from a range of type T, where T is iterable
	/// (i.e. has const_iterator and value_type typedefs and begin / end
	/// member functions.
	RangeSource(const T& range)
		: end_(range.end()), ptr_(range.begin())
	{
	}

	/// TokenSource override
	typename T::value_type get_next_token()
	{
		if (ptr_ != end_) {
			return *ptr_++;
		} else {
			return NULL;
		}
	}

private:
	/// The end iterator
	const_iterator end_;

	/// Cuurrent position iterator
	const_iterator ptr_;
};

/// Helper function to make a RangeSource from a range,
/// avoiding lenghty template instantiation
template<class T>
RangeSource<T>* make_range_source(const T& range)
{
	return new RangeSource<T>(range);
}

/// Helper function to make a RangeSource from two iterators,
/// avoiding lenghty template instantiation
template<class T>
RangeSource<boost::iterator_range<T> >* make_range_source(const T& b,
		const T& e)
{
	boost::iterator_range<T> range(b, e);
	return new RangeSource< boost::iterator_range<T> >(range);
}

} /* end ns PwrNlp */


#endif // PWRNLP_TOKENSOURCE_H