# -*- coding: utf-8 -*-

# Copyright (C) 2011 Adam Radziszewski. Part of WMBT.
# This program is free software; you can redistribute and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the LICENCE and COPYING files for more details

# SWIG bug workaround: loading multiple SWIG modules resulted in unwrapped
# swig::stop_iteration exceptions. RTLD_GLOBAL is therefore added to the
# dlopen flags here, before the SWIG-based modules below are imported, so
# that the changed flags are in effect when those modules get loaded.
import ctypes, sys
sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)

import corpus2, wccl
# TODO: restore the default dlopen flags once the modules are loaded?

import config
import codecs, os

# Help text listing the input (reader) and output (writer) formats reported
# by corpus2, each followed by a line with the corresponding help strings.
# The text starts with an empty line.
format_help = '\n'.join([
	'',
	'Available input formats: '
		+ ' '.join(corpus2.TokenReader.available_reader_types()),
	' '.join(corpus2.TokenReader.available_reader_types_help()),
	'Available output formats: '
		+ ' '.join(corpus2.TokenWriter.available_writer_types()),
	' '.join(corpus2.TokenWriter.available_writer_types_help()),
])

def f_name(model_name, subdir, ext, suff = ''):
	"""Builds the path of a model file located in subdir.

	The file name is model_name followed by a dot and the extension ext.
	If a non-empty suffix suff is given, it is appended to model_name after
	a hyphen, i.e. the name becomes model_name-suff.ext."""
	pieces = [model_name]
	if suff:
		pieces += ['-', suff]
	pieces += ['.', ext]
	return os.path.join(subdir, ''.join(pieces))

def get_tagset(conf):
	"""Reads the tagset name from the global section of the given config
	and returns the result of looking that name up with
	corpus2.get_named_tagset."""
	tagset_name = conf.get(config.S_GLOBAL, config.O_TAGSET)
	return corpus2.get_named_tagset(tagset_name)

def get_reader(in_path, tagset, input_format, read_disamb_only):
	"""Creates a reader using the options. If in_path evaluates to False,
	will create a stdin reader. Set read_disamb_only to force reading only
	'disamb' lexemes/interpretations (this appends the 'disamb_only' option
	to the input format string); otherwise input_format is used as given."""
	# Always bind the effective format, so that the calls below also work
	# when read_disamb_only is not set.
	fixd_format = input_format
	if read_disamb_only:
		fixd_format += ',disamb_only'

	if in_path:
		return corpus2.TokenReader.create_path_reader(
			fixd_format, tagset, in_path)
	return corpus2.TokenReader.create_stdin_reader(fixd_format, tagset)

def get_writer(out_path, tagset, output_format):
	"""Creates a writer for the given output format and tagset. A stdout
	writer is created when out_path evaluates to False; otherwise the
	writer is created for the path out_path."""
	if not out_path:
		return corpus2.TokenWriter.create_stdout_writer(output_format, tagset)
	return corpus2.TokenWriter.create_path_writer(
		output_format, out_path, tagset)

def op_list(wccl_file, sec_name):
	"""Collects the operators of the untyped section named sec_name from
	the given WCCL file, in section order. An empty list is returned when
	the file has no such section."""
	if not wccl_file.has_untyped_section(sec_name):
		return []
	section = wccl_file.get_untyped_section(sec_name)
	return [section.get_ptr(idx) for idx in range(section.size())]

def get_wccl_ops(conf, model_name, wccl_dir, lex_dir, chan_names):
	"""Returns a list of WCCL operator lists, one for each of the given
	channel names (in the order of chan_names). Each list consists of two
	parts: the default operators followed by the channel-specific operators
	(theoretically both may be empty).

	The operators are taken from the WCCL file whose path is derived from
	model_name and wccl_dir; lex_dir is passed on to the WCCL parser along
	with that path. The tagset is the one named in conf."""
	wccl_file_path = f_name(model_name, wccl_dir, config.EXT_WCCL)
	# Use the shared helper instead of repeating the tagset lookup here.
	tagset = get_tagset(conf)
	wccl_file = wccl.Parser(tagset).parseWcclFileFromPath(wccl_file_path, lex_dir)
	def_ops = op_list(wccl_file, config.DEFAULT_OPS)
	# Concatenation builds a separate list for every channel, so def_ops
	# itself is not shared between the returned lists.
	return [def_ops + op_list(wccl_file, chan_name) for chan_name in chan_names]

def create_context(sent):
	"""Creates a wccl.SentenceContext around the given sentence, so that
	it can be used with WCCL."""
	context = wccl.SentenceContext(sent)
	return context
