Commit f62a61f3 authored by Tomasz Walkowiak's avatar Tomasz Walkowiak

initial commit

parents
[service]
tool = omwn
rabbit_host = 10.17.0.85
rabbit_user = clarin
rabbit_password = clarin123
[tool]
workers_number = 1
[logging]
port = 9987
local_log_level = INFO
[logging_levels]
__main__ = INFO
from ._service import *
from ._worker import *
This diff is collapsed.
from __future__ import absolute_import, unicode_literals, division
from abc import ABCMeta, abstractmethod
import logging
import six
__all__ = 'LexWorker',
@six.add_metaclass(ABCMeta)
class LexWorker(object):
    """
    Abstract base class from which every worker must be derived.

    It defines optional hooks that a subclass may override to initialize and
    clean up the worker (:meth:`static_init`, :meth:`static_close`,
    :meth:`logging_init`, :meth:`init`, :meth:`close`) and the mandatory
    :meth:`process` method, which is called for every request made to the
    worker.
    """

    @classmethod
    def static_init(cls, config):
        """
        One-time, class-level initialization.

        Called exactly once, when the worker class is loaded and the service
        is starting. Resources that can be shared across processes should be
        loaded here so that they are not loaded many times.

        Every variable this method adds to the class is pickled and sent to
        all started processes, so all of them must be picklable.

        :param dict config: The whole service configuration dictionary, in
            case the worker wants to look at some of the parameters.
        """

    @classmethod
    def static_close(cls):
        """
        Class-level cleanup counterpart of :meth:`static_init`.

        Called after all processes have stopped and the service is shutting
        down. Override it when shared resources allocated by
        :meth:`static_init` need to be cleaned up.
        """

    @staticmethod
    def logging_init(log_socket_handler, log_levels):
        """
        Set up the loggers used by the worker.

        Called in each subprocess before :meth:`init`. Every key of
        ``log_levels`` is treated as a logger name: that logger gets the
        mapped level and ``log_socket_handler`` as a handler, so it can log
        to the centralized logging system.

        When ``log_socket_handler`` is ``None`` this method silently does
        nothing.

        Overriding should normally be unnecessary, unless some loggers need
        very special treatment.

        :param log_socket_handler: The socket handler created for the
            process, or ``None`` if the central logger has not been set up.
        :type log_socket_handler: Optional[logging.handlers.SocketHandler]
        :param log_levels: Mapping of logger names to their levels. Normally
            taken from the ``logging_levels`` section of the config
            dictionary, after textual level names are resolved.
        :type log_levels: Mapping[str,int]
        """
        if log_socket_handler is not None:
            for logger_name, logger_level in six.iteritems(log_levels):
                target = logging.getLogger(logger_name)
                target.setLevel(logger_level)
                target.addHandler(log_socket_handler)

    def init(self):
        """
        Per-instance initialization.

        Called after an instance of this class has been constructed in the
        process it will run in; it runs once for each instance (and therefore
        once for each process). Resources that cannot be pickled and shared
        should be loaded here.
        """

    def close(self):
        """
        Per-instance cleanup counterpart of :meth:`init`.

        Called when the process is being shut down. Override it when the
        worker allocates per-process resources that need to be cleaned up.
        """

    @abstractmethod
    def process(self, task_options):
        """
        Handle a single request; must be overridden by subclasses.

        Called for each request made to the worker and performs the task the
        service was constructed to do.

        :param dict task_options: Options for the current processing task.
            It may contain any values that can be JSON-encoded. Subclasses
            should describe which options they handle (or require).
        """
from __future__ import absolute_import, unicode_literals, division
import logging
from logging.handlers import RotatingFileHandler
from threading import Thread
from struct import Struct
from six.moves import socketserver, cPickle
__all__ = 'LogServer', 'configure_loggers', 'parse_loglevel'
# Default layout of a formatted record: process name, timestamp, logger name
# with line number, level name and finally the message itself.
_DEFAULT_LOGFORMAT = (
    '%(processName)s>>> [%(asctime)s] (%(name)s:%(lineno)d) '
    '%(levelname)s: %(message)s'
)

# The logging module doesn't standardize name-to-level conversion, so the
# standard level names are mapped to their numeric values here.
_NAME2LVL = {
    'CRITICAL': logging.CRITICAL,
    'ERROR': logging.ERROR,
    'WARNING': logging.WARNING,
    'INFO': logging.INFO,
    'DEBUG': logging.DEBUG,
}

# Logger for messages emitted by this module itself.
_logger = logging.getLogger(__name__)

# The main logger is named after the top-level package and is configured for
# the loggers of this library themselves. It gets the same handlers as the
# logger that handles worker records.
_main_logger = logging.getLogger(__name__.partition('.')[0])

# The logger that handles records received from the remote (worker) loggers.
# It is kept apart from the loggers of this library, accepts everything, logs
# nothing on its own and only handles received log records.
_service_logger = logging.getLogger('<service-remote>')

# Rotating file handler shared by the loggers above; stays None until
# configure_loggers() has been called.
_handler = None
def configure_loggers(logfile_name='service.log',
                      logfile_maxsize=1024**2,
                      logfile_maxbackups=10,
                      log_format=_DEFAULT_LOGFORMAT,
                      local_log_level='WARNING'):
    """
    Configure the loggers used by :class:`LogServer` instances.

    This function must be called before any logging servers are started (not
    that there should ever be need for more than one) and cannot be called
    again. This is to ensure thread safety.

    All arguments are validated and all handlers are built before any
    module-level state is modified, so a call that fails (for example because
    of an unknown level name) leaves the module unconfigured and may simply
    be repeated with corrected arguments.

    :param str logfile_name: Name of the file to which logs are written.
    :param int logfile_maxsize: Maximal size in bytes of a single rotating
        log file. Default is 1 MiB.
    :param int logfile_maxbackups: Maximal number of backup log files kept.
        Default is 10.
    :param str log_format: Format string for log records output by this
        server. A reasonable default is provided.
    :param str local_log_level: Name of the log level for loggers in *this*
        library. Refer to standard documentation for possible names. This
        setting does not affect loggers from workers.
    :raise RuntimeError: When the loggers have already been configured.
    :raise KeyError: When ``local_log_level`` is neither an integer literal
        nor the name of a standard logging level.
    """
    global _handler

    if _handler is not None:
        raise RuntimeError('Cannot configure logger twice')

    # Everything that can fail is done first, before touching the loggers or
    # the module-level handler; otherwise a bad level name would leave the
    # handler registered and make every retry fail with "configure twice".
    local_level = parse_loglevel(local_log_level)
    fmter = logging.Formatter(log_format)

    # delay=True postpones opening the log file until the first record is
    # actually emitted.
    file_handler = RotatingFileHandler(logfile_name,
                                       maxBytes=logfile_maxsize,
                                       backupCount=logfile_maxbackups,
                                       encoding='utf-8',
                                       delay=True)
    file_handler.setFormatter(fmter)

    # Also log to stderr (usually will be screen). This does not require any
    # fussing.
    stdhandler = logging.StreamHandler()
    stdhandler.setFormatter(fmter)

    _main_logger.setLevel(local_level)
    # Make sure this will accept everything. Worker loggers should do
    # filtering.
    _service_logger.setLevel(logging.NOTSET)

    for target in (_main_logger, _service_logger):
        target.addHandler(file_handler)
        target.addHandler(stdhandler)

    # Publishing the handler last marks the configuration as complete.
    _handler = file_handler
def parse_loglevel(log_level):
    """
    Convert a textual log level into its numeric value.

    An integer literal is returned as an integer. Anything else is looked up
    (case-insensitively) among the standard level names and the value
    associated with that name is returned.

    :param str log_level: String naming the log level, to be parsed.
    :return: Integer value of the log level, as used by ``logging`` module.
    :rtype: int
    :raise KeyError: When ``log_level`` is neither an integer literal nor
        the name of a standard logging level.
    """
    try:
        return int(log_level)
    except ValueError:
        # Not a number, so it has to be one of the standard level names.
        return _NAME2LVL[log_level.upper()]
class LogServer(object):
    """
    Creates and starts a logging server thread. This thread waits for
    ``LogRecord`` pickles on a given port on localhost and logs them to a
    rotating file handler.

    The thread is meant to run while waiting for subprocesses to end, so it
    should not impact efficiency. The thread will also spend most of its time
    listening on socket.

    The logging server can be told to shutdown at any time.
    """

    # str cast is for python2 compatibility.
    HOST = str('localhost')
    # Passed to serve_forever() as its poll interval (in seconds), i.e. how
    # often the serving loop checks for a shutdown request.
    SHUTDOWN_POLL_INTERVAL = 2.

    def __init__(self, port):
        """
        Bind the server socket and prepare (but do not start) the thread.

        :param int port: Number of TCP port the server will be listening on.
        """
        self._port = port
        self._sserver = _LogSocketServer((self.HOST, port), _LogRequestHandler)
        self._sthread = Thread(target=self._sserver.serve_forever,
                               args=(self.SHUTDOWN_POLL_INTERVAL,),
                               name='logging')

    @property
    def socket_address(self):
        """The address tuple for socket handlers to connect to this server."""
        return self.HOST, self._port

    def start(self):
        """
        Start the logging thread and return immediately.

        :raise RuntimeError: If :func:`configure_loggers` has not been
            called before this method.
        """
        if _handler is None:
            raise RuntimeError('configure_loggers() has not been '
                               'called before starting')
        self._sthread.start()

    def shutdown(self):
        """
        Shutdown the logging server and thread, then release its socket.

        If the thread is not alive, silently do nothing.
        """
        if not self._sthread.is_alive():
            return
        self._sserver.shutdown()
        self._sthread.join()
        # shutdown() only stops the serve_forever() loop; the listening
        # socket stays open until server_close() is called. Close it once
        # the serving thread is gone so the port is not leaked.
        self._sserver.server_close()
class _LogSocketServer(socketserver.TCPServer):
    """
    TCP server that reports request-handling errors through ``logging``.

    By default such errors are printed directly instead of being logged; this
    subclass exists to integrate them nicely with the logging system.
    """

    def handle_error(self, request, client_address):
        """
        Log the exception raised while handling a request.

        The socketserver code calls this hook from inside an ``except``
        block, so it is safe to log the active exception here.
        """
        _logger.exception(
            'Error while handling message from %r', client_address
        )
class _LogRequestHandler(socketserver.StreamRequestHandler):
    """
    Read one length-prefixed, pickled log record and pass it to the
    ``<service-remote>`` logger.

    This handler is based on the stdlib example:
    https://docs.python.org/2/howto/logging-cookbook.html#sending-and-receiving-logging-events-across-a-network
    """
    # An earlier comment here said this uses a UNIX stream socket, but the
    # server class in this module derives from TCPServer and is bound to a
    # (host, port) address.

    # According to the example, the log record length prefix is an unsigned
    # long. Its size is taken from the struct rather than hard-coded as the
    # 4 bytes used in the example.
    # str cast is for python2 compatibility.
    __PREFIX_STRUCT = Struct(str('!L'))

    def handle(self):
        """
        Receive a single record from the connection and log it.

        :raise RuntimeError: When the stream ends before the whole length
            prefix or the whole payload has been read.
        """
        # NOTE(review): exactly one record is read per connection and then
        # this method returns. The stdlib SocketHandler normally keeps its
        # connection open for later records - presumably the client side
        # accounts for that; confirm against the code that creates the
        # socket handlers.
        prefix_size = self.__PREFIX_STRUCT.size

        # Read the length prefix.
        header = self.rfile.read(prefix_size)
        if len(header) < prefix_size:
            # Must be malformed, we got EOF before reading the struct.
            raise RuntimeError(
                'Bad length prefix in message: expected {} bytes '
                'but only got {}'
                .format(prefix_size, len(header))
            )

        # Get the integer representing the payload length.
        (loglen,) = self.__PREFIX_STRUCT.unpack(header)

        payload = self.rfile.read(loglen)
        if len(payload) < loglen:
            # Malformed again: EOF before the announced payload was complete.
            raise RuntimeError(
                'Bad payload in message: expected {} bytes '
                'but only got {}'
                .format(loglen, len(payload))
            )

        # SECURITY NOTE: the payload is unpickled exactly as received from
        # the socket. Unpickling can execute arbitrary code, so whatever is
        # able to connect to this port must be trusted.
        record_attrs = cPickle.loads(payload)
        _service_logger.handle(logging.makeLogRecord(record_attrs))
#!/usr/bin/python
# -*- coding: utf-8 -*-
# The encoding declaration above is required on Python 2, because this file
# contains non-ASCII (Polish) string literals.
import logging
import lex_ws
import nltk
from nltk.corpus import wordnet as wn

# Both names are bound to the same logger (same name passed to getLogger).
my_logger = logging.getLogger(__name__)
_log = logging.getLogger(__name__)

# Maps the two-letter language codes accepted in requests to the three-letter
# codes passed to the wordnet corpus reader as ``lang``.
languages = {"pl": "pol", "en": "eng", "es": "spa"}
class OMWNWorker(lex_ws.LexWorker):
    """
    Worker answering Open Multilingual Wordnet queries with NLTK's wordnet.

    A request is a dictionary with a ``"function"`` key selecting one of
    ``'list'``, ``'get'`` or ``'getInfo'``; the first two also read the
    ``"element"`` entry of the request (see :meth:`evaluate_function`).
    """

    # Base address of the external OMW search page used in 'list' responses.
    _OMW_URL = "http://compling.hss.ntu.edu.sg/omw/cgi-bin/wn-gridx.cgi?"

    @classmethod
    def static_init(cls, config):
        """
        Run one lemma lookup per supported language at service start.

        The results are discarded; the surrounding log messages indicate the
        lookups are there to get the wordnet data loaded up front.

        :param dict config: Service configuration (not used here).
        """
        my_logger.info('Loading models...')
        for wn_lang in languages.values():
            wn.lemmas("test", lang=wn_lang)
        my_logger.info('Loading finished.')

    def process(self, input):
        """
        Handle one request.

        :param dict input: Request options. (The parameter name shadows the
            builtin but is kept so existing keyword callers keep working.)
        :return: Result of :meth:`evaluate_function`, or an empty dict when
            the request has no ``"function"`` key.
        """
        my_logger.info('Doing work!')
        res = {}
        if "function" in input:
            res = self.evaluate_function(input["function"], input)
        my_logger.info('Work done!')
        return res

    def evaluate_function(self, function_type, input):
        """
        Dispatch a request to the handler of the requested function.

        :param function_type: ``'list'``, ``'get'`` or ``'getInfo'``.
        :param dict input: The whole request; ``input["element"]`` is read
            for ``'list'`` and ``'get'``.
        :return: The handler's response; an empty dict for any other
            ``function_type``.
        :raise KeyError: When ``'list'`` or ``'get'`` is requested without an
            ``"element"`` entry.
        """
        if function_type == 'list':
            return self._list_element(input["element"])
        if function_type == 'get':
            return self._get_element(input["element"])
        if function_type == 'getInfo':
            return self._get_info()
        return {}

    @staticmethod
    def _has_known_language(element):
        """Tell whether ``element`` names one of the supported languages."""
        return "lang" in element and element["lang"] in languages

    def _list_element(self, element):
        """
        Describe where the data for a lemma can be found ('list' function).

        :return: ``{"formats": [...], "url": ...}`` when the lemma is known
            to wordnet in the requested language, otherwise an empty dict.
        """
        if not self._has_known_language(element) or "lemma" not in element:
            return {}

        lemma = element["lemma"]
        wn_lang = languages[element["lang"]]
        # Debug output goes through the logger rather than to stdout.
        my_logger.debug('Listing lemma %r', lemma)

        if not wn.lemmas(lemma, lang=wn_lang):
            return {}

        # NOTE(review): the lemma is put into the query string as-is, without
        # URL-encoding - confirm that consumers of this URL cope with spaces,
        # '&' or non-ASCII characters in it.
        url = self._OMW_URL + "lemma=" + lemma + "&lang=" + wn_lang
        return {"formats": ["json"], "url": url}

    def _get_element(self, element):
        """
        Return synset data for a lemma ('get' function).

        :return: Result of :meth:`getDatabyLemma`, or an empty dict when the
            language is missing/unsupported or no lemma is given.
        """
        if not self._has_known_language(element) or "lemma" not in element:
            return {}
        return self.getDatabyLemma(element["lemma"],
                                   languages[element["lang"]])

    @staticmethod
    def _get_info():
        """
        Return the static description of this resource ('getInfo' function).

        A new dictionary is built on every call, keyed by interface language
        (``'pl'`` and ``'en'``).
        """
        return {
            'pl': {
                'name': "Inne języki",
                'fullName': "Open Multilingual Wordnet",
                'description': 'Słowosieć (z ang. wordnet) to sieć semantyczna, która odzwierciedla system leksykalny języka naturalnego. Węzłami Słowosieci są jednostki leksykalne, czyli wyrazy i ich znaczenia, różnorako połączone relacjami semantycznymi ze ściśle określonego repertuaru. Na przykład kot jest hiponimem (podklasą) zwierzęcia, pazur i łapa są w relacji meronimii (część/całość), a wchodzić i wychodzić są antonimami. Jednostka leksykalna uzyskuje znaczenie przez odniesienie do innych jednostek leksykalnych w obrębie systemu, a możemy o niej wnioskować na podstawie przypisanych jej relacji. Na przykład kota definiuje się jako rodzaj zwierzęcia, łapę jako całość, której częścią jest pazur, a czynności wchodzenia i wychodzenia jako przeciwieństwa. <br> Struktura wordnetu jest dostosowana do potrzeb automatycznej analizy tekstów. Jest to w istocie podstawowy zasób językowy, ważny w badaniach nad sztuczną inteligencją.'
                +'<br><a target="_blank" href="http://compling.hss.ntu.edu.sg/omw/">więcej...</a>',
                'copyright': 'Utrzymanie: <a href="http://www3.ntu.edu.sg/home/fcbond/">Francis Bond</a>&lt;<a href="mailto:bond@ieee.org">bond@ieee.org</a>&gt;',
            },
            'en': {
                'name': "Other languages",
                'fullName': "Open Multilingual Wordnet",
                'description': 'Open wordnets in a variety of languages, all linked to the Princeton Wordnet of English (PWN). The goal is to make it easy to use wordnets in multiple languages. The individual wordnets have been made by many different projects and vary greatly in size and accuracy'
                +'<br><a target="_blank" href="http://compling.hss.ntu.edu.sg/omw/">more...</a>',
                'copyright': '<a href="http://www3.ntu.edu.sg/home/fcbond/">Francis Bond</a>&lt;<a href="mailto:bond@ieee.org">bond@ieee.org</a>&gt;',
            },
        }

    def getDatabyLemma(self, lemma, language):
        """
        Collect the synsets of a lemma with translations to other languages.

        :param lemma: The lemma to look up.
        :param language: Three-letter wordnet language code of ``lemma``.
        :return: List with one dict per lemma found, holding the synset
            ``name``, its ``definition``, its ``offset`` (zero-padded to
            8 digits, followed by ``-`` and the part of speech) and
            ``translate``: lemma names of the synset in every supported
            language other than ``language``, keyed by two-letter code.
        """
        result = []
        for wnlem in wn.lemmas(lemma, lang=language):
            wnsynset = wnlem.synset()
            trans = {
                code: wnsynset.lemma_names(wn_lang)
                for code, wn_lang in languages.items()
                if wn_lang != language
            }
            result.append({
                'name': wnsynset.name(),
                'definition': wnsynset.definition(),
                'offset': (str(wnsynset.offset()).zfill(8)
                           + '-' + wnsynset.pos()),
                'translate': trans,
            })
        return result
if __name__ == '__main__':
    # Executed as a script: hand the worker class to the service entry point.
    lex_ws.LexService.main(OMWNWorker)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment