diff --git a/README.md b/README.md index af8e2451977b4a3309a093902e296482be60cbcb..203d83e0559448d18b7157a4aa83e2863897fa66 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,10 @@ docker build . -t "wsd-pl" # start ``` docker run -d --restart=always -v /samba:/samba \ - [-v $(pwd)/config.ini:/worker/config.ini] wsd-pl + -v $(pwd)/CONFIG_INI:/worker/config.ini wsd-pl ``` -Config file is optional - pass it only if it differs from default one. +where *CONFIG_INI* is the name of the service config file and can be: +*config_pl.ini* or *config_en.ini*. + +If you need a custom config file for the tool, you can also pass it to the container +and reference it in config.ini. diff --git a/config_en.ini b/config_en.ini new file mode 100644 index 0000000000000000000000000000000000000000..006dfcddabeec67514ff820a766a62b30128aa5f --- /dev/null +++ b/config_en.ini @@ -0,0 +1,23 @@ +[service] +tool = wsden + +root = /samba/requests/ +rabbit_host =10.17.0.85 +rabbit_user =clarin +rabbit_password =clarin123 + +[tool] +workers_number = 10 +config_file = /home/worker/src/default_en.ini +model_dir = /home/worker/models +model_filename = merged_graph.xml.gz +model_url = http://minio.clarin-pl.eu/public/models/wosedon/kb-25022020.zip + + +[logging] +port = 9996 +local_log_level = DEBUG + +[logging_levels] +__main__ = DEBUG +wsd_worker = DEBUG diff --git a/config.ini b/config_pl.ini similarity index 63% rename from config.ini rename to config_pl.ini index ee21ecfacf87d92d6fec8302effd76f1b0276476..7b7e770b3dfc0e1df05f387cf179bd4afcfe6e98 100644 --- a/config.ini +++ b/config_pl.ini @@ -8,8 +8,10 @@ rabbit_password =clarin123 [tool] workers_number = 10 -config_file = /home/worker/src/default.ini +config_file = /home/worker/src/default_pl.ini model_dir = /home/worker/models +model_filename = merged_graph.xml.gz +model_url = http://minio.clarin-pl.eu/public/models/wosedon/kb-25022020.zip [logging] diff --git a/entrypoint.sh b/entrypoint.sh index 
6b34340404c8b138dcf6be94b26813d9c498ac14..5d7af39601aeff1bd28a65e3bdd839444c7744d7 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,17 +1,42 @@ #!/bin/sh +# Script downloads models defined in config file and starts worker. +# Script should be called inside a container. -cd /home/worker +config_file='/home/worker/config.ini' -if [ ! -d models ]; then - mkdir models +# Simple function to retrieve the value for a given key ($2) from an ini file ($1). +# Note that the key should be unique in the file; sections are not handled. +get_ini_value() +{ + ini_file_path="$1" + ini_key="$2" + ini_val=$(grep "$ini_key" "$ini_file_path" | sed 's/ *= */=/g' \ + | awk -F '=' '{print $2}' | sed 's/ *$//') + echo "$ini_val" +} + + +if [ ! -f "$config_file" ]; then + echo "ERROR: Config file '$config_file' not found!" + exit 1 fi -cd models -if [ ! -f "merged_graph.xml.gz" ]; then - echo "Downloading a model..." - wget wget http://minio.clarin-pl.eu/public/models/wosedon/kb-25022020.zip - unzip kb-25022020.zip; rm kb-25022020.zip +model_dir=$(get_ini_value "$config_file" "model_dir") +if [ ! -d "$model_dir" ]; then + mkdir "$model_dir" +fi + +cd "$model_dir" +model_dir=$(get_ini_value "$config_file" "model_dir") +model_filename=$(get_ini_value "$config_file" "model_filename") +model_url=$(get_ini_value "$config_file" "model_url") + +if [ ! -f "$model_filename" ]; then + echo "Downloading a model '$model_filename' from '$model_url' ..." + remote_model_name="$model_filename"'.zip' # assuming it is a zip archive + wget -O "$remote_model_name" "$model_url" + unzip "$remote_model_name"; rm "$remote_model_name" echo "Done!" 
fi -cd /home/worker && python3.6 main.py \ No newline at end of file +cd /home/worker && python3.6 main.py diff --git a/src/default_en.ini b/src/default_en.ini new file mode 100644 index 0000000000000000000000000000000000000000..e8eaf93cf97a1d45950c51a4bfc40b68c50e190d --- /dev/null +++ b/src/default_en.ini @@ -0,0 +1,102 @@ +[wosedon] +; disambiguation context +context = SlidingSentenceWindow +; builders +gbuilders = SynsetGraphBuilder LexicalUnitGraphBuilder UniversalBuilder +; how to merge the graphs +mergers = SynsetsLUMerger2 UniversalMerger +; finally, our algorithm +wsdalgorithm = GTPPR +; heuristic change of the final ranking after WSD +rerankers = +; edge weights +weights = dynamic:1.0 + universal_0:1.0 + l182:1.0 + l195:1.0 + l184:1.0 + l186:1.0 + l188:1.0 + l183:1.0 + l187:1.0 + l185:1.0 + l191:1.0 + l194:1.0 + l192:1.0 + s179:1.0 + s178:1.0 + s209:1.0 + s171:1.0 + s173:1.0 + s172:1.0 + s175:1.0 + s174:1.0 + s177:1.0 + s176:1.0 + s207:1.0 + s206:1.0 + s205:1.0 + s204:1.0 + s203:1.0 + s202:1.0 + s201:1.0 + s200:1.0 + s223:1.0 + s208:1.0 + s197:1.0 + s196:1.0 + s195:1.0 + s194:1.0 + s193:1.0 + s192:1.0 + s191:1.0 + s190:1.0 + s199:1.0 + s198:1.0 + s210:1.0 + l170:1.0 + s222:1.0 + s184:1.0 + s185:1.0 + s186:1.0 + s187:1.0 + s180:1.0 + s181:1.0 + s182:1.0 + s183:1.0 + s188:1.0 + s189:1.0 + +use_weights = False + +[wosedon:rerank_options] +percentage_diff = 10 + +[wosedon:resources] +kb_graph_file = %(resources_dir)s/kb-24092019/graph +mwe_dict_file = %(resources_dir)s/mwe_en.xml +expansion_sources = /mnt/lvm_work/REPOS/wosedon/wosedon/gbuilders/devel/kbexp +; expansion = %(expansion_sources)s/autoextend/ae-enwn.txt; +; expansion = %(expansion_sources)s/wikipedia/gloss_with_semcor-o-b.plwn.idf; +expansion = non-existing-source.txt + +tagset = spacy + +[wosedon:build_options] +unique_edges = False +directed_graphs = True +accept_pos = 5 6 7 8 +base_only = True +window = 0 + +[wosedon:wsd_alg] +epsilon = 0.05 +rw_iter = 200 + +damping_factor = 0.85 +max_iter = 
25 + +algorithm_sources = /mnt/lvm_work/REPOS/wosedon/wosedon/algorithms/devel/mixins/resources +alpha = 0.5 +sensefreq = %(algorithm_sources)s/pwn.sf +patterns = %(algorithm_sources)s/patterns.txt diff --git a/src/default.ini b/src/default_pl.ini similarity index 100% rename from src/default.ini rename to src/default_pl.ini