Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
V
ValUnifer
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
IPIPAN
ValUnifer
Commits
d5ae117b
Commit
d5ae117b
authored
2 years ago
by
dcz
Browse files
Options
Downloads
Patches
Plain Diff
Loading selected walenty dict
Readme
parent
72c644eb
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
README.md
+25
-0
25 additions, 0 deletions
README.md
syntax/management/commands/import_tei.py
+68
-20
68 additions, 20 deletions
syntax/management/commands/import_tei.py
with
93 additions
and
20 deletions
README.md
+
25
−
0
View file @
d5ae117b
...
...
@@ -19,6 +19,13 @@ In order to run the development environment locally:
-> run ./reset_db.sh script in interactive bash
-> exit interactive bash by typing ctrl-d
docker-compose start backend
By default the database is populated with a small subset of the Polish Valence Dictionary.
To load a different dictionary file, perform the following steps before executing the ./reset_db.sh script:
*
download the full Walenty dataset (the TEI format can be downloaded from http://zil.ipipan.waw.pl/Walenty)
*
unpack the zip archive and place the XML file in ./data/walenty
*
set the environment variable WALENTY_FILE_NAME to the name of the file (e.g. export WALENTY_FILE_NAME=walenty_20210913.xml)
In order to reinstall the database instance, remove the folder specified by DATABASE_DIR.
...
...
@@ -35,3 +42,21 @@ In order to build the frontend Vue.js application for production execute the fol
docker-compose run frontend yarn build
Compiled application files will be located in
`frontend/dist/`
.
## Default users
#### Admin user
*
Login: shell
*
Password: valier
#### Leksykograf user
*
Login: Leksykograf
*
Password: valier111
#### Super leksykograf user
*
Login: Superleksykograf
*
Password: valier111
This diff is collapsed.
Click to expand it.
syntax/management/commands/import_tei.py
+
68
−
20
View file @
d5ae117b
#! /usr/bin/python
# -*- coding: utf-8 -*-
import
logging
import
os
from
xml.sax
import
handler
,
make_parser
from
django.core.management.base
import
BaseCommand
import
sys
,
os
,
shutil
,
codecs
,
copy
,
errno
,
logging
from
xml.sax
import
saxutils
,
handler
,
make_parser
from
importer.WalentyXML
import
WalentyTeiHandler
from
importer.WalentyPreprocessXML
import
WalentyPreprocessTeiHandler
from
shellvalier.settings
import
BASE_DIR
from
common.models
import
ImportInProgress
from
connections.models
import
POS
,
Status
from
examples.models
import
ExampleOpinion
,
ExampleSource
from
importer.WalentyPreprocessXML
import
WalentyPreprocessTeiHandler
from
importer.WalentyXML
import
WalentyTeiHandler
from
semantics.models
import
FrameOpinion
,
ArgumentRole
,
SemanticRole
,
RoleAttribute
,
\
SelectionalPreferenceRelation
,
RoleType
from
shellvalier.environment
import
get_environment
from
shellvalier.settings
import
BASE_DIR
from
syntax.management.commands.add_predefined_preferences
import
create_predefined_preferences
from
syntax.management.commands.import_relations
import
import_relations
from
syntax.models
import
SchemaOpinion
,
Aspect
,
InherentSie
,
Negativity
,
Predicativity
,
SyntacticFunction
,
Control
,
PredicativeControl
,
Position
from
syntax.models
import
SchemaOpinion
,
Aspect
,
InherentSie
,
Negativity
,
Predicativity
,
SyntacticFunction
,
\
Control
,
PredicativeControl
,
Position
from
syntax.models_phrase
import
(
Case
,
PhraseAspect
,
AdverbialCategory
,
PhraseNegativity
,
PhraseInherentSie
,
Number
,
Gender
,
Degree
,
LemmaOperator
,
LemmaCooccur
,
ModificationType
,
)
from
semantics.models
import
FrameOpinion
,
ArgumentRole
,
SemanticRole
,
RoleAttribute
,
PredefinedSelectionalPreference
,
SelectionalPreferenceRelation
,
RoleType
from
common.models
import
ImportInProgress
class
Command
(
BaseCommand
):
args
=
'
none
'
...
...
@@ -29,16 +35,17 @@ class Command(BaseCommand):
def
handle
(
self
,
**
options
):
import_tei
()
def
import_tei
():
def
import_tei
():
logging
.
basicConfig
(
filename
=
'
import.log
'
,
level
=
logging
.
DEBUG
)
xml_file
=
os
.
path
.
join
(
BASE_DIR
,
'
data
'
,
'
walenty
'
,
'
walenty_20210913_smaller.xml
'
)
#xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
xml_file_name
=
get_environment
(
'
WALENTY_FILE_NAME
'
,
default
=
'
walenty_20210913_smaller.xml
'
)
xml_file
=
os
.
path
.
join
(
BASE_DIR
,
'
data
'
,
'
walenty
'
,
xml_file_name
)
# xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smaller.xml')
# xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913_smallest.xml')
# xml_file = os.path.join(BASE_DIR, 'data', 'walenty', 'walenty_20210913.xml')
xml_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
)),
xml_file
)
import_constants
()
...
...
@@ -57,6 +64,7 @@ def import_tei():
parser
.
parse
(
xml_path
)
ImportInProgress
.
objects
.
all
().
delete
()
def
import_constants
():
import_poses
()
import_statuses
()
...
...
@@ -81,60 +89,73 @@ def import_constants():
create_predefined_preferences
()
import_relations
()
def import_poses():
    """Save one POS row for each hard-coded part-of-speech tag, in order."""
    for tag in (u'unk', u'adj', u'noun', u'adv', u'verb'):
        POS(tag=tag).save()
def import_statuses():
    """Save one Status row per (priority, key) pair, in the listed order."""
    priority_and_key = (
        (10, u'do obróbki'),
        (20, u'w obróbce'),
        (25, u'do usunięcia'),
        (30, u'gotowe'),
        (35, u'zalążkowe'),
        (40, u'sprawdzone'),
        (50, u'(F) w obróbce'),
        (60, u'(F) gotowe'),
        (70, u'(F) sprawdzone'),
        (80, u'(S) w obróbce'),
        (90, u'(S) gotowe'),
        (100, u'(S) sprawdzone'),
    )
    for priority, key in priority_and_key:
        Status(key=key, priority=priority).save()
def import_schema_opinions():
    """Save one SchemaOpinion row per (priority, key) pair, in the listed order."""
    priority_and_key = (
        (60, u'vul'),
        (50, u'col'),
        (40, u'dat'),
        (30, u'bad'),
        (20, u'unc'),
        (10, u'cer'),
    )
    for priority, key in priority_and_key:
        SchemaOpinion(key=key, priority=priority).save()
def import_frame_opinions():
    """Save one FrameOpinion row per (priority, key) pair, in the listed order."""
    priority_and_key = (
        (70, u'met'),
        (60, u'vul'),
        (50, u'col'),
        (40, u'dat'),
        (30, u'bad'),
        (20, u'unc'),
        (10, u'cer'),
        (80, u'dom'),
        (90, u'rar'),
        (100, u'unk'),
    )
    for priority, key in priority_and_key:
        FrameOpinion(key=key, priority=priority).save()
def import_aspects():
    """Save one Aspect row per (priority, name) pair, including the '_' and empty names."""
    priority_and_name = (
        (10, u'imperf'),
        (20, u'perf'),
        (32, u'_'),
        (42, u''),
    )
    for priority, label in priority_and_name:
        Aspect(name=label, priority=priority).save()
def import_inherent_sies():
    """Save the two InherentSie rows: 'false' (priority 10) then 'true' (priority 20)."""
    for priority, label in ((10, u'false'), (20, u'true')):
        InherentSie(name=label, priority=priority).save()
def import_negativities():
    """Save one Negativity row per (priority, name) pair, including the '_' and empty names."""
    priority_and_name = (
        (20, u'aff'),
        (10, u'neg'),
        (31, u'_'),
        (41, u''),
    )
    for priority, label in priority_and_name:
        Negativity(name=label, priority=priority).save()
def import_predicativities():
    """Save the two Predicativity rows: 'false' (priority 20) then 'true' (priority 10)."""
    for priority, label in ((20, u'false'), (10, u'true')):
        Predicativity(name=label, priority=priority).save()
def import_syntactic_functions():
    """Save one SyntacticFunction row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (0, u'subj'),
        (20, u'head'),
        (10, u'obj'),
    )
    for priority, label in priority_and_name:
        SyntacticFunction(name=label, priority=priority).save()
def
import_control_tags
():
controls
=
[(
10
,
u
'
controller
'
),
(
20
,
u
'
controllee
'
),
(
30
,
u
'
controller2
'
),
(
40
,
u
'
controllee2
'
)]
for
pri
,
name
in
controls
:
...
...
@@ -145,6 +166,7 @@ def import_control_tags():
cont
=
PredicativeControl
(
name
=
name
,
priority
=
pri
)
cont
.
save
()
def
import_semantic_roles
():
roles
=
[
(
10
,
u
'
Initiator
'
,
u
'
91,106,217
'
,
None
),
...
...
@@ -169,7 +191,8 @@ def import_semantic_roles():
# priorities set so that, when role and attribute priorities are added,
# Role_Source < Role_Foreground < Role_Background < Role_Goal
# and Role can be inserted anywhere into that hierarchy
attributes
=
[(
1
,
u
'
Source
'
,
None
,
u
'
left
'
),
(
3
,
u
'
Foreground
'
,
None
,
u
'
top
'
),
(
5
,
u
'
Background
'
,
None
,
u
'
bottom
'
),
(
7
,
u
'
Goal
'
,
None
,
u
'
right
'
)]
attributes
=
[(
1
,
u
'
Source
'
,
None
,
u
'
left
'
),
(
3
,
u
'
Foreground
'
,
None
,
u
'
top
'
),
(
5
,
u
'
Background
'
,
None
,
u
'
bottom
'
),
(
7
,
u
'
Goal
'
,
None
,
u
'
right
'
)]
for
pri
,
role
,
color
,
gradient
in
roles
:
role
=
SemanticRole
(
role
=
role
,
color
=
color
,
priority
=
pri
)
role
.
save
()
...
...
@@ -191,31 +214,44 @@ def import_semantic_role_types():
cont
=
RoleType
(
type
=
name
)
cont
.
save
()
# def import_predefined_preferences():
# predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT', u'KONCEPCJA', u'WYTWÓR', u'JADŁO', u'CZAS', u'OBIEKTY', u'CECHA', u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POŁOŻENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE']
# predefs = [u'ALL', u'LUDZIE', u'ISTOTY', u'PODMIOTY', u'KOMUNIKAT',
# u'KONCEPCJA', u'WYTWÓR', u'JADŁO', u'CZAS', u'OBIEKTY', u'CECHA',
# u'CZYNNOŚĆ', u'KIEDY', u'CZEMU', u'ILOŚĆ', u'POŁOŻENIE', u'DOBRA', u'MIEJSCE', u'SYTUACJA', u'OTOCZENIE']
# for name in predefs:
# predef = PredefinedSelectionalPreference(key=name)
# predef.save()
def import_preference_relations():
    """Save one SelectionalPreferenceRelation row per (plwn_id, key) pair, in the listed order."""
    plwn_id_and_key = (
        (14, u'meronimia'),
        (15, u'holonimia'),
        (20, u'meronimia (typu część)'),
        (21, u'meronimia (typu porcja)'),
        (22, u'meronimia (typu miejsce)'),
        (23, u'meronimia (typu element)'),
        (24, u'meronimia (typu materiał)'),
        (25, u'holonimia (typu część)'),
        (26, u'holonimia (typu porcja)'),
        (27, u'holonimia (typu miejsce)'),
        (28, u'holonimia (typu element)'),
        (29, u'holonimia (typu materiał)'),
        (51, u'nosiciel stanu/cechy'),
        (52, u'stan/cecha'),
        (61, u'synonimia międzyparadygmatyczna'),
        (64, u'meronimia (typu element taksonomiczny)'),
        (65, u'holonimia (typu element taksonomiczny)'),
        (108, u'fuzzynimia synsetów'),
        (-1, u'RELAT'),
    )
    # The local is called relation_id rather than id so the builtin stays unshadowed.
    for relation_id, key in plwn_id_and_key:
        SelectionalPreferenceRelation(plwn_id=relation_id, key=key).save()
def import_examples_sources():
    """Save one ExampleSource row per key; priority is the key's position (0-9)."""
    source_keys = (
        u'NKJP0.5M',
        u'NKJP1.2M',
        u'NKJP30M',
        u'NKJP250M',
        u'NKJP300M',
        u'NKJP500M',
        u'NKJP1800M',
        u'linguistic_literature',
        u'other_literature',
        u'own',
    )
    for priority, key in enumerate(source_keys):
        ExampleSource(key=key, priority=priority).save()
def import_examples_opinions():
    """Save one ExampleOpinion row per key; priority is the key's position (0-2)."""
    for priority, key in enumerate(('zły', 'wątpliwy', 'dobry')):
        ExampleOpinion(key=key, priority=priority).save()
def
import_phrase_attributes
():
import_cases
()
import_phrase_aspects
()
...
...
@@ -231,54 +267,65 @@ def import_phrase_attributes():
dummy_position
.
save
()
assert
(
dummy_position
.
id
==
1
)
def import_cases():
    """Save one Case row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (0, u'str'),
        (1, u'nom'),
        (2, u'gen'),
        (3, u'dat'),
        (4, u'acc'),
        (5, u'inst'),
        (6, u'loc'),
        (10, u'pred'),
        (11, u'part'),
        (12, u'postp'),
        (13, u'agr'),
    )
    for priority, label in priority_and_name:
        Case(name=label, priority=priority).save()
def import_phrase_aspects():
    """Save one PhraseAspect row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (10, u'imperf'),
        (20, u'perf'),
        (30, u'_'),
    )
    for priority, label in priority_and_name:
        PhraseAspect(name=label, priority=priority).save()
def import_phrase_negativities():
    """Save one PhraseNegativity row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (10, u'aff'),
        (20, u'neg'),
        (30, u'_'),
    )
    for priority, label in priority_and_name:
        PhraseNegativity(name=label, priority=priority).save()
def import_phrase_inherent_sies():
    """Save the two PhraseInherentSie rows: 'się' (priority 10) then the empty name (priority 20)."""
    for priority, label in ((10, u'się'), (20, u'')):
        PhraseInherentSie(name=label, priority=priority).save()
def import_adverbial_categories():
    """Save one AdverbialCategory row per name; priority is the 1-based position (1-12)."""
    category_names = (
        u'locat',
        u'abl',
        u'adl',
        u'perl',
        u'temp',
        u'dur',
        'mod',
        'caus',
        'dest',
        'instr',
        'pron',
        'misc',
    )
    for priority, label in enumerate(category_names, 1):
        AdverbialCategory(name=label, priority=priority).save()
def import_numbers():
    """Save one Number row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (1, u'sg'),
        (2, u'pl'),
        (10, u'agr'),
        (20, u'_'),
    )
    for priority, label in priority_and_name:
        Number(name=label, priority=priority).save()
def import_genders():
    """Save one Gender row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (1, u'm1'),
        (2, u'm2'),
        (3, u'm3'),
        (4, u'f'),
        (5, u'n'),
        (10, u'agr'),
    )
    for priority, label in priority_and_name:
        Gender(name=label, priority=priority).save()
def import_degrees():
    """Save one Degree row per (priority, name) pair, in the listed order."""
    priority_and_name = (
        (1, u'pos'),
        (2, u'com'),
        (3, u'sup'),
        (20, u'_'),
    )
    for priority, label in priority_and_name:
        Degree(name=label, priority=priority).save()
def
import_lemma_operators
():
operators
=
[(
1
,
u
'
xor
'
),
(
2
,
u
'
or
'
)]
for
pri
,
name
in
operators
:
...
...
@@ -289,6 +336,7 @@ def import_lemma_operators():
cooccur
=
LemmaCooccur
(
name
=
name
,
priority
=
pri
)
cooccur
.
save
()
def
import_modification_types
():
modtypes
=
[(
1
,
u
'
ratr
'
),
(
2
,
u
'
ratr1
'
),
(
3
,
u
'
atr
'
),
(
4
,
u
'
atr1
'
),
(
5
,
u
'
natr
'
)]
for
pri
,
name
in
modtypes
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment