Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
I
Iobber
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Syntactic Tools
Chunking
Iobber
Commits
869cd580
Commit
869cd580
authored
Mar 25, 2013
by
jezozwierzak
Browse files
Options
Downloads
Patches
Plain Diff
Added feature selection and gitignore
parent
f0ed27dc
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
.gitignore
+3
-0
3 additions, 0 deletions
.gitignore
scripts/feature_selection/crf_wrapper.py
+79
-44
79 additions, 44 deletions
scripts/feature_selection/crf_wrapper.py
with
82 additions
and
44 deletions
.gitignore
0 → 100644
+
3
−
0
View file @
869cd580
# Compiled source #
###################
*.pyc
This diff is collapsed.
Click to expand it.
scripts/feature_selection/crf_wrapper.py
+
79
−
44
View file @
869cd580
...
...
@@ -12,6 +12,7 @@ from optparse import OptionParser
from
threading
import
Thread
descr
=
"""
%prog [options] corpus_dir out_dir
"""
wind
=
2
def
go
():
parser
=
OptionParser
(
usage
=
descr
)
...
...
@@ -86,29 +87,39 @@ def process_fold(fold, out_dir, corpus_dir, config_dir, config_name):
os
.
remove
(
os
.
path
.
join
(
out_dir
,
"
models
"
,
str
(
fold
).
zfill
(
2
),
"
dict-sie.lex
"
))
# Select a neighbouring solution
def neightbour(v=None, c=None):
    """Return a random neighbour of the feature-selection state ``(v, c)``.

    Exactly one binary flag is flipped (0 -> 1 or 1 -> 0), chosen uniformly
    over the rows of ``v`` plus the entries of ``c``:

    * if a row of ``v`` is picked, one randomly chosen column of that row
      is flipped;
    * otherwise the flag of one constructed feature in ``c`` is flipped,
      with keys addressed in sorted order.

    The inputs are never modified; fresh copies are returned, so the caller
    can keep the current solution and discard a rejected candidate.

    Parameters:
        v: list of rows of 0/1 flags (``None`` is treated as empty).
        c: dict mapping a constructed-feature key to a 0/1 flag
           (``None`` is treated as empty).

    Returns:
        A ``(vector, constructed)`` tuple holding the modified copies.
        When both inputs are empty the (empty) copies are returned as is.
    """
    new_v = [list(row) for row in (v or [])]
    new_c = dict(c or {})

    total = len(new_v) + len(new_c)
    if total == 0:
        # Nothing to flip; randrange(0) would raise ValueError.
        return (new_v, new_c)

    # randrange excludes the upper bound, unlike randint, whose inclusive
    # bound let the index run one past the end.
    a = random.randrange(total)
    if a < len(new_v):
        row = new_v[a]
        if row:
            # Index by the row's own length so the column is always valid.
            b = random.randrange(len(row))
            row[b] = 1 if row[b] == 0 else 0
    else:
        # sorted() only yields the keys; the flag has to be toggled in the
        # dict itself, not in the temporary list of keys.
        key = sorted(new_c)[a - len(new_v)]
        new_c[key] = 1 if new_c[key] == 0 else 0

    return (new_v, new_c)
#Estimate the initial temperature
def
tempestimation
(
out_dir
,
config_dir
,
config_name
,
corpus_dir
,
folds
,
vector
):
def
tempestimation
(
out_dir
,
config_dir
,
config_name
,
corpus_dir
,
folds
,
vector
,
constructed
):
iterations
=
100
#ilosc iteracji symulacji
sum
=
0
results
=
{}
#histogram wyników
for
i
in
range
(
iterations
):
print
"
Temperature estimation it:
"
+
i
result
=
f
(
os
.
path
.
join
(
out_dir
,
str
(
i
).
zfill
(
2
)),
config_dir
,
config_name
,
corpus_dir
,
folds
,
vector
)
sum
+=
result
if
result
not
in
results
.
keys
():
results
[
result
]
=
1
else
:
results
[
result
]
+=
1
vector
=
neightbour
(
vector
)
vector
,
constructed
=
neightbour
(
vector
,
constructed
)
avg
=
sum
/
float
(
iterations
)
#obliczenie średniego wyniku
k
=
0
deviation
=
0
...
...
@@ -128,10 +139,12 @@ def get_features_number(cclfile):
return
count
+
1
def
generate_features_txt
(
resultfile
,
vector
=
[],
constructed
=
{}):
global
wind
out
=
open
(
resultfile
,
'
w+
'
)
feature_num
=
0
actual_feature_num
=
len
(
vector
[
0
])
/
2
+
1
actual_feature_num
=
wind
/
2
+
1
if
len
(
vector
)
>
0
:
for
i
in
range
(
len
(
vector
)):
for
j
in
range
(
len
(
vector
[
i
])):
if
vector
[
i
][
j
]
==
1
:
...
...
@@ -139,8 +152,20 @@ def generate_features_txt(resultfile, vector = [], constructed = {}):
out
.
write
(
"
\n
"
)
feature_num
+=
1
out
.
write
(
"
\n
"
)
for
i
in
range
(
len
(
constructed
)):
feats
=
constructed
[
i
].
split
(
"
%
"
)
if
len
(
constructed
)
>
0
:
for
key
in
constructed
.
keys
():
if
constructed
[
key
]
==
1
:
feature_substrings
=
key
.
split
(
"
%
"
)
for
subs
in
feature_substrings
:
feature1
=
subs
.
split
(
"
.
"
)[
0
]
feature1_num
=
subs
.
split
(
"
.
"
)[
1
]
if
subs
==
feature_substrings
[
0
]:
out
.
write
(
'
U%02d:%%x[%s,%s]/
'
%
(
feature_num
,
feature1_num
,
feature1
))
elif
subs
==
feature_substrings
[
-
1
]:
out
.
write
(
'
%%x[%s,%s]
'
%
(
feature1_num
,
feature1
))
else
:
out
.
write
(
'
%%x[%s,%s]/
'
%
(
feature1_num
,
feature1
))
feature_num
+=
1
out
.
write
(
"
\n
"
)
out
.
write
(
"
B
"
)
...
...
@@ -168,33 +193,43 @@ def P(e, en, temp):
return
1
def main(corpus_dir, out_dir, config, window, folds):
    """Run simulated-annealing feature selection.

    Starting from a random feature vector and a fixed set of (initially
    disabled) constructed features, the initial temperature is obtained
    from ``tempestimation``. Neighbouring solutions are then evaluated
    with ``f`` and accepted when they score higher, or otherwise with the
    probability given by ``P``. The temperature is multiplied by 0.95
    after every step.

    Parameters:
        corpus_dir: corpus directory, passed through to ``f``.
        out_dir: output directory; created when missing. Results are
            written under ``estimation`` and ``selection`` inside it.
        config: path to the configuration file; its directory and base
            name are used to locate the matching ``.ccl`` file.
        window: window size; also stored in the module-level ``wind``.
        folds: fold specification, passed through to ``f``.
    """
    global wind
    wind = window

    config_dir = os.path.dirname(config)
    config_name = os.path.splitext(os.path.basename(config))[0]
    config_ccl = os.path.join(config_dir, config_name + ".ccl")

    if not os.path.exists(out_dir):
        tools.mkdir_p(out_dir)

    # Candidate constructed features, keyed as "<feature>.<offset>" parts
    # joined with "%"; every one starts disabled (flag 0).
    constructed = {
        '0.-1%0.0': 0,
        '0.0%0.1': 0,
        '1.-2%1.-1': 0,
        '1.0%1.-1': 0,
        '1.0%1.1': 0,
        '1.1%1.2': 0,
        '7.-1%8.-1': 0,
        '7.0%8.0': 0,
        '7.1%8.1': 0,
        '1.-2%1.-1%1.0': 0,
        '1.-1%1.0%1.1': 0,
        '1.0%1.1%1.2': 0,
    }

    a_vector = randomize_vector(get_features_number(config_ccl), window)
    a_constructed = constructed

    # Hand copies to the estimation run so that its random walk cannot
    # alter the starting solution through in-place modification.
    temperature = tempestimation(
        os.path.join(out_dir, "estimation"), config_dir, config_name,
        corpus_dir, folds,
        [list(row) for row in a_vector], dict(a_constructed))

    a_value = f(
        os.path.join(out_dir, "selection", "first"), config_dir,
        config_name, corpus_dir, folds,
        vector=a_vector, constructed=a_constructed)
    i = 1
    # NOTE(review): with geometric cooling the temperature only reaches 0
    # through float underflow (many thousands of steps) -- consider
    # stopping at a small positive threshold instead.
    while temperature > 0:
        # Format instead of concatenating: str + float raises TypeError.
        print("Feature selection temp: %s" % temperature)

        # Perturb copies so the current solution survives a rejected move.
        b_vector, b_constructed = neightbour(
            [list(row) for row in a_vector], dict(a_constructed))
        b_value = f(
            os.path.join(out_dir, "selection", str(i).zfill(2)),
            config_dir, config_name, corpus_dir, folds,
            vector=b_vector, constructed=b_constructed)
        prob = P(a_value, b_value, temperature)

        # Accept improvements outright; otherwise accept with probability
        # prob. random.random() is uniform on [0, 1), whereas
        # randint(0, 1) only ever yields 0 or 1.
        if b_value > a_value or random.random() < prob:
            a_vector = b_vector
            a_constructed = b_constructed
            # Keep the reference score in step with the accepted solution.
            a_value = b_value

        temperature = temperature * 0.95
        i += 1
# Script entry point: run the command-line driver only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    go()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment