Skip to content
Snippets Groups Projects
Commit 869cd580 authored by jezozwierzak's avatar jezozwierzak
Browse files

Added feature selection and gitignore

parent f0ed27dc
No related branches found
No related tags found
No related merge requests found
# Compiled source #
###################
*.pyc
......@@ -12,6 +12,7 @@ from optparse import OptionParser
from threading import Thread
descr = """%prog [options] corpus_dir out_dir"""
wind = 2
def go():
parser = OptionParser(usage=descr)
......@@ -86,29 +87,39 @@ def process_fold(fold, out_dir, corpus_dir, config_dir, config_name):
os.remove(os.path.join(out_dir, "models", str(fold).zfill(2), "dict-sie.lex"))
# Pick a neighbouring solution
def neightbour(v=None, c=None):
    """Flip one randomly chosen 0/1 flag to produce a neighbouring solution.

    A slot is drawn uniformly from the rows of ``v`` followed by the entries
    of ``c`` (taken in sorted-key order).  If the slot is a row of ``v``, one
    uniformly chosen flag of that row is toggled; if it is an entry of ``c``,
    that entry's flag is toggled.

    Args:
        v: list of mutable rows of 0/1 flags.  Modified in place.
        c: dict mapping a key to a 0/1 flag.  Modified in place.

    Returns:
        The tuple ``(v, c)`` -- the same objects that were passed in (or two
        fresh empty containers when an argument was omitted).
    """
    # Fresh containers per call: a shared mutable default would leak state
    # between calls because the arguments are modified in place.
    if v is None:
        v = []
    if c is None:
        c = {}

    slots = len(v) + len(c)
    if slots == 0:
        # Nothing to flip.
        return (v, c)

    # randrange excludes the upper bound; randint includes it and would
    # occasionally yield an index one past the end.
    a = random.randrange(slots)
    if a < len(v):
        row = v[a]
        if row:
            # Draw the column from the row's own length so the index is
            # always valid.
            b = random.randrange(len(row))
            row[b] = 1 if row[b] == 0 else 0
    else:
        # sorted(c) is a throw-away list of keys; use it only to pick the
        # key and write the toggled flag back into the dict itself.
        key = sorted(c)[a - len(v)]
        c[key] = 1 if c[key] == 0 else 0
    return (v, c)
# Compute the initial temperature
def tempestimation(out_dir, config_dir, config_name, corpus_dir, folds, vector):
def tempestimation(out_dir, config_dir, config_name, corpus_dir, folds, vector, constructed):
iterations = 100 #ilosc iteracji symulacji
sum = 0
results = {} #histogram wyników
for i in range(iterations):
print "Temperature estimation it:" + i
result = f(os.path.join(out_dir, str(i).zfill(2)), config_dir, config_name, corpus_dir, folds, vector)
sum += result
if result not in results.keys():
results[result] = 1
else:
results[result] += 1
vector = neightbour(vector)
vector, constructed = neightbour(vector, constructed)
avg = sum / float(iterations)#obliczenie średniego wyniku
k = 0
deviation = 0
......@@ -128,10 +139,12 @@ def get_features_number(cclfile):
return count + 1
def generate_features_txt(resultfile, vector = [], constructed = {}):
global wind
out = open(resultfile, 'w+')
feature_num = 0
actual_feature_num = len(vector[0]) / 2 + 1
actual_feature_num = wind / 2 + 1
if len(vector) > 0:
for i in range(len(vector)):
for j in range(len(vector[i])):
if vector[i][j] == 1:
......@@ -139,8 +152,20 @@ def generate_features_txt(resultfile, vector = [], constructed = {}):
out.write("\n")
feature_num += 1
out.write("\n")
for i in range(len(constructed)):
feats = constructed[i].split("%")
if len(constructed) > 0:
for key in constructed.keys():
if constructed[key] == 1:
feature_substrings = key.split("%")
for subs in feature_substrings:
feature1 = subs.split(".")[0]
feature1_num = subs.split(".")[1]
if subs == feature_substrings[0]:
out.write('U%02d:%%x[%s,%s]/'%(feature_num, feature1_num, feature1))
elif subs == feature_substrings[-1]:
out.write('%%x[%s,%s]'%(feature1_num, feature1))
else:
out.write('%%x[%s,%s]/'%(feature1_num, feature1))
feature_num += 1
out.write("\n")
out.write("B")
......@@ -168,33 +193,43 @@ def P(e, en, temp):
return 1
def main(corpus_dir, out_dir, config, window, folds):
    """Run the simulated-annealing feature selection.

    Stores ``window`` in the module-level ``wind``, derives the config
    directory/name from ``config``, estimates a starting temperature with
    ``tempestimation`` and then repeatedly evaluates neighbouring solutions
    with ``f``, cooling the temperature by a factor of 0.95 per iteration.

    Args:
        corpus_dir: corpus directory, passed through to ``f`` and
            ``tempestimation``.
        out_dir: output directory; created if it does not exist.
        config: path of the configuration file; a sibling ``.ccl`` file with
            the same base name is passed to ``get_features_number``.
        window: window size; stored in ``wind`` and passed to
            ``randomize_vector``.
        folds: passed through to ``f`` and ``tempestimation``.
    """
    global wind
    wind = window

    config_dir = os.path.dirname(config)
    config_name = os.path.splitext(os.path.basename(config))[0]
    config_ccl = os.path.join(config_dir, config_name + ".ccl")

    if not os.path.exists(out_dir):
        tools.mkdir_p(out_dir)

    # Candidate constructed features, all switched off initially.
    constructed = {'0.-1%0.0' :0, '0.0%0.1' :0,
                   '1.-2%1.-1':0, '1.0%1.-1':0, '1.0%1.1':0, '1.1%1.2':0,
                   '7.-1%8.-1':0, '7.0%8.0' :0, '7.1%8.1':0,
                   '1.-2%1.-1%1.0' : 0, '1.-1%1.0%1.1' : 0, '1.0%1.1%1.2' : 0
                   }

    a_vector = randomize_vector(get_features_number(config_ccl), window)
    a_constructed = dict(constructed)

    # tempestimation walks through neighbours of what it is given, so hand it
    # copies to keep the starting solution intact.  Its signature requires
    # the constructed-feature dict as the last argument.
    temperature = tempestimation(os.path.join(out_dir, "estimation"), config_dir, config_name, corpus_dir, folds,
                                 [list(row) for row in a_vector], dict(a_constructed))

    a_value = f(os.path.join(out_dir, "selection", "first"), config_dir, config_name, corpus_dir, folds,
                vector = a_vector, constructed = a_constructed)
    i = 1
    # NOTE(review): geometric cooling only reaches 0 through float underflow,
    # so this loop runs for a very large number of iterations; consider a
    # minimum-temperature or iteration cut-off.
    while temperature > 0:
        # %-formatting: temperature is a number and cannot be concatenated
        # to a str.
        print("Feature selection temp:%s" % temperature)

        # neightbour modifies its arguments in place; perturb copies so a
        # rejected candidate does not overwrite the current solution.
        b_vector, b_constructed = neightbour([list(row) for row in a_vector], dict(a_constructed))
        b_value = f(os.path.join(out_dir, "selection", str(i).zfill(2)), config_dir, config_name, corpus_dir, folds, vector = b_vector, constructed = b_constructed)
        prob = P(a_value, b_value, temperature)

        # Accept improvements always; otherwise accept with probability
        # ``prob``.  random.random() is uniform on [0, 1), whereas
        # randint(0, 1) only yields 0 or 1 and ignores the value of prob.
        if b_value > a_value or random.random() < prob:
            a_vector = b_vector
            a_constructed = b_constructed
            # Keep the score in step with the accepted solution.
            a_value = b_value

        temperature = temperature * 0.95
        i += 1
# Script entry point: run the command-line front end only when this file is
# executed directly, not when it is imported.
if "__main__" == __name__:
    go()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment