Merge branch 'dev' into 'master'

Added CI, refactored.

See merge request !1

Merge branch 'dev' into 'master'
7af2488f · Mateusz Gniewkowski · bf9ffbc3 · e1fe275d · 7af2488f · 7af2488f
Commit 7af2488f authored Sep 14, 2020 by Mateusz Gniewkowski
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
+image: clarinpl/python:2.7
+
+# Run the style check before anything is built. Without an explicit list
+# GitLab falls back to its default stage order, where `build` precedes
+# `test` (the stage a job gets when it declares none), so the image would be
+# built and pushed before pep8 had a chance to fail the pipeline.
+stages:
+  - check_style
+  - build
+
+# Keep the tox environments between pipeline runs.
+cache:
+  paths:
+    - .tox
+
+# Default for every job that does not override before_script.
+before_script:
+  - pip install tox==2.9.1
+
+pep8:
+  stage: check_style
+  script:
+    - tox -v -e pep8
+
+build_image:
+  stage: build
+  image: 'docker:18.09.7'
+  only:
+    - master
+  services:
+    - 'docker:18.09.7-dind'
+  # Override the global before_script: this job does not use tox.
+  before_script:
+    - ''
+  script:
+    - docker build -t clarinpl/subfeatsel .
+    # Feed the password to docker login through a pipe so it is never written
+    # to a file in the workspace (and cannot be left behind if login fails).
+    - echo "$DOCKER_PASSWORD" | docker login --username "$DOCKER_USERNAME" --password-stdin
+    - docker push clarinpl/subfeatsel
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,19 +20,11 @@ WORKDIR /tmp
 RUN wget http://prdownloads.sourceforge.net/weka/weka-3-8-3.zip  && \
    unzip  weka-3-8-3.zip

-
-
+WORKDIR /home/worker
+# Install dependencies before copying the sources so that this layer stays
+# cached when only application code changes.
 COPY requirements.txt .
 RUN pip install -r requirements.txt
+COPY ./src ./src
+COPY ./main.py .
-
-# Change it to pypi
-COPY module/ /home/worker/
-RUN python -m easy_install /home/worker/ltcore*
-
-
-# Change it to pypi
-WORKDIR /tmp
-RUN wget http://www.nlp.pwr.wroc.pl/download/lexcsd/ltlearn-0.2.0.tar.gz && \
-    tar -xvf ltlearn-0.2.0.tar.gz && \
-    cd ltlearn-0.2.0 && \
-    python setup.py install
\ No newline at end of file
+CMD ["python","main.py"]
--- a/config.ini
+++ b/config.ini
@@ -2,12 +2,12 @@
 tool = subfeatsel

 root = /samba/requests/
-rabbit_host = rabbit.clarin.ws
-rabbit_user = clarin
-rabbit_password = clarin123
+rabbit_host = rabbitmq
+rabbit_user = test
+rabbit_password = test

 [tool]
-workers_number = 12
+workers_number = 2
 weka_jar=/tmp/weka-3-8-3/weka.jar

 [logging]

--- a/main.py
+++ b/main.py
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Entry point for the SubFeatSel worker service."""
+import nlp_ws
+
+from src.subfeatsel_worker import FeatSelSubWorker
+
+if __name__ == "__main__":  # only when executed as a script
+    nlp_ws.NLPService.main(FeatSelSubWorker)  # hand worker class to nlp_ws
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,5 @@ numpy==1.14.3
 scikit-learn==0.19.0
 scipy==0.19.1
 sqlalchemy
+ltcore
+ltlearn
--- a/src/__init__.py
+++ b/src/__init__.py
--- a/src/subfeatsel_worker.py
+++ b/src/subfeatsel_worker.py
--- a/subfeatsel_worker.py
+++ b/subfeatsel_worker.py
-#!/usr/bin/python2
-
-import json
-
-
-import nlp_ws
-from scipy import stats
-import numpy,tempfile,scipy
-from scipy.sparse import lil_matrix
-import ltlearn.transformations.selection.weka.wekaattributeselection as was
-from argparse import Namespace
-from ltcore import SparseMatrix
-import sys,shutil,time
-from collections import namedtuple
-from sklearn.ensemble import ExtraTreesClassifier,RandomForestClassifier,GradientBoostingClassifier, AdaBoostClassifier
-from sklearn.feature_selection import RFE
-from sklearn.svm import  SVC
-from sklearn.linear_model import LogisticRegression
-from sklearn.naive_bayes import MultinomialNB
-import base64
-
-reload(sys)  
-sys.setdefaultencoding('utf8')
-
-
-SCIPY_METHODS = ['MannWhitney', 'WilcoxonRankSum', 'KolmogorovSmirnov', 'KruskalWallis', 'TTest']
-WEKA_METHODS = ['InfoGainAttributeEval', 'CfsSubsetEval', 'ConsistencySubsetEval',
-                'GainRatioAttributeEval', 'ChiSquaredAttributeEval']
-ENSAMBLE_METHODS =['ExtraTrees','RandomForest','GradientBoosting','AdaBoost']
-RFE_METHODS=['RFE_LogisticRegression','RFE_SVM','RFE_NaiveBayes']
-
-stats_file = 'descriptive_features.csv'
-verbose=True
-
-class FeatureSelection(object):
-	"""
-	A class for feature selection with Weka or Scipy.
-	"""
-
-	def __init__(self, attr_eval, no_features_to_select, weka_jar,tmp_dir):
-		self.attr_eval = attr_eval
-		self.no_feats = no_features_to_select
-		self.weka_jar = weka_jar
-		self.tmp_dir= tmp_dir
-		if self.attr_eval in WEKA_METHODS:
-			if 'Subset' in self.attr_eval:
-				self.search_method = 'GreedyStepwise'
-			else:
-				self.search_method = 'Ranker'
-
-	def select_features(self, matrix):
-		""" 
-		Select features differing classes from the matrix. Matrix has to have only two classes inside.
-
-		:return {cluster_id:[(feature, mean in that cluster, std in that cluster, mean in all the other clusters,
-							std in all the other clusters)]}
-		"""
-
-		if self.attr_eval in WEKA_METHODS:
-			return self.weka_feature_selection(matrix)
-		elif self.attr_eval in SCIPY_METHODS:
-			return self.scipy_feature_selection(matrix)
-		elif self.attr_eval in ENSAMBLE_METHODS:
-			return self.ensamble_feature_selection(matrix)
-		elif self.attr_eval in RFE_METHODS:
-			return self.rfe_feature_selection(matrix)
-		else:
-			raise Exception( 'Unrecognized feature selection method: %s.' % self.attr_eval);
-
-	def rfe_feature_selection(self, matrix):
-		"""
-		Select features with ENSAMBLE methods from sklearn
-		.
-		"""
-		print matrix.head_word
-		if self.attr_eval == 'RFE_LogisticRegression':
-			model = LogisticRegression()
-		elif self.attr_eval == 'RFE_SVM':
-			model = SVC(kernel='linear')	
-		elif self.attr_eval == 'RFE_NaiveBayes':
-			model = MultinomialNB()	
-		rfe = RFE(model, 2)
-		print type(matrix.data)
-		print "start"
-		rfe = rfe.fit(matrix.data, matrix.class_labels)
-		print "finished"
-		
-		
-		
-		importances = (self.no_feats-(rfe.ranking_-1))/(self.no_feats*1.0);
-		
-		indices = numpy.argsort(rfe.ranking_)[::1]
-		
-		
-		
-		# Print the feature ranking
-		
-		
-		matrix.data=matrix.data.tolil();
-		subclasses = self.split_matrix_by_classes(matrix)
-		results = []
-		for f in range(len(matrix.column_labels)):
-			feature=matrix.column_labels[indices[f]]
-			if (importances[indices[f]]<0): break;
-			sample1 = get_column(subclasses[0], feature)
-			sample1 = complete_with_zeros(sample1, subclasses[0].data.shape[0])
-			sample2 = get_column(subclasses[1], feature)
-			sample2 = complete_with_zeros(sample2, subclasses[1].data.shape[0])
-			if subclasses[0].head_word == 'true':
-				mean_in = numpy.mean(sample1)
-				std_in = numpy.std(sample1)
-				mean_out = numpy.mean(sample2)
-				std_out = numpy.std(sample2)
-			else:
-				mean_in = numpy.mean(sample2)
-				std_in = numpy.std(sample2)
-				mean_out = numpy.mean(sample1)
-				std_out = numpy.std(sample1)
-			results.append({"feature":feature.encode('utf-8'),"importance":importances[indices[f]], "mean_in":mean_in.item(), "std_in":std_in.item(), "mean_out":mean_out.item(), "std_out":std_out.item()})
-
-		return results				
-	def ensamble_feature_selection(self, matrix):
-		"""
-		Select features with ENSAMBLE methods from sklearn
-		.
-		"""
-		print matrix.head_word
-		if self.attr_eval == 'ExtraTrees':
-			forest = ExtraTreesClassifier(n_estimators=250,max_features=self.no_feats,random_state=0,n_jobs=2)
-		
-		elif self.attr_eval == 'RandomForest':
-			forest = RandomForestClassifier(n_estimators=250,max_features=self.no_feats,random_state=0,n_jobs=2)	
-		
-		elif self.attr_eval == 'GradientBoosting':
-			forest = GradientBoostingClassifier(n_estimators=250,max_features=self.no_feats,random_state=0)	
-		elif self.attr_eval == 'AdaBoost':
-			forest = AdaBoostClassifier()	
-		
-		forest.fit(matrix.data, matrix.class_labels)
-		
-		importances = forest.feature_importances_
-		
-		indices = numpy.argsort(importances)[::-1]
-
-		# Print the feature ranking
-
-		
-		matrix.data=matrix.data.tolil();
-
-		subclasses = self.split_matrix_by_classes(matrix)
-
-		results = []
-		ind=0;
-		for f in range(len(matrix.column_labels)):
-			feature=matrix.column_labels[indices[f]]
-			if (importances[indices[f]]<0.0001): break;
-			ind=ind+1;
-			if ind>self.no_feats: break;
-			
-			sample1 = get_column(subclasses[0], feature)
-			sample1 = complete_with_zeros(sample1, subclasses[0].data.shape[0])
-			sample2 = get_column(subclasses[1], feature)
-			sample2 = complete_with_zeros(sample2, subclasses[1].data.shape[0])
-			
-			if subclasses[0].head_word == 'true':
-				mean_in = numpy.mean(sample1)
-				std_in = numpy.std(sample1)
-				mean_out = numpy.mean(sample2)
-				std_out = numpy.std(sample2)
-			else:
-				mean_in = numpy.mean(sample2)
-				std_in = numpy.std(sample2)
-				mean_out = numpy.mean(sample1)
-				std_out = numpy.std(sample1)
-			results.append({"feature":feature.encode('utf-8'),"importance":importances[indices[f]], "mean_in":mean_in.item(), "std_in":std_in.item(), "mean_out":mean_out.item(), "std_out":std_out.item()})
-		
-		
-		return results		
-
-		
-	def scipy_feature_selection(self, matrix):
-		"""
-		Select features with Scipy.
-		"""
-		matrix.data=matrix.data.tolil();
-		subclasses = self.split_matrix_by_classes(matrix)
-		matrix=[];
-		results = []
-		for col_name in subclasses[0].column_labels:
-			sample1 = get_column(subclasses[0], col_name)
-			sample1 = complete_with_zeros(sample1, subclasses[0].data.shape[0])
-			sample2 = get_column(subclasses[1], col_name)
-			sample2 = complete_with_zeros(sample2, subclasses[1].data.shape[0])
-			try:
-				if self.attr_eval == 'MannWhitney':
-					res = stats.mannwhitneyu(sample1, sample2)
-				elif self.attr_eval == 'WilcoxonRankSum':
-					res = stats.ranksums(sample1, sample2)
-				elif self.attr_eval == 'KolmogorovSmirnov':
-					res = stats.ks_2samp(sample1, sample2)
-				elif self.attr_eval == 'KruskalWallis':
-					res = stats.kruskal(sample1, sample2)
-				elif self.attr_eval == 'TTest':
-					res = stats.ttest_ind(sample1, sample2)
-				else:
-					print('No correlation test named %s' % self.attr_eval)
-					res = None
-				if res is not None and res[1] < 0.05:   # res[1] --> p-value
-					if subclasses[0].head_word == 'true':
-						mean_in = numpy.mean(sample1)
-						std_in = numpy.std(sample1)
-						mean_out = numpy.mean(sample2)
-						std_out = numpy.std(sample2)
-					else:
-						mean_in = numpy.mean(sample2)
-						std_in = numpy.std(sample2)
-						mean_out = numpy.mean(sample1)
-						std_out = numpy.std(sample1)
-					results.append((col_name, res[0], res[1], mean_in, std_in, mean_out, std_out))
-			except ValueError:
-				print '%s error for %s' % (self.attr_eval, col_name), set(sample1), set(sample2)
-
-		#return [(r[0].encode('utf-8'), r[3], r[4], r[5], r[6]) for r in sorted(results, key=byThirdTerm)]
-		return [{"feature":r[0].encode('utf-8'), "stat":r[1],"p-value":r[2],"mean_in":r[3], "std_in":r[4], "mean_out":r[5], "std_out":r[6]} for r in sorted(results, key=byThirdTerm)[:self.no_feats]]
-		
-	def weka_feature_selection(self, matrix):
-		"""
-		Select features with Weka.
-		"""
-		
-		print 'Selecting features for %s' % matrix.head_word
-		tmp_mat = SparseMatrix(matrix.head_word, matrix.class_labels, matrix.column_labels, matrix.contexts,
-							   matrix.data.tolil(), boundaries=matrix.boundaries, possible_classes=matrix.possible_classes)
-		
-		
-		w = was.AttributeSelection()
-		w._weka_jar = self.weka_jar
-		w._work_dir = self.tmp_dir
-		w.set_attribute_evaluator(self.attr_eval)
-		w.set_search_method(self.search_method)
-
-		if self.no_feats is not None and 'Subset' not in self.attr_eval:
-			w.set_search_method_options('-N %s' % self.no_feats)
-		elif 'Subset' not in self.attr_eval:
-			print 'WARNING: number of features to select is not given in config file'
-	
-		w.make(tmp_mat)
-		feats = w._selected_features[:-1]
-		
-		matrix.data=matrix.data.tolil();
-		subclasses = self.split_matrix_by_classes(matrix)
-		results = []
-		for feature in feats:
-			
-			sample1 = get_column(subclasses[0], feature)
-			sample1 = complete_with_zeros(sample1, subclasses[0].data.shape[0])
-			sample2 = get_column(subclasses[1], feature)
-			sample2 = complete_with_zeros(sample2, subclasses[1].data.shape[0])
-			if subclasses[0].head_word == 'true':
-				mean_in = numpy.mean(sample1)
-				std_in = numpy.std(sample1)
-				mean_out = numpy.mean(sample2)
-				std_out = numpy.std(sample2)
-			else:
-				mean_in = numpy.mean(sample2)
-				std_in = numpy.std(sample2)
-				mean_out = numpy.mean(sample1)
-				std_out = numpy.std(sample1)
-			results.append({"feature":feature.encode('utf-8'), "mean_in":mean_in.item(), "std_in":std_in.item(), "mean_out":mean_out.item(), "std_out":std_out.item()})
-
-		return results
-
-	def split_matrix_by_classes(self,matrix):
-		oneclass_matrices = []
-
-		cls_dict = {}
-		for i, cls in enumerate(matrix.class_labels):
-			for c in cls.split(':'):
-				if c in cls_dict:
-					cls_dict[c].append(i)
-				else:
-					cls_dict[c] = [i]
-	
-		for cls in cls_dict:
-		
-			matrix.head_word = cls
-			oneclass_matrices.append(cut_matrix_by_indices(matrix, cls_dict[cls]))
-
-		return oneclass_matrices
-
-
-def complete_with_zeros(sample, length):
-	"""
-	Extend a list with zeros, so that it has specified length in the end.
-	"""
-
-	return numpy.append(sample, numpy.zeros(length - len(sample)), 0)
-
-
-
-
-def cut_matrix_by_indices(matrix, indices):
-	"""
-	Returns SparseMatrix with rows at given indices from given matrix.
-	"""
-
-	documents_ids = []
-	class_labels = []
-	contexts = []
-	data = lil_matrix((len(indices), matrix.data.shape[1]))
-	row = 0
-	for i in indices:
-		documents_ids.append(matrix.documents_ids[i])
-		class_labels.append(matrix.class_labels[i])
-		contexts.append(matrix.contexts[i])
-		data.data[row] = matrix.data.data[i]
-		data.rows[row] = matrix.data.rows[i]
-		row += 1
-
-	new_matrix = SparseMatrix(matrix.head_word, class_labels, matrix.column_labels, contexts, data, matrix.attributes,
-							  matrix.boundaries, list(set(class_labels)), documents_ids)
-	new_matrix.data = new_matrix.data.tocsc()
-	return new_matrix
-		
-
-
-def get_column(matrix, col_name):
-	"""
-	Return data from a specific column.
-	"""
-
-	mat = matrix.data;
-	for i in range(len(matrix.column_labels)):
-		if matrix.column_labels[i] == col_name:
-			return mat.data[mat.indptr[i]:mat.indptr[i+1]]
-	return []
-
-def get_size(obj, seen=None):
-        """Recursively finds size of objects"""
-        size = sys.getsizeof(obj)
-        if seen is None:
-            seen = set()
-        obj_id = id(obj)
-        if obj_id in seen:
-            return 0
-        # Important mark as seen *before* entering recursion to gracefully handle
-        # self-referential objects
-        seen.add(obj_id)
-        if isinstance(obj, dict):
-            size += sum([get_size(v, seen) for v in obj.values()])
-            size += sum([get_size(k, seen) for k in obj.keys()])
-        elif hasattr(obj, '__dict__'):
-            size += get_size(obj.__dict__, seen)
-        elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
-            size += sum([get_size(i, seen) for i in obj])
-        return size
-		
-class FeatSelSubWorker(nlp_ws.NLPWorker):
-	
-		
-    @classmethod
-    def static_init(cls, config):
-		cls.weka_jar = config['tool']['weka_jar'];
-		
-    #def __init__(self):
-    #    self.weka_jar='/usr/share/java/weka-3.6.10.jar'
-    #    return
-		
-		
-    def init(self):
-        #self._docex = DocExtractor.from_config_dict(self._desconfig)
-		return
-
-    def process(self, input_path, task_options, output_path):
-		#attr_eval = 'InfoGainAttributeEval'
-		attr_eval = 'MannWhitney'
-		
-		if 'method' in task_options: attr_eval=task_options['method'];
-		no_features_to_select = 100
-		if 'no_features_to_select' in task_options: no_features_to_select=task_options['no_features_to_select'];
-		
-		temp_folder = tempfile.mkdtemp()
-		fs = FeatureSelection(attr_eval, no_features_to_select, self.weka_jar,temp_folder)
-		
-		cluster_matrix = JSONfile2csr(input_path);
-		#cluster_matrix.data=cluster_matrix.data.tolil();	
-		cluster_matrix.possible_classes = [u'true', u'false']
-		cluster_matrix.class_labels=task_options["class_labels"]
-		cluster_matrix.head_word=task_options["head_word"]
-		cluster_id = cluster_matrix.head_word
-		res = fs.select_features(cluster_matrix)
-		shutil.rmtree(temp_folder)		
-		#result = self._docex.extract_from_document(input_path)
-		
-		res={"result":res,"id":task_options["id"],"method":attr_eval};
-		with open(output_path, 'w') as result_ofs:
-			json.dump(res,result_ofs)
-
-def byThirdTerm(feat_tuple):
-	return feat_tuple[2]
-
-def get_group_class_lables(class_labels):
-		"""
-		This splitter splits data f matrix into a number of matrices (one for each possible class).
-		The outcome will be a list  each corresponding to one class, with class_labels true or false.
-		e.g. if input matrix has class_labels looking like this:
-		class1:class2:class3
-		class1:class4
-		class2:class3
-		Matrix for class2 will have class_labels looking like this:
-		true
-		false
-		true
-
-		@type matrix: JSON matrix
-		@rtype: list(tempfile.NamedTemporaryFile)
-		"""
-	
-
-		
-			
-		classes = []
-		for line in class_labels:
-			classes.extend(line.split(':'))
-		classes = set(classes)
-
-		#creating dict: {tag: [true, false, false ...]}
-		label_list_dict = {}
-		for tag in classes:
-			new_label_list = []
-			for line in class_labels:
-				if tag in line.split(':'):
-					new_label_list.append('true')
-				else:
-					new_label_list.append('false')
-			label_list_dict[tag] = new_label_list
-
-		res = []
-		for tag in classes:
-				matrix={};
-				matrix["class_labels"] = label_list_dict[tag]
-				matrix["head_word"] = tag
-				res.append(matrix)
-				 
-		return res
-
-
-def sparse2JSON(filename,cluster_matrix):	
-	input_matrix["data"]=scipy.sparse.lil.lil_matrix(input_matrix["data"]);
-	array=cluster_matrix["data"]
-	
-	print array.shape
-	cluster_matrix["data"]=dict();
-	cluster_matrix["data"]["data"]=array.data.tolist();
-	cluster_matrix["data"]["rows"]=array.rows.tolist();
-	cluster_matrix["data"]["shape"]=array.shape;
-	cluster_matrix["data"]["dtype"]=array.dtype.str;
-	with open(filename,'w') as outfile:
-		json.dump(cluster_matrix, outfile);
-		
-def csr2JSONfile(filename,cluster_matrix):	
-	array=cluster_matrix["data"]
-	
-	cluster_matrix["data"]=dict();
-	cluster_matrix["data"]["data"]=array.data.tolist();
-	cluster_matrix["data"]["indices"]=array.indices.tolist();
-	cluster_matrix["data"]["shape"]=array.shape;
-	cluster_matrix["data"]["indptr"]=array.indptr.tolist();
-	with open(filename,'w') as outfile:
-		json.dump(cluster_matrix, outfile);		
-
-def csrObject2JSONfile(filename,cluster_matrix):	
-	array=cluster_matrix.data;
-	
-	cluster_matrix.data=dict();
-	cluster_matrix.data["data"]=array.data.tolist();
-	cluster_matrix.data["indices"]=array.indices.tolist();
-	cluster_matrix.data["shape"]=array.shape;
-	cluster_matrix.data["indptr"]=array.indptr.tolist();
-	with open(filename,'w') as outfile:
-		json.dump(cluster_matrix.__dict__, outfile);		
-		
-def JSONfile2csr(inputFile):	
-	with open(inputFile) as json_data:
-			cluster_matrix = json.load(json_data,object_hook=lambda d: Namespace(**d))
-	array=cluster_matrix.data;
-	cluster_matrix.data=scipy.sparse.csr_matrix((numpy.asarray(array.data), array.indices, array.indptr), shape=array.shape);
-	return cluster_matrix;
-	
-		
-def create():
-	inputFile='data/input_matrix.json';
-	with open(inputFile) as json_data:
-			input_matrix = json.load(json_data);
-	input_matrix["data"]=scipy.sparse.csr_matrix(input_matrix["data"]);
-	csr2JSONfile("datas1.json",input_matrix);
-		
-	
-def test():
-	
-	inputFile='data1.json';
-	input_matrix=JSONfile2csr(inputFile);
-	
-	
-	matrices_labels = get_group_class_lables(input_matrix.class_labels)
-	submatrix=matrices_labels[1];
-	option={'method':'MannWhitney' ,'tree_tmp':'RFE_NaiveBayes ExtraTrees MannWhitney',
-	'no_features_to_select':100,
-		"class_labels":submatrix["class_labels"],"head_word":submatrix["head_word"],"id":submatrix["head_word"]}
-	#clustering_results()
-	start_time = time.time()
-	w=FeatSelSubWorker();
-
-	w.process(inputFile,option,'res.json');  
-	print "Processed in %f"%(time.time()-start_time);
-	
-if __name__ == '__main__':
-	nlp_ws.NLPService.main(FeatSelSubWorker)
-	#test()
--- a/tox.ini
+++ b/tox.ini
+[tox]
+envlist = pep8,docstyle
+skipsdist = True
+
+[testenv:pep8]
+deps =
+    flake8
+    pep8-naming
+basepython = python2.7
+commands =
+    flake8 {posargs}
+
+# envlist names a docstyle environment, so it has to be defined; this is the
+# environment that actually runs pydocstyle with the [pydocstyle] settings.
+[testenv:docstyle]
+deps =
+    pydocstyle
+basepython = python2.7
+commands =
+    pydocstyle --verbose {posargs}
+
+[flake8]
+# W504 skipped because it is overeager and unnecessary
+ignore = W504
+show-source = True
+exclude = .git,.venv,.tox,dist,doc,*egg,build,venv
+# NOTE(review): import-order-style is an option of the flake8-import-order
+# plugin, which is not listed in the pep8 deps above - confirm whether the
+# plugin should be installed or this line dropped.
+import-order-style = pep8
+max-line-length = 80
+
+
+[pydocstyle]
+# D104 Missing docstring in public package
+# D203 1 blank line required before class docstring
+# D213 Multi-line docstring summary should start at the second line
+# D214 Section is over-indented
+# D215 Section underline is over-indented
+# D401 First line should be in imperative mood; try rephrasing
+# D405 Section name should be properly capitalized
+# D406 Section name should end with a newline
+# D407 Missing dashed underline after section
+# D408 Section underline should be in the line following the section’s name
+# D409 Section underline should match the length of its name
+# D410 Missing blank line after section
+# D411 Missing blank line before section
+ignore = D104,D203,D213,D214,D215,D401,D405,D406,D407,D408,D409,D410,D411
+match-dir = ^(?!\.tox|venv).*
+match = ^(?!setup).*\.py
+