Newer
Older
schema: '2.0'
stages:
import_luna_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: f40adccbf0b51094a71b876c9ccad751
size: 8265
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
luna_main_pipeline:
cmd: "PYTHONPATH=. python -m spacy download pl_core_news_lg\nPYTHONPATH=. python\
\ experiment/luna/pipeline/luna_main.py\n"
deps:
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: 773f92667e16efd915ec6384d06aa4fb.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna
md5: 2e334734387ab4579b7b5269d5029e81.dir
size: 71627685
nfiles: 4000
luna_import_to_common_format:
cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py
deps:
- path: experiment/luna/import_dataset/import_luna.py
md5: 44a1b914afda2ae74462e7afd83f658e
size: 8278
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
outs:
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
voicelab_import_to_common_format:
cmd: PYTHONPATH=. python experiment/voicelab/import_data.py
deps:
- path: experiment/voicelab/import_data.py
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
outs:
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
luna_gold_transcript_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_gold_transcript_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_gold_transcript_processing.py
md5: 2bae24d511febebb26b3264b204784f5
size: 1466
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/dataset_relation_manager_data/luna
md5: ff680a49296818460a49bd0c70089a4a.dir
size: 229007155
nfiles: 1000
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
luna_ajn_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_ajn_asr_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_ajn_asr_processing.py
md5: ec7d7b5384f845173d9fb77e9cfa9907
size: 2501
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_polish_asr
md5: 620e178854dbcb69f49a608f34573a88.dir
size: 6159899
nfiles: 494
- path: experiment_data/pipeline/asr_benchmark_luna/ajn_spacy
md5: 312be284d4ec9e38986048e785fcbbc1.dir
size: 6535212
nfiles: 494
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_alignment_wer
md5: 8ad558edb6a8bd2508a7e25bcf53bf94.dir
size: 21936929
nfiles: 494
- path: experiment_data/pipeline/asr_benchmark_luna/pos_ajn_metrics_wer
md5: 98c74c5bf87637749eac1ed5ff3393b4.dir
size: 16842
nfiles: 494
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_alignment_wer
md5: 1741fff740259398b28bf2a6ba3aec41.dir
size: 20671277
nfiles: 494
- path: experiment_data/pipeline/asr_benchmark_luna/word_ajn_metrics_wer
md5: 18605657ff9c7ef3221e27b671a3b4d1.dir
size: 16835
nfiles: 494
luna_techmo_processing:
cmd: "PYTHONPATH=. python experiment/luna/pipeline/luna_techmo_processing.py\n"
deps:
- path: experiment/luna/pipeline/luna_techmo_processing.py
md5: b4d5ad7a0d7fb0714a2dc02cb457e8c9
size: 2628
- path: experiment_data/cached_asr/luna_techmo
md5: 033ea7b5434dded73bf869bfdd299462.dir
size: 4256479
nfiles: 500
- path: experiment_data/dataset/LUNA.PL
md5: d342155b1871e881797cf7da09d5dc3c.dir
size: 1578358645
nfiles: 4500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript
md5: c9c51d94294eb1b30b39aef5d6abbe4b.dir
size: 6706925
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/gold_transcript_spacy
md5: 24a399475b752737db0f2a8671507014.dir
size: 6785648
nfiles: 500
outs:
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_alignment_wer
md5: c71539f3889c627a371957958bd0907d.dir
size: 20897599
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/pos_techmo_metrics_wer
md5: 4efbe309674d9d494bae3dac057025ba.dir
size: 17341
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_polish_asr
md5: acfaec46b2415ed6a64e3a3464d164f8.dir
size: 9697519
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/techmo_spacy
md5: e869581816457d1585a7e42d0a18b8b2.dir
size: 6124559
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_alignment_wer
md5: 0dabd65b3981d588cd23d943abc6e231.dir
size: 21380796
nfiles: 500
- path: experiment_data/pipeline/asr_benchmark_luna/word_techmo_metrics_wer
md5: 4cfbb2830b280084ece14b1ef815b92a.dir
size: 17298
nfiles: 500
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
voicelab_gold_transcript_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_gold_transcript.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_gold_transcript.py
md5: 4ba38fdfac616f8a0818cedabf66b94d
size: 2312
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322
md5: 926ef9bab4ce41b9de95f2f3d5ab67a0.dir
size: 110711470
nfiles: 1600
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: fb6812b2f3044c0285ee6ee2b21d0523.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: f2e68dcc8842a15e417ae6f5221a802a.dir
size: 26643278
nfiles: 800
voicelab_techmo_processing:
cmd: "PYTHONPATH=. python -u experiment/voicelab/voicelab_pipeline_techmo.py\n"
deps:
- path: experiment/voicelab/voicelab_pipeline_techmo.py
md5: 23c0869d7cc9f0088870362d669ab82e
size: 2685
- path: experiment_data/cached_asr/voicelab_cbiz_testset_20220322_techmo
md5: 6c3b356723d562c978f84e733b91f5d0.dir
size: 17539259
nfiles: 800
- path: experiment_data/dataset/voicelab_cbiz_testset_20220322
md5: 3c2b18e1f1f89e4c5ad7b254e472b25e.dir
size: 4803739404
nfiles: 1600
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript
md5: fb6812b2f3044c0285ee6ee2b21d0523.dir
size: 21846798
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/gold_transcript_spacy
md5: f2e68dcc8842a15e417ae6f5221a802a.dir
size: 26643278
nfiles: 800
outs:
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_alignment_wer
md5: 8c5f0380ba2891b3e726d647c2863c60.dir
size: 81650836
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/pos_techmo_metrics_wer
md5: b1a674826142a44095a4c6439ac49024.dir
size: 27934
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_polish_asr
md5: c45e29b08af7bb13cdf54da9655bd96c.dir
size: 39158267
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/techmo_spacy
md5: a39c82666419c2b7791952a1fa116d61.dir
size: 24482297
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_alignment_wer
md5: 72ff86c7cb2e89ac7e04677f532255b2.dir
size: 83756423
nfiles: 800
- path: experiment_data/pipeline/asr_benchmark_voicelab_cbiz_testset_20220322/word_techmo_metrics_wer
md5: 2fe3288abe85e4a385e2aefa0e8cad7e.dir
size: 27780
nfiles: 800