Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
poldeepner2
Manage
Activity
Members
Labels
Plan
Issues
29
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Information extraction
poldeepner2
Commits
7acf5a30
Commit
7acf5a30
authored
4 years ago
by
Michał Marcińczuk
Browse files
Options
Downloads
Plain Diff
Merge branch 'dev' into kgr10_roberta
parents
87718460
981ab3aa
Branches
kgr10_roberta
No related merge requests found
Pipeline
#2868
passed with stage
in 4 minutes and 52 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
evaluate_tsv.py
+12
-4
12 additions, 4 deletions
evaluate_tsv.py
main.py
+1
-1
1 addition, 1 deletion
main.py
poleval_ner_test_v2.py
+2
-2
2 additions, 2 deletions
poleval_ner_test_v2.py
with
15 additions
and
7 deletions
evaluate_tsv.py
+
12
−
4
View file @
7acf5a30
...
...
@@ -20,13 +20,21 @@ def main(args):
sentences
=
[
sentence
[
0
]
for
sentence
in
sentences_labels
]
labels
=
[
sentence
[
1
]
for
sentence
in
sentences_labels
]
time_start
=
time
.
clock
()
data_size
=
0
for
sentence
in
sentences
:
data_size
+=
sum
([
len
(
token
)
+
1
for
token
in
sentence
])
t0
=
time
.
clock
()
predictions
=
ner
.
process
(
sentences
)
time_
end
=
time
.
clock
()
time_
processing
=
time
.
clock
()
-
t0
report
=
classification_report
(
labels
,
predictions
,
digits
=
4
)
print
(
report
)
print
(
"
Processing time: %d second(s)
"
%
(
time_end
-
time_start
))
print
(
f
"
Total time :
{
time_processing
:
>
8.4
}
second(s)
"
)
print
(
f
"
Data size: :
{
data_size
/
1000000
:
>
8.4
}
M characters
"
)
print
(
f
"
Speed: :
{
data_size
/
1000000
/
(
time_processing
/
60
)
:
>
8.4
}
M characters/minute
"
)
print
(
f
"
Number of token labels :
{
len
(
ner
.
label_list
)
:
>
8
}
"
)
def
parse_args
():
...
...
@@ -39,7 +47,7 @@ def parse_args():
help
=
'
the maximum total input sequence length after WordPiece tokenization.
'
)
parser
.
add_argument
(
'
--device
'
,
required
=
False
,
default
=
"
cpu
"
,
metavar
=
'
cpu|cuda
'
,
help
=
'
device type used for processing
'
)
parser
.
add_argument
(
'
--squeeze
'
,
required
=
False
,
default
=
False
,
parser
.
add_argument
(
'
--squeeze
'
,
required
=
False
,
default
=
False
,
action
=
"
store_true
"
,
help
=
'
try to squeeze multiple examples into one Input Feature
'
)
return
parser
.
parse_args
()
...
...
This diff is collapsed.
Click to expand it.
main.py
+
1
−
1
View file @
7acf5a30
...
...
@@ -95,7 +95,7 @@ def main():
logger
.
info
(
"
Loading pretrained model...
"
)
t0
=
time
.
time
()
if
args
.
pretrained_path
.
startswith
(
"
automodel
:
"
):
if
args
.
pretrained_path
.
startswith
(
"
hf
:
"
):
from
poldeepner2.model.herbert_for_token_calssification
import
AutoTokenizerForTokenClassification
pretrained_dir
=
args
.
pretrained_path
.
split
(
'
:
'
)[
1
]
model
=
AutoTokenizerForTokenClassification
(
...
...
This diff is collapsed.
Click to expand it.
poleval_ner_test_v2.py
+
2
−
2
View file @
7acf5a30
import
sys
,
json
,
getopt
from
tqdm
import
tqdm
from
attr
import
dataclass
from
dateutil
import
parser
...
...
@@ -130,9 +131,8 @@ def computeScores(goldfile, userfile, cn: CategoryNormalizer, htype="split", typ
with
open
(
goldfile
)
as
json_data
:
goldjson
=
json
.
load
(
json_data
)
for
nr
in
range
(
len
(
goldjson
[
'
questions
'
])):
for
nr
in
tqdm
(
range
(
len
(
goldjson
[
'
questions
'
]))
)
:
idGold
=
'
/
'
.
join
(
goldjson
[
'
questions
'
][
nr
][
'
input
'
][
'
fname
'
].
split
(
'
/
'
)[
4
:])
print
(
idGold
)
if
idGold
in
idsToAnnsUser
:
found
+=
1
# find the most recent answer:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment