Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
anonymizer
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Redmine
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
nlpworkers
anonymizer
Commits
3b95f19e
Commit
3b95f19e
authored
2 years ago
by
Bartłomiej Bojanowski
Browse files
Options
Downloads
Patches
Plain Diff
Add extended pl dictionary
parent
6ca8582a
Branches
Branches containing commit
3 merge requests
!10
Anonymizer v2
,
!9
Fix infancy errors based on Magdalena's report
,
!7
Better coverage
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
config/detectors/ner_kpwr_ext.yaml
+73
-2
73 additions, 2 deletions
config/detectors/ner_kpwr_ext.yaml
dictionaries/pl_ext_dict.txt
+38129
-0
38129 additions, 0 deletions
dictionaries/pl_ext_dict.txt
with
38202 additions
and
2 deletions
config/detectors/ner_kpwr_ext.yaml
+
73
−
2
View file @
3b95f19e
...
...
@@ -7,5 +7,76 @@ ner:
"
nam_fac_road"
:
"
street_name"
"
nam_loc_gpe_city"
:
"
city"
"
nam_loc_gpe_country"
:
"
country"
"
nam_loc_gpe_admin1"
:
"
country"
# TODO: Implement better mapping for this
"
nam_loc_historical_region"
:
"
country"
# TODO: Implement better mapping for this
"
nam_loc_gpe_subdivision"
:
"
location"
,
"
nam_loc_gpe_admin1"
:
"
location"
,
"
nam_loc_gpe_district"
:
"
location"
,
"
nam_loc_gpe_admin3"
:
"
location"
,
"
nam_loc_gpe_admin2"
:
"
location"
,
"
nam_loc_gpe_conurbation"
:
"
location"
,
"
nam_loc_country_region"
:
"
location"
,
"
nam_loc"
:
"
location"
,
"
nam_fac_bridge"
:
"
location"
,
"
nam_fac_goe"
:
"
location"
,
"
nam_loc_land_mountain"
:
"
location"
,
"
nam_loc_land_island"
:
"
location"
,
"
nam_loc_land"
:
"
location"
,
"
nam_loc_land_peak"
:
"
location"
,
"
nam_loc_land_continent"
:
"
location"
,
"
nam_loc_land_region"
:
"
location"
,
"
nam_loc_historical_region"
:
"
location"
,
"
nam_fac_park"
:
"
location"
,
"
nam_fac_square"
:
"
street"
,
"
nam_fac_goe_stop"
:
"
street"
,
"
nam_adj_country"
:
"
country"
,
"
nam_num_phone"
:
"
phone_number"
,
"
nam_num_house"
:
"
number"
,
"
nam_liv_person"
:
"
name"
,
"
nam_adj_person"
:
"
surname"
,
"
nam_adj_city"
:
"
city"
,
"
nam_pro_title_document"
:
"
title"
,
"
nam_pro_title_book"
:
"
title"
,
"
nam_pro_title_article"
:
"
title"
,
"
nam_pro_title"
:
"
title"
,
"
nam_pro_title_song"
:
"
title"
,
"
nam_pro_title_treaty"
:
"
title"
,
"
nam_pro_title_album"
:
"
title"
,
"
nam_pro_title_tv"
:
"
title"
,
"
nam_loc_hydronym"
:
"
hydronym"
,
"
nam_loc_hydronym_sea"
:
"
hydronym"
,
"
nam_loc_hydronym_lake"
:
"
hydronym"
,
"
nam_loc_hydronym_river"
:
"
hydronym"
,
"
nam_loc_hydronym_ocean"
:
"
hydronym"
,
"
nam_oth_www"
:
"
url"
,
"
nam_pro_award"
:
"
proper_name"
,
"
nam_pro_media_radio"
:
"
proper_name"
,
"
nam_pro_media_tv"
:
"
proper_name"
,
"
nam_pro_media_web"
:
"
proper_name"
,
"
nam_pro_software_game"
:
"
proper_name"
,
"
nam_pro_model_car"
:
"
proper_name"
,
"
nam_pro_media"
:
"
proper_name"
,
"
nam_pro_vehicle"
:
"
proper_name"
,
"
nam_pro_brand"
:
"
proper_name"
,
"
nam_pro_media_periodic"
:
"
proper_name"
,
"
nam_pro_software"
:
"
proper_name"
,
"
nam_pro"
:
"
proper_name"
,
"
nam_eve_human_cultural"
:
"
proper_name"
,
"
nam_eve_human_holiday"
:
"
proper_name"
,
"
nam_eve_human"
:
"
proper_name"
,
"
nam_eve"
:
"
proper_name"
,
"
nam_eve_human_sport"
:
"
proper_name"
,
"
nam_fac_system"
:
"
proper_name"
,
"
nam_oth_tech"
:
"
proper_name"
,
"
nam_oth_currency"
:
"
proper_name"
,
"
nam_oth_position"
:
"
proper_name"
,
"
nam_oth"
:
"
serial_number"
,
"
nam_oth_data_format"
:
"
serial_number"
,
"
nam_oth_license"
:
"
serial_number"
,
"
nam_org_group_team"
:
"
organization_name"
,
"
nam_org_company"
:
"
organization_name"
,
"
nam_org_group":"organization_name",
"nam_org_political_party"
:
"
organization_name"
,
"
nam_org_group_band"
:
"
organization_name"
,
"
nam_org_nation"
:
"
organization_name"
,
"
nam_org_organization_sub":"organization_name",
"nam_org_institution":"organization_name",
"nam_org_organization":"organization_name",
This diff is collapsed.
Click to expand it.
dictionaries/pl_ext_dict.txt
0 → 100644
+
38129
−
0
View file @
3b95f19e
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment