Spaces:
Runtime error
Runtime error
update config
Browse files
- configuration/config.py +3 -80
- gradio_ner.py +3 -3
- requirements.txt +2 -1
configuration/config.py
CHANGED
|
@@ -1,88 +1,11 @@
|
|
| 1 |
-
import spacy
|
| 2 |
from functools import lru_cache
|
| 3 |
-
from
|
| 4 |
-
from source.datamodel.common import CountryCode, LineOfBusiness
|
| 5 |
-
from source.datamodel.annotation_ranking import Weights, WeightCatalog
|
| 6 |
|
| 7 |
|
| 8 |
class Settings(BaseSettings):
|
| 9 |
SERVER_HOST: str = '0.0.0.0'
|
| 10 |
-
PORT: int =
|
| 11 |
-
|
| 12 |
-
SLEEP_DURATION = 1e-4 # 0.1 ms sleep
|
| 13 |
-
APP_NAME: str = "MIRA MODELS"
|
| 14 |
-
MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
|
| 15 |
-
LOCAL_MIRA_MODELS: str = "ml_models"
|
| 16 |
-
MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"
|
| 17 |
-
MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
|
| 18 |
-
MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
|
| 19 |
-
PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
|
| 20 |
-
PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
|
| 21 |
-
PROPERTY_BE_UW_MODEL: str = Field("ml_models/ner_property_be/uw_property_be_dev", env='PROPERTY_BE_UW_MODEL')
|
| 22 |
-
PROPERTY_NL_UW_MODEL: str = Field("ml_models/ner_property_nl/uw_property_nl_dev", env='PROPERTY_NL_UW_MODEL')
|
| 23 |
-
ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
|
| 24 |
-
ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
|
| 25 |
-
ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"
|
| 26 |
-
LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
|
| 27 |
-
LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
|
| 28 |
-
LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"
|
| 29 |
-
ADDRESS_DETECTION_MAX_LEN: int = 60
|
| 30 |
-
ADDRESS_INDEX_MIN: int = 40
|
| 31 |
-
DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
|
| 32 |
-
TSI_THRESHOLD: int = 100000
|
| 33 |
-
BROKER_MODEL: dict = {
|
| 34 |
-
'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
|
| 35 |
-
'WORD_POSITION': 1,
|
| 36 |
-
#'POS_POSITION': 2,
|
| 37 |
-
'LEMMA_POSITION': 2,
|
| 38 |
-
#'NER_POSITION': 3
|
| 39 |
-
}
|
| 40 |
-
si_model: dict = {
|
| 41 |
-
'CRF_SI_MODEL_PATH': r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
|
| 42 |
-
'WORD_POSITION': 1,
|
| 43 |
-
'LEMMA_POSITION': 2,
|
| 44 |
-
'NER_POSITION': 3,
|
| 45 |
-
'POS_POSITION': 4
|
| 46 |
-
}
|
| 47 |
-
#spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
|
| 48 |
-
spacy_pretrained_model_nl_md = spacy.load('nl_core_news_md')
|
| 49 |
-
layoutlm_config: dict = {'local_rank': -1,
|
| 50 |
-
'overwrite_cache': True,
|
| 51 |
-
'max_seq_length': 512,
|
| 52 |
-
'model_type': 'layoutlm',
|
| 53 |
-
'cls_token_box': [0, 0, 0, 0],
|
| 54 |
-
'sep_token_box': [1000, 1000, 1000, 1000],
|
| 55 |
-
'pad_token_box': [0, 0, 0, 0]}
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def loss_ratio_params():
|
| 59 |
-
url = "http://0.0.0.0:3000/claim-experience-risk-level/"
|
| 60 |
-
login = "clerk"
|
| 61 |
-
pw = "asdfgh"
|
| 62 |
-
return url, login, pw
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
@lru_cache()
|
| 66 |
-
def get_weight_catalog():
|
| 67 |
-
weight_catalog = WeightCatalog()
|
| 68 |
-
|
| 69 |
-
# PROPERTY BE WEIGHTS
|
| 70 |
-
weight_catalog.set_weights(
|
| 71 |
-
LineOfBusiness.property, CountryCode.belgium, 'POLICYHOLDER',
|
| 72 |
-
Weights(subject=0.7, body=0.2, attachment=0.1))
|
| 73 |
-
weight_catalog.set_weights(
|
| 74 |
-
LineOfBusiness.property, CountryCode.belgium, 'BROKER',
|
| 75 |
-
Weights(subject=0.1, body=0.6, attachment=0.2))
|
| 76 |
-
|
| 77 |
-
# PROPERTY NL WEIGHTS
|
| 78 |
-
weight_catalog.set_weights(
|
| 79 |
-
LineOfBusiness.property, CountryCode.netherlands, 'POLICYHOLDER',
|
| 80 |
-
Weights(subject=0.7, body=0.2, attachment=0.1))
|
| 81 |
-
weight_catalog.set_weights(
|
| 82 |
-
LineOfBusiness.property, CountryCode.netherlands, 'BROKER',
|
| 83 |
-
Weights(subject=0.1, body=0.6, attachment=0.2))
|
| 84 |
-
|
| 85 |
-
return weight_catalog
|
| 86 |
|
| 87 |
|
| 88 |
@lru_cache()
|
|
|
|
|
|
|
| 1 |
from functools import lru_cache
|
| 2 |
+
from pydantic_settings import BaseSettings
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
class Settings(BaseSettings):
|
| 6 |
SERVER_HOST: str = '0.0.0.0'
|
| 7 |
+
PORT: int = 7860
|
| 8 |
+
MODEL_CHECKPOINT: str = "aimlnerd/bert-finetuned-legalentity-ner-accelerate"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
@lru_cache()
|
gradio_ner.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import logging
|
| 3 |
from transformers import pipeline
|
| 4 |
-
|
| 5 |
examples = [
|
| 6 |
"""
|
| 7 |
Notice of Representation
|
|
@@ -73,7 +73,7 @@ Notice of Representation
|
|
| 73 |
John D Locke, Esq"""
|
| 74 |
]
|
| 75 |
# Replace this with your own checkpoint
|
| 76 |
-
model_checkpoint =
|
| 77 |
ner_pipeline = pipeline(
|
| 78 |
"token-classification", model=model_checkpoint, aggregation_strategy="simple"
|
| 79 |
)
|
|
@@ -89,4 +89,4 @@ demo = gr.Interface(ner,
|
|
| 89 |
gr.HighlightedText(),
|
| 90 |
examples=examples)
|
| 91 |
|
| 92 |
-
demo.launch(server_name=
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import logging
|
| 3 |
from transformers import pipeline
|
| 4 |
+
from configuration.config import settings
|
| 5 |
examples = [
|
| 6 |
"""
|
| 7 |
Notice of Representation
|
|
|
|
| 73 |
John D Locke, Esq"""
|
| 74 |
]
|
| 75 |
# Replace this with your own checkpoint
|
| 76 |
+
model_checkpoint = settings.MODEL_CHECKPOINT
|
| 77 |
ner_pipeline = pipeline(
|
| 78 |
"token-classification", model=model_checkpoint, aggregation_strategy="simple"
|
| 79 |
)
|
|
|
|
| 89 |
gr.HighlightedText(),
|
| 90 |
examples=examples)
|
| 91 |
|
| 92 |
+
demo.launch(server_name=settings.SERVER_HOST, server_port=settings.PORT)
|
requirements.txt
CHANGED
|
@@ -7,4 +7,5 @@ evaluate==0.4.1
|
|
| 7 |
accelerate==0.25.0
|
| 8 |
seqeval==1.2.2
|
| 9 |
pandas==2.1.4
|
| 10 |
-
gradio==4.13.0
|
|
|
|
|
|
| 7 |
accelerate==0.25.0
|
| 8 |
seqeval==1.2.2
|
| 9 |
pandas==2.1.4
|
| 10 |
+
gradio==4.13.0
|
| 11 |
+
pydantic_settings==2.1.0
|