Spaces:

aimlnerd
/

legal-entity-ner-transformers

Runtime error

App Files Files Community

aimlnerd committed on Jan 9, 2024

Commit

f7558d2

1 Parent(s): c09f7da

update config

Browse files

Files changed (3) hide show

configuration/config.py +3 -80
gradio_ner.py +3 -3
requirements.txt +2 -1

configuration/config.py CHANGED Viewed

@@ -1,88 +1,11 @@
-import spacy
 from functools import lru_cache
-from pydantic import BaseSettings, Field
-from source.datamodel.common import CountryCode, LineOfBusiness
-from source.datamodel.annotation_ranking import Weights, WeightCatalog
 class Settings(BaseSettings):
     SERVER_HOST: str = '0.0.0.0'
-    PORT: int = 3000
-    STOP_TIMEOUT = 120
-    SLEEP_DURATION = 1e-4  # 0.1 ms sleep
-    APP_NAME: str = "MIRA MODELS"
-    MIRA_MODELS_BLOB_PATH: str = "Mira/ml_models"
-    LOCAL_MIRA_MODELS: str = "ml_models"
-    MIRA_INTENT_MODEL: str = "ml_models/intent_classifier/2021-04-09"
-    MARINE_NL_NER_MODEL: str = "ml_models/ner_marine_nl/2021-04-09"
-    MARINE_NL_RB_MODEL: str = "ml_models/ner_marine_nl/rule_based_annotator/rb_annotator.pkl"
-    PROPERTY_NL_NER_MODEL: str = "ml_models/ner_property_nl/ner_v10"
-    PROPERTY_BE_NER_MODEL: str = "ml_models/ner_property_be/ner_v10"
-    PROPERTY_BE_UW_MODEL: str = Field("ml_models/ner_property_be/uw_property_be_dev", env='PROPERTY_BE_UW_MODEL')
-    PROPERTY_NL_UW_MODEL: str = Field("ml_models/ner_property_nl/uw_property_nl_dev", env='PROPERTY_NL_UW_MODEL')
-    ADDRESS_DETECTION_LAXONS: str = "ml_models/address_detection/laxons.json"
-    ADDRESS_DETECTION_TERMS: str = "ml_models/address_detection/terms.json"
-    ADDRESS_DETECTION_BROKER_ADDRESSES: str = "ml_models/address_detection/broker_addresses.json"
-    LAYOUTLM_MODEL: str = "ml_models/layoutlm/layoutlm_model.pth"
-    LAYOUTLM_LABEL_MAPPING: str = "ml_models/layoutlm/labels_mapping.json"
-    LAYOUTLM_TOKENIZER: str = "ml_models/layoutlm/tokenizer"
-    ADDRESS_DETECTION_MAX_LEN: int = 60
-    ADDRESS_INDEX_MIN: int = 40
-    DEEPPARSE_ROOT_DIR: str = "ml_models/deepparse"
-    TSI_THRESHOLD: int = 100000
-    BROKER_MODEL: dict = {
-        'CRF_BROKER_MODEL_PATH': r"source/services/ner_crf/model/crf/30_Nov_2023-14h-broker_pycrf.crfsuite",
-        'WORD_POSITION': 1,
-        #'POS_POSITION': 2,
-        'LEMMA_POSITION': 2,
-        #'NER_POSITION': 3
-        }
-    si_model: dict = {
-        'CRF_SI_MODEL_PATH': r"ml_models/si/crf_23_Jun_2022-11h_inclu_lemma_n_amount_with_eur_gt10k_amount.joblib",
-        'WORD_POSITION': 1,
-        'LEMMA_POSITION': 2,
-        'NER_POSITION': 3,
-        'POS_POSITION': 4
-        }
-    #spacy_pretrained_model_nl_sm = spacy.load('nl_core_news_sm')
-    spacy_pretrained_model_nl_md = spacy.load('nl_core_news_md')
-    layoutlm_config: dict = {'local_rank': -1,
-                             'overwrite_cache': True,
-                             'max_seq_length': 512,
-                             'model_type': 'layoutlm',
-                             'cls_token_box': [0, 0, 0, 0],
-                             'sep_token_box': [1000, 1000, 1000, 1000],
-                             'pad_token_box': [0, 0, 0, 0]}
-def loss_ratio_params():
-    url = "http://0.0.0.0:3000/claim-experience-risk-level/"
-    login = "clerk"
-    pw = "asdfgh"
-    return url, login, pw
-@lru_cache()
-def get_weight_catalog():
-    weight_catalog = WeightCatalog()
-    # PROPERTY BE WEIGHTS
-    weight_catalog.set_weights(
-        LineOfBusiness.property, CountryCode.belgium, 'POLICYHOLDER',
-        Weights(subject=0.7, body=0.2, attachment=0.1))
-    weight_catalog.set_weights(
-        LineOfBusiness.property, CountryCode.belgium, 'BROKER',
-        Weights(subject=0.1, body=0.6, attachment=0.2))
-    # PROPERTY NL WEIGHTS
-    weight_catalog.set_weights(
-        LineOfBusiness.property, CountryCode.netherlands, 'POLICYHOLDER',
-        Weights(subject=0.7, body=0.2, attachment=0.1))
-    weight_catalog.set_weights(
-        LineOfBusiness.property, CountryCode.netherlands, 'BROKER',
-        Weights(subject=0.1, body=0.6, attachment=0.2))
-    return weight_catalog
 @lru_cache()

 from functools import lru_cache
+from pydantic_settings import BaseSettings
 class Settings(BaseSettings):
     SERVER_HOST: str = '0.0.0.0'
+    PORT: int = 7860
+    MODEL_CHECKPOINT: str = "aimlnerd/bert-finetuned-legalentity-ner-accelerate"
 @lru_cache()

gradio_ner.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import logging
 from transformers import pipeline
 examples = [
 """
 Notice of Representation
@@ -73,7 +73,7 @@ Notice of Representation
  John D Locke, Esq"""
 ]
 # Replace this with your own checkpoint
-model_checkpoint = "aimlnerd/bert-finetuned-legalentity-ner-accelerate"
 ner_pipeline = pipeline(
     "token-classification", model=model_checkpoint, aggregation_strategy="simple"
 )
@@ -89,4 +89,4 @@ demo = gr.Interface(ner,
              gr.HighlightedText(),
              examples=examples)
-demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import logging
 from transformers import pipeline
+from configuration.config import settings
 examples = [
 """
 Notice of Representation
  John D Locke, Esq"""
 ]
 # Replace this with your own checkpoint
+model_checkpoint = settings.MODEL_CHECKPOINT
 ner_pipeline = pipeline(
     "token-classification", model=model_checkpoint, aggregation_strategy="simple"
 )
              gr.HighlightedText(),
              examples=examples)
+demo.launch(server_name=settings.SERVER_HOST, server_port=settings.PORT)

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ evaluate==0.4.1
 accelerate==0.25.0
 seqeval==1.2.2
 pandas==2.1.4
-gradio==4.13.0

 accelerate==0.25.0
 seqeval==1.2.2
 pandas==2.1.4
+gradio==4.13.0
+pydantic_settings==2.1.0