YAML Metadata Warning: the repo card has empty or missing YAML metadata (see https://huggingface.co/docs/hub/model-cards#model-card-metadata).
Usage
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
class NER:
    """Named-entity recognition on top of a token-classification model.

    Loads a tokenizer/model pair with ``from_pretrained`` and converts the
    per-token label predictions (``B-``/``S-`` open an entity, ``I-``/``E-``/
    ``M-`` continue it, anything else closes it) into a list of entities.
    """

    # Maximum number of characters sent to the model in a single forward pass.
    CHUNK_SIZE = 500

    # Label prefixes that open a new entity / continue the current one.
    _BEGIN_PREFIXES = ('B-', 'S-')
    _INSIDE_PREFIXES = ('I-', 'E-', 'M-')

    def __init__(self, model_path) -> None:
        """Load the tokenizer and the model.

        Args:
            model_path: Model location passed unchanged to ``from_pretrained``.
        """
        self.model_path = model_path
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForTokenClassification.from_pretrained(model_path)

    def ner(self, sentence: str) -> list:
        """Recognise the named entities in ``sentence``.

        The text is processed in consecutive slices of ``CHUNK_SIZE``
        characters. Each slice is decoded independently, so an entity that
        straddles a slice boundary is reported as two entities.

        Args:
            sentence: Text to analyse.

        Returns:
            Entity list of the form ``[{'type': 'LOC', 'tokens': [...]}, ...]``
            in order of appearance; empty for an empty ``sentence``.
        """
        entities = []
        for start in range(0, len(sentence), self.CHUNK_SIZE):
            # extend() avoids re-copying the accumulated list for every chunk.
            entities.extend(self._ner(sentence[start:start + self.CHUNK_SIZE]))
        return entities

    def _ner(self, sentence: str) -> list:
        """Run the model on one chunk of text and decode its entities.

        Args:
            sentence: A single chunk of text.

        Returns:
            Entity list in the same format as :meth:`ner`.
        """
        if not sentence:
            return []

        inputs = self.tokenizer(
            sentence, add_special_tokens=True, return_tensors="pt"
        )
        if torch.cuda.is_available():
            device = torch.device('cuda:0')
            self.model = self.model.to(device)
            for key in inputs:
                inputs[key] = inputs[key].to(device)

        with torch.no_grad():
            logits = self.model(**inputs).logits

        id2label = self.model.config.id2label
        labels = [id2label[class_id] for class_id in logits.argmax(-1)[0].tolist()]

        # Recover the tokens from the ids that were actually fed to the model,
        # so tokens and predictions are aligned one-to-one regardless of how
        # tokenize() would treat special tokens.
        tokens = self.tokenizer.convert_ids_to_tokens(
            inputs["input_ids"][0].tolist()
        )
        return self._decode(tokens, labels)

    @classmethod
    def _decode(cls, tokens, labels) -> list:
        """Group parallel token/label sequences into entity dictionaries."""
        entities = []
        current = {}
        for token, label in zip(tokens, labels):
            if label.startswith(cls._BEGIN_PREFIXES):
                if current:
                    entities.append(current)
                # Drop only the two-character prefix; the type name itself may
                # legitimately contain sequences such as "E-" or "S-".
                current = {'type': label[2:], 'tokens': [token]}
            elif label.startswith(cls._INSIDE_PREFIXES):
                if not current:
                    # Continuation tag with no open entity: start one here.
                    current = {'type': label[2:], 'tokens': []}
                # NOTE: the token joins the open entity even when the label's
                # type differs from the entity's type.
                current['tokens'].append(token)
            else:
                if current:
                    entities.append(current)
                    current = {}
        if current:
            entities.append(current)
        return entities
# Build the recogniser from the published model identifier.
ner_model = NER(model_path='lixin12345/chinese-medical-ner')

# Sample clinical note used as the input text.
text = """
患者既往慢阻肺多年;冠心病史6年,平素规律服用心可舒、保心丸等控制可;双下肢静脉血栓3年,保守治疗效果可;左侧腹股沟斜疝无张力修补术后2年。否认"高血压、糖尿病"等慢性病病史,否认"肝炎、结核"等传染病病史及其密切接触史,否认其他手术、重大外伤、输血史,否认"食物、药物、其他"等过敏史,预防接种史随社会。
"""

# Run recognition; the result is a list of {'type': ..., 'tokens': [...]} dicts.
ans = ner_model.ner(sentence=text)
# Entities found in ans, shown as the entity type followed by its text:
# DiseaseNameOrComprehensiveCertificate
# 慢阻肺
# DiseaseNameOrComprehensiveCertificate
# 冠心病
# Drug
# 心可舒
# Drug
# 保心丸
# DiseaseNameOrComprehensiveCertificate
# 双下肢静脉血栓
# DiseaseNameOrComprehensiveCertificate
# 左侧腹股沟斜疝
# TreatmentOrPreventionProcedures
# 无张力修补术
# DiseaseNameOrComprehensiveCertificate
# 高血压
# DiseaseNameOrComprehensiveCertificate
# 糖尿病
# DiseaseNameOrComprehensiveCertificate
# 肝炎
# DiseaseNameOrComprehensiveCertificate
# 结核
Source
From hit wi
license: apache-2.0
- Downloads last month
- 437
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.