Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import torch | |
| import streamlit as st | |
| from transformers import BertTokenizer | |
| from transformers import BertForSequenceClassification | |
| from sklearn.preprocessing import LabelEncoder | |
| from keras.utils import pad_sequences | |
| from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler | |
# Page header. Streamlit can show the user text, images and a limited subset
# of HTML, much like a Jupyter notebook.
st.markdown("### Paper category classification")
st.markdown(
    "<img width=200px src='https://grandgames.net/img/upload/0d153888a24eb5b8c0195495cd83d0dd.jpg'>",
    unsafe_allow_html=True,
)
@st.cache_resource
def load_model_and_tokenizer():
    """Load the fine-tuned BERT classifier and its tokenizer.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_resource``; without the cache the
    tokenizer, the base model and the checkpoint would be reloaded on every
    rerun.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is a
        ``BertForSequenceClassification`` with 44 output labels whose weights
        come from ``model_last_version.pt``, and ``tokenizer`` is the
        ``bert-base-uncased`` ``BertTokenizer``.
    """
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",  # 12-layer BERT model with an uncased vocab
        num_labels=44,
    )
    # Replace the pretrained weights with the fine-tuned checkpoint; tensors
    # are mapped to CPU so the file loads on machines without a GPU.
    model.load_state_dict(
        torch.load("model_last_version.pt", map_location=torch.device("cpu"))
    )
    # The model is only used for inference, so make sure dropout is disabled.
    model.eval()
    return model, tokenizer
# Stop listing categories once their cumulative probability exceeds this value.
CUMULATIVE_PROB_CUTOFF = 0.95
# Categories less probable than this are never listed.
MIN_CLASS_PROB = 0.001

# Category names; entry i is used as the label of the classifier's logit i.
# The list is kept exactly as-is (44 entries, matching num_labels=44).
# NOTE(review): several names appear twice in a row — confirm the mapping
# against the label encoder used during training.
tags_names = ['Accelerator Physics',
              'adap-org',
              "adap-org",
              'Algebra-Geometry',
              'Astro-physics',
              "Astro-physics",
              'Chao-dynamics',
              'Chemistry-physics',
              'cmp-lg',
              "cmp-lg",
              'comp-gas',
              'cond-mat',
              "cond-mat",
              'Computer Science',
              'dg-ga',
              'Economics',
              'eess',
              'funct-an',
              'gr-qc',
              "gr-qc",
              'hep-ex',
              "hep-ex",
              'hep-lat',
              "hep-lat",
              'hep-ph',
              "hep-ph",
              'hep-th',
              "hep-th",
              'Math',
              'math-ph',
              'mtrl-th',
              'nlin',
              'nucl-ex',
              'nucl-th',
              "nucl-th",
              'patt-sol',
              'Physics',
              'q-alg',
              'Quantitie-biology',
              'q-fin',
              'quant-ph',
              "quant-ph",
              'solv-int',
              'Statistics']

model, tokenizer = load_model_and_tokenizer()

# Each text area returns the string that is currently typed into it.
title = st.text_area("INPUT TITLE HERE")
abstract = st.text_area("INPUT ABSTRACT HERE")

# Whitespace-only input carries no information, so treat it as missing.
has_title = bool(title.strip())
has_abstract = bool(abstract.strip())

if not has_title and not has_abstract:
    st.markdown("Could you input paper title/abstract :)")
elif not has_title:
    st.markdown("Could you input paper title :)")
else:
    # Inference only: no gradients are needed, so skip building the graph.
    with torch.no_grad():
        # Classify the title.
        tokens = tokenizer(title, padding=True, truncation=True, return_tensors="pt")
        logits = model(
            input_ids=tokens["input_ids"],
            attention_mask=tokens["attention_mask"],
        )[0]

        # If an abstract was given, classify it separately and add its logits
        # to the title's logits.
        if has_abstract:
            abstract_tokens = tokenizer(
                abstract, padding=True, truncation=True, return_tensors="pt"
            )
            abstract_logits = model(
                input_ids=abstract_tokens["input_ids"],
                attention_mask=abstract_tokens["attention_mask"],
            )[0]
            logits = logits + abstract_logits

    # Turn logits into probabilities and sort them in descending order.
    probs = torch.softmax(logits, dim=-1).squeeze()
    sorted_probs, sorted_indices = torch.sort(probs, descending=True)

    # Collect the most probable categories until their cumulative probability
    # exceeds the cutoff. The category that crosses the cutoff is included:
    # breaking before appending it would return an empty list whenever the
    # top category alone is more probable than the cutoff.
    top_classes = []
    sum_probs = 0.0
    for prob, class_idx in zip(sorted_probs.tolist(), sorted_indices.tolist()):
        if prob < MIN_CLASS_PROB:
            break
        top_classes.append((tags_names[class_idx], prob))
        sum_probs += prob
        if sum_probs > CUMULATIVE_PROB_CUTOFF:
            break

    # Show the selected categories with their probabilities to the user.
    raw_predictions = top_classes
    st.markdown(f"Possible categories with their probabilities for this paper : {raw_predictions}")