|  | import torch | 
					
						
						|  | from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | 
					
						
						|  |  | 
					
						
						|  | import spacy | 
					
						
# Shared spaCy pipeline, loaded once at import time; create_nest_sentences
# calls it on each document and iterates the resulting `.sents`.
nlp = spacy.load('en_core_web_sm')
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def create_nest_sentences(document:str, token_max_length = 1024): | 
					
						
						|  | nested = [] | 
					
						
						|  | sent = [] | 
					
						
						|  | length = 0 | 
					
						
						|  | tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli') | 
					
						
						|  | tokens = nlp(document) | 
					
						
						|  |  | 
					
						
						|  | for sentence in tokens.sents: | 
					
						
						|  | tokens_in_sentence = tokenizer(str(sentence), truncation=False, padding=False)[0] | 
					
						
						|  | length += len(tokens_in_sentence) | 
					
						
						|  |  | 
					
						
						|  | if length < token_max_length: | 
					
						
						|  | sent.append(sentence) | 
					
						
						|  | else: | 
					
						
						|  | nested.append(sent) | 
					
						
						|  | sent = [] | 
					
						
						|  | length = 0 | 
					
						
						|  |  | 
					
						
						|  | if sent: | 
					
						
						|  | nested.append(sent) | 
					
						
						|  | return nested | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def load_summary_model(): | 
					
						
						|  | model_name = "facebook/bart-large-mnli" | 
					
						
						|  | summarizer = pipeline(task='summarization', model=model_name) | 
					
						
						|  | return summarizer | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:int): | 
					
						
						|  | output = summarizer(sequence, num_beams=4, max_length=maximum_tokens, min_length=minimum_tokens, do_sample=False) | 
					
						
						|  | return output[0].get('summary_text') | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def load_model(): | 
					
						
						|  | model_name = "facebook/bart-large-mnli" | 
					
						
						|  | tokenizer = AutoTokenizer.from_pretrained(model_name) | 
					
						
						|  | model = AutoModelForSequenceClassification.from_pretrained(model_name) | 
					
						
						|  | classifier = pipeline(task='zero-shot-classification', model=model, tokenizer=tokenizer, framework='pt') | 
					
						
						|  | return classifier | 
					
						
						|  |  | 
					
						
						|  | def classifier_zero(classifier, sequence:str, labels:list, multi_class:bool): | 
					
						
						|  | outputs = classifier(sequence, labels, multi_label=multi_class) | 
					
						
						|  | return outputs['labels'], outputs['scores'] | 
					
						
						|  |  | 
					
						
						|  |  |