import requests
import numpy as np
import arxiv
from langchain.utilities import ArxivAPIWrapper
import os
from dotenv import load_dotenv

load_dotenv()
HF_API_TOKEN = os.environ.get('HF_API_TOKEN')
HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Models served through the Hugging Face Inference API:
summarizer_model_name = "microsoft/Phi-3-mini-4k-instruct"  # drafts the related-work text
feature_extractor_model_name = "ml6team/keyphrase-extraction-kbir-inspec"  # extracts keyphrases from the abstract
ranker_model_name = "sentence-transformers/all-MiniLM-L6-v2"  # scores abstract similarity for re-ranking

def hf_api_call(model_name, payload):
    """POST a payload to a model on the Hugging Face serverless Inference API."""
    API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
    response = requests.post(API_URL, headers=HEADERS, json=payload)
    return response.json()
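
# The serverless Inference API answers with an error payload such as
# {"error": "...", "estimated_time": ...} while a model is cold-starting.
# A minimal retry sketch (hf_api_call_with_retry is a hypothetical helper,
# not part of the original app):
def hf_api_call_with_retry(model_name, payload, max_retries=3, wait_seconds=10):
    import time
    for _ in range(max_retries):
        result = hf_api_call(model_name, payload)
        if isinstance(result, dict) and "error" in result:
            time.sleep(wait_seconds)  # wait for the model to finish loading
            continue
        return result
    raise RuntimeError(f"{model_name} still unavailable after {max_retries} retries")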

def extract_keywords(abstract):
    """Extract unique keyphrases from the query abstract via token classification."""
    payload = {"inputs": abstract}
    result = hf_api_call(feature_extractor_model_name, payload)
    keyphrases = np.unique([item['word'].strip() for item in result])
    print(keyphrases)
    return keyphrases
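
# Usage sketch (the abstract is a placeholder). The kbir-inspec extractor is a
# token-classification model; the API returns a list of dicts from which only
# the "word" field is used above:
#   keywords = extract_keywords("We propose a transformer-based approach to ...")
#   # -> e.g. an array like ['transformer', ...]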

def search_papers(keywords, n_papers):
    """Query arXiv with the extracted keywords and return paper summaries as Documents."""
    arxiv_agent = ArxivAPIWrapper(top_k_results=n_papers, doc_content_chars_max=None, load_max_docs=n_papers + 3)
    query = " ".join(keywords)
    results = arxiv_agent.get_summaries_as_docs(query)
    return results
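
# Each returned item is a langchain Document: page_content holds the abstract
# and metadata carries fields such as 'Title' (used during re-ranking below).
# Hypothetical usage:
#   papers = search_papers(extract_keywords(query_abstract), n_papers=10)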

def re_rank_papers(query_abstract, papers, n_papers):
    """Re-rank candidate papers by semantic similarity of their abstracts to the query abstract."""
    summaries = {paper.page_content: {"Title": paper.metadata['Title']} for paper in papers}
    summ_list = []
    payload = {
        "inputs": {
            "source_sentence": query_abstract,
            "sentences": list(summaries.keys())
        }
    }
    result = hf_api_call(ranker_model_name, payload)
    # The sentence-similarity task returns one score per candidate, in input order.
    for i, key in enumerate(summaries.keys()):
        summ_list.append((key, summaries[key]["Title"], result[i]))
        print((key, summaries[key]["Title"], result[i]))
    summ_list = sorted(summ_list, key=lambda x: x[2], reverse=True)
    # Keep only the n_papers highest-scoring abstracts.
    summaries = {}
    for i in range(n_papers):
        summaries[summ_list[i][0]] = {
            "Title": summ_list[i][1],
            "score": summ_list[i][2]
        }
    return summaries
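
# Hypothetical usage, keeping the 5 most similar of 10 candidates:
#   ranked = re_rank_papers(query_abstract, papers, n_papers=5)
#   # -> {abstract: {"Title": ..., "score": ...}} sorted by similarity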

def format_abstracts_as_references(papers):
    """Number each abstract as [1], [2], ... so the generator can cite it."""
    cite_text = ""
    for i, key in enumerate(papers.keys(), start=1):
        cite_text = f"{cite_text}[{i}]: {key}\n"
    return cite_text

def format_authors(authors):
    """Format arxiv author objects as 'Lastname FM' (Vancouver-style initials)."""
    formatted_authors = []
    for author in authors:
        name_parts = author.name.split()
        last_name = name_parts[-1]
        initials = ''.join([name[0] for name in name_parts[:-1]])
        formatted_authors.append(f"{last_name} {initials}")
    return ', '.join(formatted_authors)

def to_vancouver_style(entry):
    """Render a single arxiv.Result as a Vancouver-style reference string."""
    authors = format_authors(entry.authors)
    title = entry.title
    journal = 'arXiv'
    year = entry.published.year
    arxiv_id = entry.get_short_id()
    return f"{authors}. {title}. {journal}. {year}. arXiv:{arxiv_id}"

def generate_refs(papers):
    """Look each ranked paper up on arXiv by title and build a numbered reference list."""
    client = arxiv.Client()
    results = []
    for key in papers.keys():
        search = arxiv.Search(
            query=papers[key]["Title"],
            max_results=1,
            sort_by=arxiv.SortCriterion.Relevance
        )
        results.append(list(client.results(search))[0])
    references = [to_vancouver_style(entry) for entry in results]
    ids = [entry.get_short_id() for entry in results]
    refs = "\n\nReferences:\n"
    for i, reference in enumerate(references, start=1):
        refs = f"{refs}[{i}] {reference}\n"
    return refs, ids
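
# Note: the title lookup assumes arXiv returns at least one hit per query;
# list(client.results(search))[0] raises IndexError otherwise. A guard that
# skips unmatched titles (hypothetical, not in the original) could be:
#   hits = list(client.results(search))
#   if hits:
#       results.append(hits[0])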

def generate_related_work(query_abstract, ranked_papers, base_prompt, sentence_plan, n_words):
    """Prompt the instruct model with the abstract, numbered references, and sentence plan."""
    data = f"Abstract: {query_abstract} \n {format_abstracts_as_references(ranked_papers)} \n Plan: {sentence_plan}"
    complete_prompt = f"{base_prompt}\n```{data}```"
    payload = {
        "inputs": complete_prompt,
        "parameters": {
            "max_new_tokens": n_words,
            "temperature": 0.01,
            "return_full_text": False,
            "do_sample": False  # greedy decoding, so temperature has no practical effect
        }
    }
    result = hf_api_call(summarizer_model_name, payload)
    print(result)
    related_work = result[0]['generated_text']
    refs, ids = generate_refs(ranked_papers)
    related_work += refs
    with open("literature review.txt", "w") as f:
        f.write(related_work)
    return related_work, ids
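
# End-to-end sketch of how the pieces compose. The abstract, prompt, and
# sentence plan below are placeholders, not values from the original Space:
if __name__ == "__main__":
    query_abstract = "..."  # abstract of the paper being written
    base_prompt = "Write a related-work section that cites the numbered abstracts."
    sentence_plan = "1. Introduce the topic. 2. Summarize prior work. 3. Contrast with our approach."
    keywords = extract_keywords(query_abstract)
    papers = search_papers(keywords, n_papers=10)
    ranked = re_rank_papers(query_abstract, papers, n_papers=5)
    related_work, ids = generate_related_work(query_abstract, ranked, base_prompt, sentence_plan, n_words=500)
    print(related_work)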