import os, re, faiss, zipfile, warnings, gradio as gr
from pathlib import Path
from typing import List
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from docx import Document
from docx.opc.exceptions import PackageNotFoundError
from openai import OpenAI

FILE = Path("my_resume.pdf")

def read_pdf(p: Path) -> str:
    # PyPDF2 returns None for pages with no extractable text, hence the `or ""`.
    return " ".join(pg.extract_text() or "" for pg in PdfReader(p).pages)

def read_docx(p: Path) -> str:
    return " ".join(par.text for par in Document(p).paragraphs if par.text.strip())

# Try DOCX first; if the file is not a valid DOCX package, fall back to PDF.
try:
    raw = read_docx(FILE)
except (PackageNotFoundError, KeyError, zipfile.BadZipFile):
    warnings.warn("Reading résumé as PDF")
    raw = read_pdf(FILE)

text = re.sub(r"\s+", " ", raw).strip()

LINK_MD = 'LinkedIn Profile'
BLOG_MD = 'Technical Blog'
ARCH_NOTE = (
    "ARCHITECTURE NOTE – The bot follows a Retrieval-Augmented Generation "
    "(RAG) design: PDF → 180-token chunks → MiniLM-L6 embeddings → FAISS "
    "similarity search → GPT-3.5-turbo answer constrained to context."
)
# Append the links and the architecture note so they are retrievable like any other chunk.
text += f" LinkedIn: {LINK_MD} Blog: {BLOG_MD} {ARCH_NOTE}"

def chunkify(t: str, max_tok: int = 180) -> List[str]:
    """Split text into sentence-aligned chunks of at most ~max_tok words."""
    out, buf, n = [], [], 0
    for s in re.split(r"(?<=[.!?])\s+", t):
        w = len(s.split())
        if buf and n + w > max_tok:  # flush only a non-empty buffer (avoids empty chunks)
            out.append(" ".join(buf))
            buf, n = [], 0
        buf.append(s)
        n += w
    if buf:
        out.append(" ".join(buf))
    return out

CHUNKS = chunkify(text)

# Embed all chunks once at startup; L2-normalised vectors make the
# inner-product index equivalent to cosine similarity.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
vecs = embedder.encode(CHUNKS, convert_to_numpy=True)
faiss.normalize_L2(vecs)
index = faiss.IndexFlatIP(vecs.shape[1])
index.add(vecs)

def retrieve(q: str, k: int = 4):
    """Return cosine similarities and the top-k matching chunks for a query."""
    qv = embedder.encode([q], convert_to_numpy=True)
    faiss.normalize_L2(qv)
    sims, idx = index.search(qv, k)
    return sims[0], [CHUNKS[i] for i in idx[0]]

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
MODEL = "gpt-3.5-turbo-0125"
SYSTEM = ("You are a helpful assistant. Answer ONLY with facts in the context. "
          "If missing, reply exactly: \"I don't know based on the resume.\"")

def overlap(a: str, b: str) -> bool:
    # Cheap grounding check: does the answer share at least one word with the context?
    return bool(set(re.findall(r"\w+", a.lower())) & set(re.findall(r"\w+", b.lower())))

SAFE = {"experience", "project", "certification", "certifications", "education",
        "skill", "skills", "summary", "company", "companies", "role", "linkedin",
        "website", "blog", "portfolio", "architecture"}

STATIC_ANSWERS = {
    "linkedin": LINK_MD, "linked-in": LINK_MD,
    "blog": BLOG_MD, "architecture": ARCH_NOTE,
}

def generate(msg: str) -> str:
    lower_msg = msg.lower().strip()

    # 1. Canned answers for the links and the architecture note.
    for key, val in STATIC_ANSWERS.items():
        if key in lower_msg:
            return val

    # 2. Guardrail: refuse questions that contain no résumé-related keyword.
    if not (SAFE & set(re.findall(r"\w+", lower_msg))):
        return "Please ask something related to my résumé."

    # 3. Retrieve context; very short queries get a looser similarity threshold.
    sims, ctxs = retrieve(msg)
    min_sim = 0.10 if len(msg.split()) < 3 else 0.25
    if max(sims) < min_sim:
        return "I don't know based on the resume."

    # 4. Ask the LLM, constrained to the retrieved context.
    ctx = "\n".join(ctxs)
    ans = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": SYSTEM},
            {"role": "user", "content": f"Context:\n{ctx}"},
            {"role": "user", "content": f"Question: {msg}"},
        ],
        max_tokens=256,
        temperature=0.2,
    ).choices[0].message.content.strip()

    # 5. Post-hoc grounding check before returning the answer.
    return ans if overlap(ans, ctx) else "I don't know based on the resume."
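# --- Optional retrieval sanity check (a minimal sketch; the RAG_SMOKE_TEST
# env var is an assumed convention, and the values in the comments below are
# hypothetical). With L2-normalised vectors, inner-product search returns
# cosine similarities in [-1, 1]; this prints the two best chunks for a query.
if os.getenv("RAG_SMOKE_TEST"):
    _sims, _ctxs = retrieve("What skills are listed?", k=2)
    print("similarities:", _sims)        # e.g. [0.61 0.48] -- depends on the résumé
    print("top chunk:", _ctxs[0][:100])  # first 100 characters of the best match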
quick = ["Professional Summary", "Education details", "Experience",
         "Certifications", "Skills", "LinkedIn", "Blog", "Architecture"]

def user_submit(msg, hist):
    # Handle a typed question: clear the textbox and append the exchange.
    if hist is None:
        hist = []
    ans = generate(msg)
    hist = hist + [{"role": "user", "content": msg},
                   {"role": "assistant", "content": ans}]
    return "", hist, hist

def quick_send(hist, q):
    # Handle a quick-question button press.
    if hist is None:
        hist = []
    ans = generate(q)
    hist = hist + [{"role": "user", "content": q},
                   {"role": "assistant", "content": ans}]
    return hist, hist

with gr.Blocks(theme="soft") as demo:
    with gr.Row(equal_height=True):
        with gr.Column(min_width=170, scale=0):
            gr.Markdown("### Quick questions")
            btns = [gr.Button(q) for q in quick]
        with gr.Column(scale=4):
            chat = gr.Chatbot(type="messages",
                              label="SriGPT - Ask about my resume",
                              height=520)
            inp = gr.Textbox(placeholder="Ask about my résumé (RAG-based)",
                             show_label=False)
    state = gr.State([])
    inp.submit(user_submit, [inp, state], [inp, chat, state])
    # Bind q at definition time (q=q) so each button sends its own label,
    # not the loop's final value.
    for b, q in zip(btns, quick):
        b.click(lambda hist, q=q: quick_send(hist, q), [state], [chat, state])

demo.launch()
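# Run note: executing this file (e.g. `python app.py`, if saved under that name)
# starts a local Gradio server, by default at http://127.0.0.1:7860.
# For a temporary public URL, call demo.launch(share=True) instead.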