Upload 15 files
- api_client/__init__.py +0 -0
- api_client/api.py +12 -0
- config.py +11 -0
- data/__init__.py +0 -0
- data/schemaClass.py +16 -0
- dockerfile +10 -0
- main.py +28 -0
- requirements.txt +0 -0
- routers/downloadingDocs.py +10 -0
- routers/textProcessing.py +27 -0
- usecase/__init__.py +0 -0
- usecase/classificationNode.py +22 -0
- usecase/entity_extractionNode.py +21 -0
- usecase/summarizationNode.py +21 -0
- usecase/workflow.py +25 -0
api_client/__init__.py
ADDED
File without changes
api_client/api.py
ADDED
@@ -0,0 +1,12 @@
import os
from config import settings
from langchain_groq import ChatGroq

# Expose the key to the Groq client, then build one shared chat model
os.environ["GROQ_API_KEY"] = settings.Groq_api_key

def llm():
    llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        temperature=0,  # deterministic output for classification/extraction
    )
    return llm

llm = llm()  # module-level instance; note this shadows the factory function
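
A quick sanity check for this client, as a sketch (assumes the .env file resolves to a valid Groq key):

    from api_client.api import llm  # the module-level ChatGroq instance created above

    print(llm.invoke("Say hello in one word.").content)
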
config.py
ADDED
@@ -0,0 +1,11 @@
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    Groq_api_key: str

    class Config:
        env_file = ".env"  # load the key from a local .env file

settings = Settings()
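
Settings expects the key in a .env file, e.g. a single line `Groq_api_key=<your key>`; pydantic-settings matches field and environment variable names case-insensitively by default, so a GROQ_API_KEY Space secret should also satisfy it. A minimal startup check, as a sketch:

    from config import settings

    # Fail fast if the key was not picked up from .env or the environment
    assert settings.Groq_api_key, "Groq_api_key missing - check .env or Space secrets"
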
data/__init__.py
ADDED
File without changes
data/schemaClass.py
ADDED
@@ -0,0 +1,16 @@
from pydantic import BaseModel
from typing import List, Optional

class State(BaseModel):
    text: str
    classification: Optional[str] = None
    entities: Optional[List[str]] = None
    summary: Optional[str] = None


class TextInput(BaseModel):
    text: str

class TextProcessingResponseAPI(BaseModel):
    classification: str
    entities: List[str]
    summary: str
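
State is the shared LangGraph state: only `text` is required at entry, and each node fills in one of the optional fields. A small illustration:

    from data.schemaClass import State

    s = State(text="Groq ships a faster inference engine.")
    print(s.classification, s.entities, s.summary)  # None None None until the nodes run
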
dockerfile
ADDED
@@ -0,0 +1,10 @@
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.10-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --upgrade pip && pip install -r requirements.txt --no-cache-dir
COPY . .

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
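
Locally this should build and run with the usual `docker build -t text-api .` and `docker run -p 8000:8000 --env-file .env text-api`. Note that Hugging Face Docker Spaces route traffic to port 7860 by default, so serving on 8000 as above would need an `app_port: 8000` entry in the Space's README metadata.
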
main.py
ADDED
@@ -0,0 +1,28 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from routers import textProcessing
from routers import downloadingDocs

# Create the FastAPI app
app = FastAPI()


origins = ["*"]  # Allow any website to access the API; list specific domains if needed

# Add CORS middleware to handle cross-origin requests
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
    allow_headers=["*"],  # Allow all request headers
)


# Include the text processing and download routers
app.include_router(textProcessing.router)
app.include_router(downloadingDocs.router)

@app.get("/")
def read_root():
    return {"message": "Welcome to the Text Processing API!"}
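
A minimal client sketch against a local run (`uvicorn main:app`); the example text is illustrative:

    import requests

    base = "http://localhost:8000"
    print(requests.get(f"{base}/").json())  # {"message": "Welcome to the Text Processing API!"}

    resp = requests.post(f"{base}/process_text",
                         json={"text": "NASA and SpaceX launched a crew to the ISS."})
    print(resp.json())  # {"classification": ..., "entities": [...], "summary": ...}
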
requirements.txt
ADDED
Binary file (3.34 kB)
routers/downloadingDocs.py
ADDED
@@ -0,0 +1,10 @@
from fastapi import APIRouter
from fastapi.responses import FileResponse

router = APIRouter()

@router.get("/download_report")
def download_rep():
    # Serve a pre-generated Word report from the working directory
    file_path = "analysis_report.docx"
    return FileResponse(
        path=file_path,
        filename="Text_Analysis_Report.docx",
        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
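
Nothing in this commit writes analysis_report.docx, so /download_report will error until the file exists in the working directory. A hypothetical helper that could produce it, assuming the python-docx package:

    from docx import Document

    def write_report(classification: str, entities: list[str], summary: str,
                     path: str = "analysis_report.docx") -> None:
        # Assemble a simple Word report from the pipeline outputs
        doc = Document()
        doc.add_heading("Text Analysis Report", level=1)
        doc.add_paragraph(f"Classification: {classification}")
        doc.add_paragraph(f"Entities: {', '.join(entities)}")
        doc.add_paragraph(f"Summary: {summary}")
        doc.save(path)
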
routers/textProcessing.py
ADDED
@@ -0,0 +1,27 @@
from usecase.workflow import langgraphPipeline
from data.schemaClass import TextInput
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse

router = APIRouter()

@router.post("/process_text")
def process_text(input_data: TextInput):
    try:
        pipeline_app = langgraphPipeline()

        # Build the initial graph state from the request body
        state_input = {"text": input_data.text}

        result = pipeline_app.invoke(state_input)

        return JSONResponse(
            content={
                "classification": result.get("classification", ""),
                "entities": result.get("entities", []),
                "summary": result.get("summary", ""),
            }
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
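
(Two fixes relative to the uploaded file: an unused `import streamlit as st` is dropped, and the hard-coded debug input `{"text": "hi"}` is replaced by the request text, which was left commented out.) The endpoint can be exercised in isolation with FastAPI's TestClient; a sketch, which needs a valid Groq key since the graph calls the LLM:

    from fastapi import FastAPI
    from fastapi.testclient import TestClient
    from routers import textProcessing

    app = FastAPI()
    app.include_router(textProcessing.router)
    client = TestClient(app)

    r = client.post("/process_text", json={"text": "Apple opened a new campus in Austin."})
    print(r.status_code, r.json())
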
usecase/__init__.py
ADDED
File without changes
usecase/classificationNode.py
ADDED
@@ -0,0 +1,22 @@
from data.schemaClass import State
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from api_client.api import llm


def classification_node_usecase(state: State):
    """
    Classify the text into one of the categories: News, Blog, Research, or Other
    """
    prompt = PromptTemplate(
        input_variables=["text"],
        template="Classify the following text into one of the categories: News, Blog, Research, or Other.\n\nText:{text}\n\nCategory:"
    )

    message = HumanMessage(content=prompt.format(text=state.text))
    classification = llm.invoke([message]).content.strip()

    # Update the state with the classification result
    state.classification = classification

    return state  # Return the updated state with the classification
usecase/entity_extractionNode.py
ADDED
@@ -0,0 +1,21 @@
from data.schemaClass import State
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from api_client.api import llm


def entity_extraction_node_usecase(state: State):
    """
    Extract all the entities (Person, Organization, Location) from the text
    """
    prompt = PromptTemplate(
        input_variables=["text"],
        template="Extract all the entities (Person, Organization, Location) from the following text. Provide the result as a comma-separated list.\n\nText:{text}\n\nEntities:"
    )
    message = HumanMessage(content=prompt.format(text=state.text))  # Access state.text instead of state["text"]
    entities = llm.invoke([message]).content.strip().split(", ")  # Get a list of entities

    # Update the state with the extracted entities
    state.entities = entities

    return state  # Return the updated state with the entities
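
Splitting on ", " assumes the model answers with exactly that delimiter; a slightly more defensive parse, as a sketch:

    import re

    def parse_entities(raw: str) -> list[str]:
        # Tolerate commas or newlines as separators and drop empty fragments
        return [p.strip() for p in re.split(r"[,\n]", raw) if p.strip()]
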
usecase/summarizationNode.py
ADDED
@@ -0,0 +1,21 @@
from data.schemaClass import State
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
from api_client.api import llm


def summarization_node_usecase(state: State):
    """
    Summarize the text in one short sentence
    """
    prompt = PromptTemplate(
        input_variables=["text"],
        template="Summarize the following text in one short sentence.\n\nText:{text}\n\nSummary:"
    )
    message = HumanMessage(content=prompt.format(text=state.text))  # Access state.text instead of state["text"]
    summary = llm.invoke([message]).content.strip()

    # Update the state with the summary
    state.summary = summary

    return state  # Return the updated state with the summary
usecase/workflow.py
ADDED
@@ -0,0 +1,25 @@
from langgraph.graph import StateGraph, END
from data.schemaClass import State
from usecase.classificationNode import classification_node_usecase
from usecase.entity_extractionNode import entity_extraction_node_usecase
from usecase.summarizationNode import summarization_node_usecase


def langgraphPipeline():
    workflow = StateGraph(State)
    # Add nodes to the graph
    workflow.add_node("classification_node", classification_node_usecase)
    workflow.add_node("entity_extraction", entity_extraction_node_usecase)
    workflow.add_node("summarization", summarization_node_usecase)

    # Add edges to the graph
    workflow.set_entry_point("classification_node")  # Set the entry point of the graph
    workflow.add_edge("classification_node", "entity_extraction")
    workflow.add_edge("entity_extraction", "summarization")
    workflow.add_edge("summarization", END)

    # Compile the graph
    app = workflow.compile()

    return app
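
The compiled graph can also be run outside FastAPI; an end-to-end sketch, which needs a valid Groq key:

    from usecase.workflow import langgraphPipeline

    graph = langgraphPipeline()
    result = graph.invoke({"text": "Researchers at MIT published new work on robot learning."})
    print(result.get("classification"), result.get("entities"), result.get("summary"))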