RAG / app.py
khushidhar1210's picture
app.py
a0a6574 verified
import streamlit as st
import geopandas as gpd
import shapely
from shapely.geometry import Polygon
from io import BytesIO
from transformers import pipeline, RagTokenizer, RagRetriever, RagSequenceForGeneration
import os
import zipfile
import json
import xml.etree.ElementTree as ET
from shapely.ops import transform
from pyproj import Proj, transform as proj_transform
# Load pre-trained Hugging Face RAG model
@st.cache_resource
def load_rag_model():
    """Load the RAG tokenizer, retriever and generator once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_resource``; otherwise the three
    ``from_pretrained`` calls would be repeated on each button click.

    Returns:
        A ``(tokenizer, retriever, model)`` tuple, all loaded from the
        ``facebook/rag-sequence-nq`` checkpoint.
    """
    checkpoint = "facebook/rag-sequence-nq"
    tokenizer = RagTokenizer.from_pretrained(checkpoint)
    # NOTE(review): no index_name / use_dummy_dataset override is passed, so
    # the retriever uses whatever index the checkpoint config names. Confirm
    # the deployment can afford to download and hold that index.
    retriever = RagRetriever.from_pretrained(checkpoint)
    # Attaching the retriever lets the model retrieve documents by itself
    # when it is only given ``input_ids``.
    model = RagSequenceForGeneration.from_pretrained(checkpoint, retriever=retriever)
    return tokenizer, retriever, model
# Module-level RAG objects read by generate_rag_response(); this call is
# executed every time the script runs.
tokenizer, retriever, model = load_rag_model()
# Function to load shapefile (SHP, DBF, etc.)
def load_shapefile(uploaded_file):
    """Read an uploaded shapefile, or a zipped shapefile bundle, with geopandas.

    Args:
        uploaded_file: File-like upload exposing a ``name`` attribute, or
            ``None`` when nothing was uploaded.

    Returns:
        Whatever ``gpd.read_file`` returns for the upload, or ``None`` when
        ``uploaded_file`` is ``None``.

    Raises:
        ValueError: If a ZIP upload contains no ``.shp`` member.
    """
    import tempfile

    if uploaded_file is None:
        return None

    # Extension check is case-insensitive so "DATA.ZIP" is treated as an archive.
    if not uploaded_file.name.lower().endswith(".zip"):
        return gpd.read_file(uploaded_file)

    # A fresh temporary directory per call keeps files from earlier uploads
    # (or other sessions) from being picked up, and is removed afterwards.
    with tempfile.TemporaryDirectory() as extract_dir:
        with zipfile.ZipFile(uploaded_file, "r") as archive:
            archive.extractall(extract_dir)

        # Walk recursively: many archives wrap the shapefile in a sub-folder.
        # Dot-files are skipped so macOS "._name.shp" resource forks are ignored.
        shp_paths = sorted(
            os.path.join(folder, filename)
            for folder, _subdirs, filenames in os.walk(extract_dir)
            for filename in filenames
            if filename.lower().endswith(".shp") and not filename.startswith(".")
        )
        if not shp_paths:
            raise ValueError("The uploaded ZIP archive does not contain a .shp file.")

        # Read while the directory still exists; the sidecar files
        # (.dbf, .shx, .prj) sit next to the .shp inside it.
        return gpd.read_file(shp_paths[0])
def _kml_local_name(element):
    """Return the element's tag without its ``{namespace}`` prefix."""
    tag = element.tag
    return tag.rsplit("}", 1)[-1] if isinstance(tag, str) else ""


def _kml_rings(element):
    """Yield one ``[(lon, lat), ...]`` list per ``<coordinates>`` under *element*."""
    for node in element.iter():
        if _kml_local_name(node) != "coordinates" or not node.text:
            continue
        ring = []
        for token in node.text.split():
            # KML tuples are "lon,lat[,alt]"; altitude is irrelevant for area.
            lon, lat = token.split(",")[:2]
            ring.append((float(lon), float(lat)))
        yield ring


def _parse_kml_root(kml_file):
    """Parse *kml_file* (plain KML or zipped KMZ) and return the XML root."""
    is_archive = zipfile.is_zipfile(kml_file)
    # The zip probe moves the read position of file-like objects.
    if hasattr(kml_file, "seek"):
        kml_file.seek(0)
    if not is_archive:
        return ET.parse(kml_file).getroot()
    with zipfile.ZipFile(kml_file) as archive:
        kml_names = [n for n in archive.namelist() if n.lower().endswith(".kml")]
        if not kml_names:
            raise ValueError("The KMZ archive does not contain a .kml document.")
        return ET.fromstring(archive.read(kml_names[0]))


# Function to get land summary based on a KML or KMZ file
def get_land_summary_by_kml(kml_file):
    """Return the area, in square metres, enclosed by a KML/KMZ boundary.

    KML coordinates are longitude/latitude degrees, so a planar area of the
    raw numbers would be in squared degrees. The area is therefore computed
    geodesically on the WGS84 ellipsoid.

    Every ``<Polygon>`` contributes its outer boundary minus its inner
    boundaries (holes); the contributions of all polygons are summed. When
    the document has no ``<Polygon>`` at all, each ``<coordinates>`` list
    with at least three vertices is treated as a closed ring instead.

    Args:
        kml_file: Path or file-like object holding KML, or a KMZ archive.

    Returns:
        Total area in square metres; ``0.0`` when no usable ring is found.

    Raises:
        ValueError: If a KMZ archive holds no ``.kml`` member, or a
            coordinate tuple is malformed.
    """
    from pyproj import Geod

    root = _parse_kml_root(kml_file)
    geod = Geod(ellps="WGS84")

    def ring_area(ring):
        if len(ring) < 3:
            return 0.0
        lons, lats = zip(*ring)
        signed_area, _perimeter = geod.polygon_area_perimeter(list(lons), list(lats))
        # The sign only encodes winding direction.
        return abs(signed_area)

    polygons = [el for el in root.iter() if _kml_local_name(el) == "Polygon"]
    if not polygons:
        return sum((ring_area(ring) for ring in _kml_rings(root)), 0.0)

    total_area = 0.0
    for polygon in polygons:
        for boundary in polygon:
            kind = _kml_local_name(boundary)
            if kind == "outerBoundaryIs":
                sign = 1.0
            elif kind == "innerBoundaryIs":
                sign = -1.0
            else:
                continue
            for ring in _kml_rings(boundary):
                total_area += sign * ring_area(ring)
    return total_area
# Function to summarize floodland areas, acreage, and usable land
def summarize_land_data(shapefile_data, area_crs="EPSG:6933"):
    """Return total and usable-land area of a GeoDataFrame in square metres.

    ``GeoSeries.area`` is expressed in the units of the layer's CRS, which
    for longitude/latitude data means squared degrees. When the layer has a
    CRS it is therefore reprojected to ``area_crs`` before measuring.

    Args:
        shapefile_data: GeoDataFrame to summarize. Rows whose ``use_type``
            column equals ``'Usable Land'`` count as usable land.
        area_crs: CRS used for measuring; the default is an equal-area
            projection with metre units.

    Returns:
        ``(total_area, usable_land_area)`` as floats. If the layer has no
        CRS the values stay in the layer's native units. If there is no
        ``use_type`` column the usable area is ``0.0``.
    """
    data = shapefile_data
    if getattr(data, "crs", None) is not None:
        data = data.to_crs(area_crs)

    areas = data.geometry.area
    total_area = float(areas.sum())

    if "use_type" in data.columns:
        usable_mask = data["use_type"] == "Usable Land"
        usable_land_area = float(areas[usable_mask].sum())
    else:
        # No land-use attribute: nothing can be classified as usable.
        usable_land_area = 0.0

    return total_area, usable_land_area
# Function to generate a response using RAG model
def generate_rag_response(query):
    """Answer *query* with the module-level RAG tokenizer, retriever and model.

    The retriever must be called with the question encoder's hidden states
    as well as the token ids. Generation from explicitly supplied contexts
    also needs the matching ``doc_scores``, so the steps are done by hand:
    encode the question, retrieve documents, score them, then generate.

    Args:
        query: The user's question as plain text.

    Returns:
        The decoded text of the first generated sequence.
    """
    import torch

    inputs = tokenizer(query, return_tensors="pt")
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        # 1. Encode the question.
        question_hidden_states = model.question_encoder(input_ids)[0]

        # 2. Retrieve supporting documents (the retriever works on numpy arrays).
        docs = retriever(
            input_ids.numpy(),
            question_hidden_states.detach().numpy(),
            return_tensors="pt",
        )

        # 3. Score each retrieved document against the question embedding.
        doc_scores = torch.bmm(
            question_hidden_states.unsqueeze(1),
            docs["retrieved_doc_embeds"].float().transpose(1, 2),
        ).squeeze(1)

        # 4. Generate from the retrieved contexts only; input_ids are left
        #    out so the model does not try to retrieve a second time.
        generated_ids = model.generate(
            context_input_ids=docs["context_input_ids"],
            context_attention_mask=docs["context_attention_mask"],
            doc_scores=doc_scores,
        )

    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
# Streamlit app interface
def main():
    """Render the page: two action buttons, two uploaders and the results.

    "Get Land Summary" reports areas for the uploaded shapefile, or for the
    KML/KMZ boundary when no shapefile is uploaded. "Chat with the Bot"
    opens a question box whose input is answered by the RAG model.
    """
    st.title("Geospatial Data Summary and Chatbot")

    # Buttons for interaction
    summary_button = st.button("Get Land Summary")
    chatbot_button = st.button("Chat with the Bot")

    # Upload file option
    uploaded_file = st.file_uploader("Upload SHP/DBF/ZIP file", type=["zip", "shp", "dbf"])
    kml_file = st.file_uploader("Upload KML or KMZ boundary file", type=["kml", "kmz"])

    # st.button() is True only during the rerun caused by the click. Typing a
    # question triggers another rerun, so chat mode is kept in session state;
    # otherwise the question box would vanish before it could be answered.
    if chatbot_button:
        st.session_state["chat_active"] = True
    elif summary_button:
        st.session_state["chat_active"] = False

    if summary_button:
        if uploaded_file is not None:
            # Load the shapefile
            shapefile_data = load_shapefile(uploaded_file)
            if shapefile_data is not None:
                total_area, usable_land_area = summarize_land_data(shapefile_data)
                st.write(f"Total floodland area: {total_area:.2f} sq meters")
                st.write(f"Usable land area: {usable_land_area:.2f} sq meters")
        elif kml_file is not None:
            # Process KML for land summary
            land_area = get_land_summary_by_kml(kml_file)
            st.write(f"Floodland area in KML boundary: {land_area:.2f} sq meters")
        else:
            st.write("Please upload either a shapefile or KML/KMZ file.")
    elif st.session_state.get("chat_active", False):
        if uploaded_file is not None or kml_file is not None:
            query = st.text_input("Ask the bot a question about the data:")
            if query:
                answer = generate_rag_response(query)
                st.write(answer)
        else:
            st.write("Please upload a file before interacting with the chatbot.")


# Start the UI only when this file is run as the main script.
if __name__ == "__main__":
    main()