"""Streamlit app: summarize land areas from shapefile/KML uploads and chat via a RAG model."""

import json
import os
import tempfile
import xml.etree.ElementTree as ET
import zipfile
from io import BytesIO

import geopandas as gpd
import shapely
import streamlit as st
from pyproj import Proj, Transformer, transform as proj_transform
from shapely.geometry import Polygon
from shapely.ops import transform
from transformers import pipeline, RagTokenizer, RagRetriever, RagSequenceForGeneration

# Name of the pre-trained Hugging Face RAG checkpoint used by the chatbot.
RAG_MODEL_NAME = "facebook/rag-sequence-nq"

# XML namespace map used to look up KML 2.2 elements.
KML_NAMESPACES = {'kml': 'http://www.opengis.net/kml/2.2'}

# Global equal-area projection (units: metres). Areas are measured in this CRS
# because lon/lat geometries would otherwise yield meaningless "square degrees".
EQUAL_AREA_CRS = "EPSG:6933"


# Load pre-trained Hugging Face RAG model
@st.cache_resource
def load_rag_model():
    """Load the RAG tokenizer, retriever and generator.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached with ``st.cache_resource`` to avoid reloading the model on
    each rerun.

    Returns:
        tuple: ``(tokenizer, retriever, model)``. The model is constructed with
        the retriever attached so that ``model.generate`` can perform document
        retrieval itself.
    """
    tokenizer = RagTokenizer.from_pretrained(RAG_MODEL_NAME)
    # NOTE(review): without an explicit index configuration the retriever uses
    # the checkpoint's default index, which may be a very large download --
    # confirm this is intended for deployment.
    retriever = RagRetriever.from_pretrained(RAG_MODEL_NAME)
    model = RagSequenceForGeneration.from_pretrained(RAG_MODEL_NAME, retriever=retriever)
    return tokenizer, retriever, model


tokenizer, retriever, model = load_rag_model()


# Function to load shapefile (SHP, DBF, etc.)
def load_shapefile(uploaded_file):
    """Read an uploaded shapefile (or a ZIP bundle containing one).

    Args:
        uploaded_file: File-like object exposing ``.name`` (for example a
            Streamlit upload), or ``None``.

    Returns:
        The GeoDataFrame produced by ``gpd.read_file``, or ``None`` when no
        file was supplied.

    Raises:
        ValueError: If a ZIP archive is supplied that contains no ``.shp`` file.
        zipfile.BadZipFile: If the ``.zip`` upload is not a valid archive.
    """
    if uploaded_file is None:
        return None

    file_name = getattr(uploaded_file, "name", "") or ""
    if not file_name.lower().endswith(".zip"):
        return gpd.read_file(uploaded_file)

    # Extract into a fresh temporary directory so that files left over from a
    # previous upload can never be picked up by mistake.
    with tempfile.TemporaryDirectory() as extract_dir:
        with zipfile.ZipFile(uploaded_file, "r") as archive:
            archive.extractall(extract_dir)

        # Archives often wrap the shapefile in a sub-folder, so search recursively.
        shp_paths = sorted(
            os.path.join(folder, entry)
            for folder, _subdirs, entries in os.walk(extract_dir)
            for entry in entries
            if entry.lower().endswith(".shp")
        )
        if not shp_paths:
            raise ValueError("The uploaded ZIP archive does not contain a .shp file.")

        # Read while the temporary directory still exists.
        return gpd.read_file(shp_paths[0])


def _load_kml_root(kml_file):
    """Return the root XML element of a KML document or of the KML inside a KMZ."""
    is_kmz = zipfile.is_zipfile(kml_file)
    # The zip probe moves the read position; rewind before the real read.
    if hasattr(kml_file, "seek"):
        kml_file.seek(0)

    if not is_kmz:
        return ET.parse(kml_file).getroot()

    with zipfile.ZipFile(kml_file) as archive:
        kml_names = [name for name in archive.namelist() if name.lower().endswith(".kml")]
        if not kml_names:
            raise ValueError("The KMZ archive does not contain a .kml document.")
        with archive.open(kml_names[0]) as kml_stream:
            return ET.parse(kml_stream).getroot()


def _parse_coordinates(text):
    """Convert a KML ``lon,lat[,alt]`` coordinate string into ``(lon, lat)`` tuples."""
    points = []
    for token in (text or "").split():
        lon, lat = token.split(",")[:2]
        points.append((float(lon), float(lat)))
    return points


def _kml_polygons(root):
    """Build one shapely Polygon per KML ``<Polygon>``, honouring inner rings."""
    shapes = []
    for polygon_el in root.findall('.//kml:Polygon', KML_NAMESPACES):
        outer_el = polygon_el.find(
            'kml:outerBoundaryIs/kml:LinearRing/kml:coordinates', KML_NAMESPACES
        )
        if outer_el is None:
            continue
        holes = [
            _parse_coordinates(hole_el.text)
            for hole_el in polygon_el.findall(
                'kml:innerBoundaryIs/kml:LinearRing/kml:coordinates', KML_NAMESPACES
            )
        ]
        shapes.append(Polygon(_parse_coordinates(outer_el.text), holes))
    if shapes:
        return shapes

    # No <Polygon> elements: fall back to treating every coordinate in the
    # document as one boundary ring (e.g. a boundary drawn as a path).
    loose_points = [
        point
        for coord_el in root.findall('.//kml:coordinates', KML_NAMESPACES)
        for point in _parse_coordinates(coord_el.text)
    ]
    if len(loose_points) < 3:
        raise ValueError("The KML file does not contain enough coordinates to form a polygon.")
    return [Polygon(loose_points)]


# Function to get land summary based on a KML or KMZ file
def get_land_summary_by_kml(kml_file):
    """Compute the area enclosed by the boundary in a KML or KMZ file.

    Args:
        kml_file: Path or file-like object holding KML text or a KMZ archive.

    Returns:
        float: Total polygon area in square metres, measured after projecting
        the lon/lat coordinates to an equal-area CRS.

    Raises:
        ValueError: If no usable boundary coordinates are found, or a KMZ
            archive has no ``.kml`` member.
        xml.etree.ElementTree.ParseError: If the KML is not well-formed XML.
    """
    root = _load_kml_root(kml_file)
    # KML coordinates are WGS84 lon/lat; always_xy keeps the (lon, lat) order.
    to_equal_area = Transformer.from_crs("EPSG:4326", EQUAL_AREA_CRS, always_xy=True).transform
    return sum(transform(to_equal_area, shape).area for shape in _kml_polygons(root))


# Function to summarize floodland areas, acreage, and usable land
def summarize_land_data(shapefile_data):
    """Return the total area and the area of rows marked as usable land.

    Args:
        shapefile_data: GeoDataFrame with a ``geometry`` column and a
            ``use_type`` column.

    Returns:
        tuple: ``(total_area, usable_land_area)``. Values are in square metres
        when the data has a geographic CRS (it is reprojected to an equal-area
        CRS first); otherwise they are in the squared units of the data's own
        CRS, which is assumed to be metre-based.

    Raises:
        KeyError: If the ``use_type`` column is missing.
    """
    if 'use_type' not in shapefile_data.columns:
        raise KeyError("Shapefile has no 'use_type' column; cannot determine usable land.")

    crs = shapefile_data.crs
    if crs is not None and crs.is_geographic:
        # Areas of lon/lat geometries are in square degrees; project first.
        shapefile_data = shapefile_data.to_crs(EQUAL_AREA_CRS)

    total_area = shapefile_data['geometry'].area.sum()  # Sum of all area
    usable_land = shapefile_data[shapefile_data['use_type'] == 'Usable Land']  # Filter usable land
    usable_land_area = usable_land['geometry'].area.sum()  # Area of usable land
    return total_area, usable_land_area


# Function to generate a response using RAG model
def generate_rag_response(query):
    """Generate an answer to ``query`` with the RAG model.

    Args:
        query: Question text.

    Returns:
        str: The first generated sequence, decoded without special tokens.
    """
    inputs = tokenizer(query, return_tensors="pt")
    # The model was loaded with its retriever attached, so generate() performs
    # retrieval internally. Calling the retriever by hand would also require
    # the question encoder's hidden states and the resulting doc scores.
    generated_ids = model.generate(input_ids=inputs['input_ids'])
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


def _show_land_summary(uploaded_file, kml_file):
    """Render the land summary for whichever upload is available."""
    if uploaded_file is not None:
        # Load the shapefile
        shapefile_data = load_shapefile(uploaded_file)
        if shapefile_data is not None:
            total_area, usable_land_area = summarize_land_data(shapefile_data)
            st.write(f"Total floodland area: {total_area:.2f} sq meters")
            st.write(f"Usable land area: {usable_land_area:.2f} sq meters")
    elif kml_file is not None:
        # Process KML for land summary
        land_area = get_land_summary_by_kml(kml_file)
        st.write(f"Floodland area in KML boundary: {land_area:.2f} sq meters")
    else:
        st.write("Please upload either a shapefile or KML/KMZ file.")


# Streamlit app interface
def main():
    """Render the Streamlit UI: file uploads, land summary and chatbot."""
    st.title("Geospatial Data Summary and Chatbot")

    # Buttons for interaction
    summary_button = st.button("Get Land Summary")
    chatbot_button = st.button("Chat with the Bot")

    # Upload file option
    uploaded_file = st.file_uploader("Upload SHP/DBF/ZIP file", type=["zip", "shp", "dbf"])
    kml_file = st.file_uploader("Upload KML or KMZ boundary file", type=["kml", "kmz"])

    # st.button() is True only during the rerun triggered by the click. Chat
    # mode is remembered in session state so the question box is still there
    # on the rerun that follows the user submitting a question.
    if chatbot_button:
        st.session_state["chat_active"] = True
    elif summary_button:
        st.session_state["chat_active"] = False

    if summary_button:
        try:
            _show_land_summary(uploaded_file, kml_file)
        except (KeyError, ValueError, zipfile.BadZipFile, ET.ParseError) as err:
            st.error(f"Could not summarize the uploaded file: {err}")
    elif st.session_state.get("chat_active", False):
        if uploaded_file is not None or kml_file is not None:
            query = st.text_input("Ask the bot a question about the data:")
            if query:
                answer = generate_rag_response(query)
                st.write(answer)
        else:
            st.write("Please upload a file before interacting with the chatbot.")


if __name__ == "__main__":
    main()