Spaces:

iamomtiwari
/

Ml_labs

Running

App Files Files Community

Ml_labs / app.py

iamomtiwari

Update app.py

95641e3 verified about 2 months ago

raw

history blame contribute delete

3.78 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.manifold import TSNE
	from sklearn.decomposition import PCA
	import joblib
	from tensorflow.keras.models import load_model
	from keras.models import load_model

	# Load data and models
	df = pd.read_csv("dataset.csv")
	# NOTE(review): joblib.load unpickles the file, so scaler.pkl must come
	# from a trusted source.
	scaler = joblib.load("scaler.pkl")
	# Nothing below trains either network, so skip restoring the training
	# configuration for the encoder too, the same way the autoencoder is loaded.
	encoder = load_model("encoder.h5", compile=False)
	autoencoder = load_model("autoencoder.h5", compile=False)

	# Columns that must never be passed to the scaler (fallback path only).
	non_feature_cols = {
	    'COMM_NAME', 'COMM_CODE', 'COMM_WT', 'Cluster', 'Reconstruction_Error',
	    'Anomaly', 'tSNE_1', 'tSNE_2', 'PCA_1', 'PCA_2',
	}

	# Safely extract correct feature columns for scaler
	if hasattr(scaler, 'feature_names_in_'):
	    feature_cols = scaler.feature_names_in_.tolist()
	else:
	    # Fallback: exclude known non-feature columns while keeping the column
	    # order of the CSV. Index.difference() returns the names sorted, which
	    # can hand the scaler its features in a different order.
	    # NOTE(review): assumes the scaler was fitted on the CSV's column
	    # order - confirm against the training code.
	    feature_cols = [col for col in df.columns if col not in non_feature_cols]

	# Transform only the original features the scaler expects
	X_scaled = scaler.transform(df[feature_cols])
	encoded_data = encoder.predict(X_scaled)

	# Add t-SNE and PCA embeddings if not already present. Both columns of a
	# pair are required; if either is missing the pair is recomputed so the
	# plotting code never hits a half-populated embedding.
	if not {'tSNE_1', 'tSNE_2'}.issubset(df.columns):
	    tsne = TSNE(n_components=2, random_state=42)
	    tsne_result = tsne.fit_transform(encoded_data)
	    df['tSNE_1'] = tsne_result[:, 0]
	    df['tSNE_2'] = tsne_result[:, 1]

	if not {'PCA_1', 'PCA_2'}.issubset(df.columns):
	    pca = PCA(n_components=2)
	    pca_result = pca.fit_transform(encoded_data)
	    df['PCA_1'] = pca_result[:, 0]
	    df['PCA_2'] = pca_result[:, 1]

	# Gradio UI functions
	def plot_cluster_visualization(plot_type, cluster_id):
	    """Scatter-plot every cluster of ``df`` in a 2-D embedding.

	    Args:
	        plot_type: ``'t-SNE'`` plots the ``tSNE_1``/``tSNE_2`` columns; any
	            other value plots ``PCA_1``/``PCA_2``.
	        cluster_id: ``'All'`` to draw only the per-cluster scatter, otherwise
	            a value accepted by ``int()`` naming the cluster whose points
	            are additionally outlined in black.

	    Returns:
	        The matplotlib ``Figure`` holding the plot.

	    Raises:
	        ValueError: if ``cluster_id`` is not ``'All'`` and ``int()`` cannot
	            parse it.
	    """
	    # Build the figure without pyplot: plt.figure() registers every figure
	    # in pyplot's global state and nothing here ever closed them, so each
	    # button click leaked a figure. A standalone Figure is released as soon
	    # as the caller drops it and does not depend on the "current figure".
	    from matplotlib.figure import Figure

	    fig = Figure(figsize=(8, 6))
	    ax = fig.subplots()

	    x, y = ('tSNE_1', 'tSNE_2') if plot_type == 't-SNE' else ('PCA_1', 'PCA_2')
	    for cluster in sorted(df['Cluster'].unique()):
	        subset = df[df['Cluster'] == cluster]
	        ax.scatter(subset[x], subset[y], label=f'Cluster {cluster}', s=60)

	    if cluster_id != 'All':
	        # Overlay hollow markers on top of the chosen cluster's points.
	        selected = df[df['Cluster'] == int(cluster_id)]
	        ax.scatter(selected[x], selected[y], edgecolor='black', facecolor='none', s=120, label='Selected Cluster')

	    ax.set_title(f"{plot_type} Visualization of Clusters")
	    ax.set_xlabel(x)
	    ax.set_ylabel(y)
	    ax.legend()
	    ax.grid(True)
	    return fig

	def show_cluster_commodities(cluster_id, top_n):
	    """Return the ``top_n`` rows with the highest reconstruction error.

	    Args:
	        cluster_id: ``'All'`` to rank every row of ``df``; otherwise a value
	            accepted by ``int()`` that restricts the ranking to rows whose
	            ``Cluster`` equals it.
	        top_n: number of rows to keep, passed to ``DataFrame.head``.

	    Returns:
	        A DataFrame with the columns ``COMM_NAME``, ``Cluster``,
	        ``Reconstruction_Error`` and ``Anomaly``, sorted by
	        ``Reconstruction_Error`` in descending order.
	    """
	    rows = df if cluster_id == 'All' else df[df['Cluster'] == int(cluster_id)]
	    ranked = rows.sort_values(by='Reconstruction_Error', ascending=False)
	    shown_columns = ['COMM_NAME', 'Cluster', 'Reconstruction_Error', 'Anomaly']
	    return ranked[shown_columns].head(top_n)

	def show_anomalies(top_n):
	    """Return the ``top_n`` flagged rows with the highest reconstruction error.

	    Args:
	        top_n: number of rows to keep, passed to ``DataFrame.head``.

	    Returns:
	        A DataFrame with the columns ``COMM_NAME``, ``Cluster`` and
	        ``Reconstruction_Error``, sorted by ``Reconstruction_Error`` in
	        descending order.
	    """
	    # NOTE(review): assumes the 'Anomaly' column is boolean so that it acts
	    # as a row mask - confirm against dataset.csv.
	    is_anomaly = df['Anomaly']
	    flagged = df[is_anomaly]
	    ranked = flagged.sort_values(by='Reconstruction_Error', ascending=False)
	    return ranked[['COMM_NAME', 'Cluster', 'Reconstruction_Error']].head(top_n)

	# Gradio UI layout
	# NOTE(review): the nesting below is reconstructed from the blank-line
	# grouping of the statements - confirm it matches the intended layout.
	with gr.Blocks() as demo:
	    gr.Markdown("# 📊 Commodity Index Clustering + Anomaly Detection (Autoencoder)")

	    # First row: the inputs shared by the three buttons.
	    with gr.Row():
	        plot_type = gr.Radio(
	            choices=["t-SNE", "PCA"],
	            value="t-SNE",
	            label="Plot Type",
	        )
	        cluster_choice = gr.Dropdown(
	            choices=['All'] + [str(label) for label in sorted(df['Cluster'].unique())],
	            value='All',
	            label="Cluster",
	        )
	        top_n = gr.Slider(minimum=5, maximum=50, value=10, step=1, label="Top N Results")

	    # Second row: the plot next to the table. Both table buttons write to
	    # the same Dataframe component.
	    with gr.Row():
	        plot_output = gr.Plot()
	        table_output = gr.Dataframe()

	    plot_button = gr.Button("Show Cluster Visualization")
	    plot_button.click(
	        fn=plot_cluster_visualization,
	        inputs=[plot_type, cluster_choice],
	        outputs=plot_output,
	    )

	    cluster_table_btn = gr.Button("Show Cluster Commodities")
	    cluster_table_btn.click(
	        fn=show_cluster_commodities,
	        inputs=[cluster_choice, top_n],
	        outputs=table_output,
	    )

	    anomaly_btn = gr.Button("Show Top Anomalies")
	    anomaly_btn.click(
	        fn=show_anomalies,
	        inputs=[top_n],
	        outputs=table_output,
	    )

	demo.launch()