import pandas as pd
import numpy as np
import torch
from momentfm import MOMENTPipeline
from io import StringIO

# Initialize model globally
model = MOMENTPipeline.from_pretrained(
"AutonLab/MOMENT-1-large",
model_kwargs={"task_name": "reconstruction"},
)
model.init()
model.eval()  # inference only
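
# Approach used below: MOMENT reconstructs the series, and points with an
# unusually large reconstruction error are flagged as anomalies using a
# median/MAD ("modified z-score") threshold scaled by the sensitivity slider.
# Assumption about the momentfm API (sketch, not verified here): the pipeline's
# forward pass takes x_enc of shape [batch_size, n_channels, 512] (zero-padded,
# with an input_mask marking real samples) and returns an object whose
# .reconstruction attribute has the same shape.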
def generate_analysis_report(data_input, sensitivity=3.0):
"""Generate a comprehensive textual analysis report"""
try:
# Process and validate data
df = pd.read_csv(StringIO(data_input))
# Validate columns
if 'timestamp' not in df.columns or 'value' not in df.columns:
return "Error: CSV must contain 'timestamp' and 'value' columns"
# Convert data types
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df['value'] = pd.to_numeric(df['value'], errors='coerce')
# Check for invalid data
if df.isnull().any().any():
return "Error: Invalid data format (check timestamp/value formats)"
        # Sort chronologically and reset the index so positional indices from
        # the error array line up with DataFrame row labels
        df = df.sort_values('timestamp').reset_index(drop=True)
        n_obs = len(df)
        if n_obs > 512:
            return "Error: this demo handles at most 512 observations (MOMENT context length)"
        # Prepare data for the model: [batch_size, n_channels, seq_len=512],
        # zero-padded, with an input_mask marking the real samples
        seq_len = 512
        raw_values = df['value'].values.astype(np.float32)
        x_enc = np.zeros((1, 1, seq_len), dtype=np.float32)
        x_enc[0, 0, :n_obs] = raw_values
        input_mask = torch.zeros(1, seq_len)
        input_mask[0, :n_obs] = 1
        # Get reconstruction and per-point reconstruction error
        with torch.no_grad():
            output = model(x_enc=torch.from_numpy(x_enc), input_mask=input_mask)
        reconstructed = output.reconstruction.detach().cpu().numpy()
        errors = np.abs(raw_values - reconstructed[0, 0, :n_obs])
        # Calculate threshold (modified z-score: median + sensitivity * 1.4826 * MAD)
        median = np.median(errors)
        mad = np.median(np.abs(errors - median))
        threshold = median + sensitivity * (1.4826 * mad)
        # Identify anomalies
        anomalies = df[errors > threshold].copy()
        anomalies['anomaly_score'] = errors[errors > threshold]
        anomalies = anomalies.sort_values('anomaly_score', ascending=False)
        normal_points = df[errors <= threshold]
        # Guard against the no-anomaly case for the recommendations section
        top_anomaly_time = anomalies['timestamp'].iloc[0] if len(anomalies) else "n/a"
        # Generate report
        report = f"""
EQUIPMENT ANALYSIS REPORT
=========================
Generated at: {pd.Timestamp.now()}
Detection sensitivity: {sensitivity} (modified z-score)

DATA OVERVIEW
-------------
Time period: {df['timestamp'].min()} to {df['timestamp'].max()}
Total observations: {len(df)}
Value range: {df['value'].min():.2f} to {df['value'].max():.2f}
Median value: {df['value'].median():.2f}
Mean value: {df['value'].mean():.2f}

ANOMALY DETECTION RESULTS
-------------------------
Detection threshold: {threshold:.2f}
Anomalies detected: {len(anomalies)} ({len(anomalies)/len(df):.1%} of data)
Strongest anomaly: {errors.max():.2f} at {df.loc[errors.argmax(), 'timestamp']}

TOP ANOMALIES
-------------
{anomalies[['timestamp', 'value', 'anomaly_score']].head(15).to_string(index=False, float_format='%.2f')}

NORMAL OPERATION SUMMARY
------------------------
Typical value range: {normal_points['value'].min():.2f} to {normal_points['value'].max():.2f}
Stable period duration: {pd.Timedelta(normal_points['timestamp'].max() - normal_points['timestamp'].min())}

RECOMMENDATIONS
---------------
1. Investigate top {min(3, len(anomalies))} anomalous readings
2. Check equipment around {top_anomaly_time} for potential issues
3. Consider recalibration if anomalies cluster in specific time periods
4. Review maintenance logs around detected anomalies
"""
        return report.strip()
    except Exception as e:
        return f"ANALYSIS ERROR: {str(e)}"
# Gradio Interface for the report-only version
import gradio as gr
with gr.Blocks() as demo:
    gr.Markdown("## Equipment Analysis Report Generator")
    with gr.Row():
        with gr.Column():
            data_input = gr.Textbox(label="Paste CSV Data", lines=10, value="""timestamp,value
2025-04-01 00:00:00,100
2025-04-01 01:00:00,102
2025-04-01 02:00:00,98
2025-04-01 03:00:00,105
2025-04-01 04:00:00,103
2025-04-01 05:00:00,107
2025-04-01 06:00:00,200
2025-04-01 07:00:00,108
2025-04-01 08:00:00,110
2025-04-01 09:00:00,98
2025-04-01 10:00:00,99
2025-04-01 11:00:00,102
2025-04-01 12:00:00,101""")
            sensitivity = gr.Slider(1.0, 5.0, value=3.0, label="Detection Sensitivity")
            submit_btn = gr.Button("Generate Report", variant="primary")
        with gr.Column():
            report_output = gr.Textbox(label="Analysis Report", lines=20, interactive=False)
    submit_btn.click(
        generate_analysis_report,
        inputs=[data_input, sensitivity],
        outputs=report_output
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)