zainulabedin949 committed 322852a (verified) · Parent(s): 57d61ff

Update app.py

Files changed (1): app.py (+49 −55)
app.py CHANGED
@@ -25,28 +25,22 @@ except Exception as e:
 def validate_data(data_input):
     """Validate and process input data"""
     try:
-        if isinstance(data_input, str):
-            df = pd.read_csv(StringIO(data_input))
-        else:
-            raise ValueError("Input must be CSV text")
+        df = pd.read_csv(StringIO(data_input))
 
         # Validate columns
         if not all(col in df.columns for col in ['timestamp', 'value']):
-            raise ValueError("CSV must contain 'timestamp' and 'value' columns")
+            raise ValueError("CSV must contain timestamp and value columns")
 
-        # Convert timestamps
-        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
-        if df['timestamp'].isnull().any():
-            raise ValueError("Invalid timestamp format")
-
-        # Convert values to numeric
-        df['value'] = pd.to_numeric(df['value'], errors='raise')
+        # Convert and validate data
+        df['timestamp'] = pd.to_datetime(df['timestamp'])
+        df['value'] = pd.to_numeric(df['value'])
+        df = df.sort_values('timestamp').reset_index(drop=True)
 
-        return df.sort_values('timestamp')
+        return df
 
     except Exception as e:
         logger.error(f"Data validation error: {str(e)}")
-        raise
+        raise ValueError(f"Invalid data format: {str(e)}")
 
 def detect_anomalies(data_input, sensitivity=3.0):
     """Perform reconstruction-based anomaly detection"""
@@ -54,17 +48,19 @@ def detect_anomalies(data_input, sensitivity=3.0):
         df = validate_data(data_input)
         values = df['value'].values.astype(np.float32)
 
-        # Reshape to 3D format (batch, sequence, features)
+        # Reshape to 3D format expected by MOMENT
         values_3d = values.reshape(1, -1, 1)
 
-        # Get reconstruction - using explicit parameter name
-        reconstructed = model.reconstruct(X=values_3d)
-
-        # Calculate reconstruction error (MAE)
+        # Get reconstruction
+        reconstructed = model.reconstruct(values_3d)
         errors = np.abs(values - reconstructed[0,:,0])
 
-        # Dynamic threshold (z-score based)
-        threshold = np.mean(errors) + sensitivity * np.std(errors)
+        # Dynamic threshold (modified z-score)
+        median = np.median(errors)
+        mad = np.median(np.abs(errors - median))
+        threshold = median + sensitivity * (1.4826 * mad)
+
+        # Store results
         df['anomaly_score'] = errors
         df['is_anomaly'] = errors > threshold
 
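Note (not part of the commit): the new threshold is a modified z-score. The constant 1.4826 rescales the median absolute deviation (MAD) so it approximates the standard deviation for normally distributed errors, keeping the sensitivity slider roughly comparable to a z-score cutoff while staying robust to the very outliers being detected. A toy example:

```python
# Illustration only -- not from app.py. Toy run of the MAD-based
# threshold used above: the single large error is flagged, and the
# threshold itself is barely influenced by it.
import numpy as np

errors = np.array([0.10, 0.12, 0.09, 0.11, 0.10, 0.95])
sensitivity = 3.0

median = np.median(errors)                # 0.105
mad = np.median(np.abs(errors - median))  # 0.010
threshold = median + sensitivity * (1.4826 * mad)

print(f"threshold = {threshold:.3f}")                          # ~0.15, well below 0.95
print("anomalous indices:", np.where(errors > threshold)[0])   # [5]
```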
@@ -74,38 +70,36 @@
         ax.scatter(
             df.loc[df['is_anomaly'], 'timestamp'],
             df.loc[df['is_anomaly'], 'value'],
-            color='red', s=100, label=f'Anomaly (>{threshold:.2f})'
+            color='red', s=100, label=f'Anomaly (score > {threshold:.2f})'
         )
-        ax.set_title('Sensor Data Anomaly Detection')
+        ax.set_title('Sensor Data with Anomalies Detected')
         ax.set_xlabel('Timestamp')
         ax.set_ylabel('Value')
         ax.legend()
         ax.grid(True)
         plt.tight_layout()
 
-        # Limit DataFrame display size
-        display_df = df[['timestamp', 'value', 'anomaly_score', 'is_anomaly']].head(20)
+        # Prepare statistics
+        stats = {
+            "data_points": len(df),
+            "anomalies_detected": int(df['is_anomaly'].sum()),
+            "detection_threshold": float(threshold),
+            "max_anomaly_score": float(np.max(errors)),
+            "median_value": float(median),
+            "mean_value": float(np.mean(values))
+        }
 
-        return (
-            fig,
-            {"statistics": {
-                "data_points": len(df),
-                "anomalies_detected": int(df['is_anomaly'].sum()),
-                "detection_threshold": float(threshold),
-                "max_anomaly_score": float(np.max(errors))
-            }},
-            display_df.to_dict('records')
-        )
+        # Prepare sample records (first 20)
+        sample_records = df.head(20).to_dict('records')
+
+        return fig, stats, sample_records
 
     except Exception as e:
-        logger.error(f"Detection error: {str(e)}")
-        return (
-            None,
-            {"error": str(e)},
-            None
-        )
+        error_msg = str(e)
+        logger.error(f"Detection error: {error_msg}")
+        return None, {"error": error_msg}, None
 
-# Default data with clear anomaly
+# Default sample data
 DEFAULT_DATA = """timestamp,value
 2025-04-01 00:00:00,100
 2025-04-01 01:00:00,102
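
Note (not part of the commit): the scoring and reporting path above can be dry-run without loading MOMENT-1-large by substituting a cheap stand-in reconstruction (here a centred rolling mean); only model.reconstruct is swapped out, the rest mirrors the committed logic:

```python
# Illustration only -- not from app.py. Dry run of the scoring / stats /
# sample-records logic with a rolling-mean "reconstruction" standing in
# for model.reconstruct, so no MOMENT checkpoint is needed.
import numpy as np
import pandas as pd
from io import StringIO

csv_text = "timestamp,value\n" + "\n".join(
    f"2025-04-01 {h:02d}:00:00,{v}"
    for h, v in enumerate([100, 102, 101, 300, 103, 102])
)
df = pd.read_csv(StringIO(csv_text))
df["timestamp"] = pd.to_datetime(df["timestamp"])
values = df["value"].to_numpy(dtype=np.float32)

# Stand-in for model.reconstruct(values_3d)[0, :, 0]
reconstructed = pd.Series(values).rolling(3, center=True, min_periods=1).mean().to_numpy()
errors = np.abs(values - reconstructed)

median = np.median(errors)
mad = np.median(np.abs(errors - median))
threshold = median + 3.0 * (1.4826 * mad)

df["anomaly_score"] = errors
df["is_anomaly"] = errors > threshold

stats = {
    "data_points": len(df),
    "anomalies_detected": int(df["is_anomaly"].sum()),  # 1 (the 300 spike)
    "detection_threshold": float(threshold),
}
sample_records = df.head(20).to_dict("records")
print(stats)
print(sample_records[3])  # the flagged row
```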
@@ -121,36 +115,36 @@ DEFAULT_DATA = """timestamp,value
 2025-04-01 11:00:00,102
 2025-04-01 12:00:00,101"""
 
-# Gradio Interface
+# Create Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 📈 Equipment Anomaly Detection
-    Detect unusual patterns in sensor data using MOMENT-1-large model
+    # 🏭 Equipment Anomaly Detection
+    ### Using MOMENT-1-large foundation model
     """)
 
     with gr.Row():
         with gr.Column():
-            data_input = gr.Textbox(
-                label="Paste CSV Data",
+            input_data = gr.Textbox(
+                label="Enter CSV Data",
                 value=DEFAULT_DATA,
                 lines=10,
-                placeholder="timestamp,value\n2025-01-01, 100\n2025-01-02, 105..."
+                placeholder="timestamp,value\n2025-01-01 00:00:00,100\n..."
            )
            sensitivity = gr.Slider(
                1.0, 5.0, value=3.0, step=0.1,
-                label="Detection Sensitivity (z-score)"
+                label="Detection Sensitivity"
            )
-            analyze_btn = gr.Button("Analyze", variant="primary")
+            analyze_btn = gr.Button("Detect Anomalies!", variant="primary")
 
         with gr.Column():
-            plot = gr.Plot(label="Results")
-            stats = gr.JSON(label="Detection Statistics")
-            results = gr.JSON(label="Top 20 Records")
+            plot_output = gr.Plot(label="Detection Results")
+            stats_output = gr.JSON(label="Statistics Summary")
+            records_output = gr.JSON(label="Sample Records (First 20)")
 
     analyze_btn.click(
         detect_anomalies,
-        inputs=[data_input, sensitivity],
-        outputs=[plot, stats, results]
+        inputs=[input_data, sensitivity],
+        outputs=[plot_output, stats_output, records_output]
     )
 
 if __name__ == "__main__":
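
Note (not part of the commit): the click wiring maps the input components positionally onto detect_anomalies(data_input, sensitivity), and the returned (figure, stats, records) tuple onto the three output components. A minimal standalone sketch with the same shape, using a stub in place of the model-backed function; the demo.launch() call is an assumption, since the committed hunk ends at the if __name__ guard:

```python
# Illustration only -- not from app.py. Same Blocks wiring shape with a
# stub callback, runnable without MOMENT; demo.launch() is assumed here.
import gradio as gr

def stub_detect(csv_text, sensitivity):
    # Stands in for detect_anomalies(data_input, sensitivity)
    return None, {"rows": csv_text.count("\n"), "sensitivity": sensitivity}, []

with gr.Blocks() as demo:
    input_data = gr.Textbox(label="Enter CSV Data", lines=4)
    sensitivity = gr.Slider(1.0, 5.0, value=3.0, step=0.1, label="Detection Sensitivity")
    analyze_btn = gr.Button("Detect Anomalies!")
    plot_output = gr.Plot(label="Detection Results")
    stats_output = gr.JSON(label="Statistics Summary")
    records_output = gr.JSON(label="Sample Records (First 20)")

    analyze_btn.click(
        stub_detect,
        inputs=[input_data, sensitivity],
        outputs=[plot_output, stats_output, records_output],
    )

if __name__ == "__main__":
    demo.launch()
```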
 