Spaces:

Agents-MCP-Hackathon
/

Statistical-Analysis-MCP

Running

App Files Files Community

Statistical-Analysis-MCP / app.py

JG1310

Update app.py

e09dd64 verified 14 days ago

raw

history blame contribute delete

127 kB

	import gradio as gr
	import numpy as np
	import pandas as pd
	from scipy import stats
	from typing import List, Dict, Any, Optional, Union

	def parse_numeric_input(data: str) -> List[float]:
	    """
	    Parse a comma-separated string of numbers into a list of floats.

	    Tokens are split on commas and stripped of surrounding whitespace;
	    empty tokens (e.g. from a trailing comma) are skipped.

	    Args:
	        data (str): Comma-separated string of numbers (e.g., "1.2,2.3,3.4,2.1")

	    Returns:
	        List[float]: Parsed numeric data, in input order

	    Raises:
	        ValueError: If any non-empty token cannot be converted by float(),
	            or if the string contains no non-empty tokens at all

	    Example:
	        >>> parse_numeric_input("85.2,90.1,78.5,92.3")
	        [85.2, 90.1, 78.5, 92.3]
	    """
	    tokens = [token.strip() for token in data.split(',')]

	    # Only the conversion can fail, so only the conversion is guarded.
	    # Chaining with "from" keeps the offending token visible in tracebacks.
	    try:
	        parsed = [float(token) for token in tokens if token]
	    except ValueError as err:
	        raise ValueError(f"Cannot parse '{data}' as comma-separated numbers") from err

	    if not parsed:
	        raise ValueError("No valid numbers found in input string")
	    return parsed

	def welch_t_test(
	    dataframe: Optional[pd.DataFrame] = None,
	    group1_str: Optional[str] = None,
	    group2_str: Optional[str] = None,
	    alternative: str = "two-sided",
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.2,0.5,0.8"
	) -> Dict[str, Any]:
	    """
	    Accepts two groups of numeric data as comma-separated strings or DataFrame columns and performs Welch's t-test. This test determines whether two independent groups have significantly different means.
	    This test is valid even when populations have different variances. Default to this test instead of Student's t-test if you are unsure about population variance.
	    This test calculates a t-statistic using Welch's formula that accounts for unequal variances. Given an alternative hypothesis (group1 ≠ group2, group1 < group2, or group1 > group2),
	    it outputs the p-value: the probability of observing this result (or more extreme) if no true difference exists. Results are considered statistically significant
	    when p-value < alpha (typically 0.05). Cohen's d measures practical effect size, calculated using pooled standard deviation for consistency with other t-tests, with interpretation:
	    |d| < 0.2 = negligible, 0.2-0.5 = small, 0.5-0.8 = medium, >0.8 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: treatment vs control groups, before/after measurements with different participants,
	    comparing performance between demographic groups.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing group data in first two columns.
	            If provided, group1_str and group2_str will be ignored.
	        group1_str (Optional[str]): Comma-separated string of numeric values for the first group.
	            Example: "12.1,15.3,18.7,14.2,16.8" (reaction times for Group A)
	            Only used if dataframe is None or empty.
	        group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
	            Example: "22.4,19.8,25.1,21.3" (reaction times for Group B)
	            Only used if dataframe is None or empty.
	        alternative (str): Direction of the alternative hypothesis:
	            - "two-sided": group1 mean ≠ group2 mean (different in either direction)
	            - "less": group1 mean < group2 mean (group1 is smaller)
	            - "greater": group1 mean > group2 mean (group1 is larger)
	        alpha (float): Significance level for the test (probability of Type I error).
	            Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large

	    Returns:
	        dict: On failure, a dict with a single "error" key describing the problem (this function
	        does not raise). On success, comprehensive test results with the following keys:
	            - test_type (str): Always "Welch's t-test"
	            - t_statistic (float): The calculated t-value using Welch's formula
	            - p_value (float): Probability of observing this result if null hypothesis is true
	            - degrees_of_freedom (float): Welch's adjusted df (usually non-integer), accounts for unequal variances
	            - cohens_d (float): Standardized effect size. Positive means group1 > group2, negative means group1 < group2
	            - pooled_std (float): Pooled standard deviation used in effect size calculation
	            - group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
	            - group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
	            - significant (bool): True if p_value < alpha
	            - effect_size (str): Categorical interpretation of Cohen's d magnitude
	            - alternative_hypothesis (str): Echo of alternative parameter
	            - alpha (float): Echo of significance level used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	    """
	    try:
	        # Parse effect size thresholds
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError, TypeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # Reject a bad hypothesis direction up front so the caller gets a
	        # specific message rather than a generic "unexpected error".
	        if alternative not in ("two-sided", "less", "greater"):
	            return {"error": f"Invalid alternative '{alternative}'. Use 'two-sided', 'less', or 'greater'."}

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines)
	        if dataframe is not None and not dataframe.empty:
	            # Use first two columns automatically
	            if len(dataframe.columns) < 2:
	                return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}

	            # Extract and validate data from first two columns
	            try:
	                # Convert to numeric, coercing errors to NaN
	                col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
	                col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')

	                # Remove NaN values and convert to list
	                group1 = col1_numeric.dropna().tolist()
	                group2 = col2_numeric.dropna().tolist()

	                # Check if we lost too much data due to non-numeric values
	                original_count1 = len(dataframe.iloc[:, 0].dropna())
	                original_count2 = len(dataframe.iloc[:, 1].dropna())

	                if len(group1) < original_count1 * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}

	                if len(group2) < original_count2 * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}

	            except Exception as e:
	                return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
	            try:
	                group1 = parse_numeric_input(group1_str)
	                group2 = parse_numeric_input(group2_str)
	            except ValueError as e:
	                return {"error": f"String parsing error: {str(e)}"}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}

	        # Validate extracted data
	        if len(group1) < 2:
	            return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}

	        if len(group2) < 2:
	            return {"error": f"Group 2 must have at least 2 observations. Found {len(group2)} values."}

	        # Convert to numpy arrays for calculations
	        data1 = np.array(group1, dtype=float)
	        data2 = np.array(group2, dtype=float)
	        n1, n2 = len(data1), len(data2)

	        # Descriptive statistics, stored as builtin floats/ints so the result
	        # dict contains no NumPy scalar types.
	        desc1 = {"mean": float(np.mean(data1)), "std": float(np.std(data1, ddof=1)), "n": n1}
	        desc2 = {"mean": float(np.mean(data2)), "std": float(np.std(data2, ddof=1)), "n": n2}
	        s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2

	        # Effect size (Cohen's d using pooled standard deviation for consistency)
	        # For Welch's test, we still typically use pooled SD for Cohen's d calculation
	        pooled_std = float(np.sqrt(((n1 - 1) * s1_sq + (n2 - 1) * s2_sq) / (n1 + n2 - 2)))

	        # With no spread in either group the t-statistic, Welch df and Cohen's d
	        # are all 0/0 or x/0. NaN would fail every "<" comparison below and be
	        # mislabelled "large", so report the problem instead.
	        if pooled_std == 0:
	            return {"error": "Both groups have zero variance; the t-statistic and Cohen's d are undefined."}

	        # Perform Welch's t-test (unequal variances)
	        t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=False, alternative=alternative)
	        t_stat, p_value = float(t_stat), float(p_value)

	        # Welch-Satterthwaite degrees of freedom
	        df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))

	        cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std

	        # Interpretation using the supplied thresholds (Cohen's benchmarks by default)
	        significant = bool(p_value < alpha)
	        abs_d = abs(cohens_d)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if abs_d < small_threshold:
	            effect_size_interp = "negligible"
	        elif abs_d < medium_threshold:
	            effect_size_interp = "small"
	        elif abs_d < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": "Welch's t-test",
	            "t_statistic": t_stat,
	            "p_value": p_value,
	            "degrees_of_freedom": df,
	            "cohens_d": cohens_d,
	            "pooled_std": pooled_std,
	            "group1_stats": desc1,
	            "group2_stats": desc2,
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "alternative_hypothesis": alternative,
	            "alpha": alpha,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Tool boundary: callers expect an error dict, never an exception.
	        return {"error": f"Unexpected error in Welch's t-test: {str(e)}"}

	def student_t_test(
	    dataframe: Optional[pd.DataFrame] = None,
	    group1_str: Optional[str] = None,
	    group2_str: Optional[str] = None,
	    alternative: str = "two-sided",
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.2,0.5,0.8"
	) -> Dict[str, Any]:
	    """
	    Accepts two groups of numeric data as comma-separated strings or DataFrame columns and performs Student's t-test.
	    This test determines whether two independent groups have significantly different means, assuming populations from which the groups were sampled have equal
	    variances (if this assumption is violated, or if equal population variance cannot be verified, use Welch's t-test instead). The test calculates a t-statistic quantifying the mean
	    difference as a multiple of pooled standard deviation. Given an alternative hypothesis (group1 ≠ group2, group1 < group2, or group1 > group2),
	    it outputs the p-value: the probability of observing this result (or more extreme) if no true difference exists. Results are statistically significant
	    when p-value < alpha (typically 0.05). Cohen's d measures practical effect size, standardized by pooled standard deviation, with interpretation:
	    |d| < 0.2 = negligible, 0.2-0.5 = small, 0.5-0.8 = medium, >0.8 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: treatment vs control groups, before/after measurements with different participants,
	    comparing performance between demographic groups.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing group data in first two columns.
	            If provided, group1_str and group2_str will be ignored.
	        group1_str (Optional[str]): Comma-separated string of numeric values for the first group.
	            Example: "85.2,90.1,78.5,92.3" (test scores for Group A)
	            Only used if dataframe is None or empty.
	        group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
	            Example: "88.1,85.7,91.2,87.4" (test scores for Group B)
	            Only used if dataframe is None or empty.
	        alternative (str): Direction of the alternative hypothesis:
	            - "two-sided": group1 mean ≠ group2 mean (different in either direction)
	            - "less": group1 mean < group2 mean (group1 is smaller)
	            - "greater": group1 mean > group2 mean (group1 is larger)
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if p_value below this threshold.
	            Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
	            These are Cohen's canonical benchmarks for effect size interpretation.

	    Returns:
	        dict: On failure, a dict with a single "error" key describing the problem (this function
	        does not raise). On success, comprehensive test results with the following keys:
	            - test_type (str): Always "Student's t-test"
	            - t_statistic (float): The calculated t-value, which measures how many standard errors the difference
	              between group means is away from zero (assuming the null hypothesis is true).
	              Larger absolute values indicate the observed difference is less likely under the null hypothesis.
	            - p_value (float): Probability of observing this result (or more extreme) if null hypothesis is true.
	              Values < alpha indicate statistical significance.
	            - degrees_of_freedom (int): df = n1 + n2 - 2, degrees of freedom for the pooled variance estimate, used for determining critical t-values.
	            - cohens_d (float): Effect size measure. Positive means group1 > group2, negative means group1 < group2.
	              Interpreted using Cohen's canonical benchmarks: negligible (<0.2), small (0.2), medium (0.5), large (0.8).
	            - pooled_std (float): Combined standard deviation used in Cohen's d calculation.
	            - group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
	            - group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
	            - significant (bool): True if p_value < alpha, False otherwise
	            - effect_size (str): Categorical interpretation ("negligible", "small", "medium", "large") based on |cohens_d| and effect_thresholds
	            - alternative_hypothesis (str): Echo of the alternative parameter used
	            - alpha (float): Echo of the significance level used
	            - effect_thresholds (List[float]): Echo of the thresholds used
	    """
	    try:
	        # Parse effect size thresholds
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError, TypeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # Reject a bad hypothesis direction up front so the caller gets a
	        # specific message rather than a generic "unexpected error".
	        if alternative not in ("two-sided", "less", "greater"):
	            return {"error": f"Invalid alternative '{alternative}'. Use 'two-sided', 'less', or 'greater'."}

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines)
	        if dataframe is not None and not dataframe.empty:
	            # Use first two columns automatically
	            if len(dataframe.columns) < 2:
	                return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}

	            # Extract and validate data from first two columns
	            try:
	                # Convert to numeric, coercing errors to NaN
	                col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
	                col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')

	                # Remove NaN values and convert to list
	                group1 = col1_numeric.dropna().tolist()
	                group2 = col2_numeric.dropna().tolist()

	                # Check if we lost too much data due to non-numeric values
	                original_count1 = len(dataframe.iloc[:, 0].dropna())
	                original_count2 = len(dataframe.iloc[:, 1].dropna())

	                if len(group1) < original_count1 * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}

	                if len(group2) < original_count2 * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}

	            except Exception as e:
	                return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
	            try:
	                group1 = parse_numeric_input(group1_str)
	                group2 = parse_numeric_input(group2_str)
	            except ValueError as e:
	                return {"error": f"String parsing error: {str(e)}"}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}

	        # Validate extracted data
	        if len(group1) < 2:
	            return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}

	        if len(group2) < 2:
	            return {"error": f"Group 2 must have at least 2 observations. Found {len(group2)} values."}

	        # Convert to numpy arrays for calculations
	        data1 = np.array(group1, dtype=float)
	        data2 = np.array(group2, dtype=float)
	        n1, n2 = len(data1), len(data2)

	        # Descriptive statistics, stored as builtin floats/ints so the result
	        # dict contains no NumPy scalar types.
	        desc1 = {"mean": float(np.mean(data1)), "std": float(np.std(data1, ddof=1)), "n": n1}
	        desc2 = {"mean": float(np.mean(data2)), "std": float(np.std(data2, ddof=1)), "n": n2}

	        # Degrees of freedom (pooled)
	        df = n1 + n2 - 2

	        # Pooled standard deviation, shared by the test and by Cohen's d
	        pooled_std = float(np.sqrt(((n1 - 1) * desc1["std"]**2 + (n2 - 1) * desc2["std"]**2) / df))

	        # With no spread in either group the t-statistic and Cohen's d are 0/0
	        # or x/0. NaN would fail every "<" comparison below and be mislabelled
	        # "large", so report the problem instead.
	        if pooled_std == 0:
	            return {"error": "Both groups have zero variance; the t-statistic and Cohen's d are undefined."}

	        # Perform Student's t-test (equal variances)
	        t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=True, alternative=alternative)
	        t_stat, p_value = float(t_stat), float(p_value)

	        # Effect size (Cohen's d using pooled standard deviation)
	        cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std

	        # Interpretation using the supplied thresholds (Cohen's benchmarks by default)
	        significant = bool(p_value < alpha)
	        abs_d = abs(cohens_d)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if abs_d < small_threshold:
	            effect_size_interp = "negligible"
	        elif abs_d < medium_threshold:
	            effect_size_interp = "small"
	        elif abs_d < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": "Student's t-test",
	            "t_statistic": t_stat,
	            "p_value": p_value,
	            "degrees_of_freedom": df,
	            "cohens_d": cohens_d,
	            "pooled_std": pooled_std,
	            "group1_stats": desc1,
	            "group2_stats": desc2,
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "alternative_hypothesis": alternative,
	            "alpha": alpha,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Tool boundary: callers expect an error dict, never an exception.
	        return {"error": f"Unexpected error in Student's t-test: {str(e)}"}

	def paired_t_test(
	    dataframe: Optional[pd.DataFrame] = None,
	    group1_str: Optional[str] = None,
	    group2_str: Optional[str] = None,
	    alternative: str = "two-sided",
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.2,0.5,0.8"
	) -> Dict[str, Any]:
	    """
	    Accepts two groups of paired numeric data as comma-separated strings or DataFrame columns and performs a paired samples t-test.
	    This test determines whether there is a significant difference between two related measurements (same subjects measured twice),
	    such as before/after treatment measurements. Unlike independent samples t-tests, this test accounts for the correlation between
	    paired observations, making it more powerful for detecting differences in repeated measures designs. The test calculates a t-statistic
	    based on the mean of the differences between paired observations. Given an alternative hypothesis (group1 ≠ group2, group1 < group2,
	    or group1 > group2), it outputs the p-value: the probability of observing this result (or more extreme) if no true difference exists.
	    Results are statistically significant when p-value < alpha (typically 0.05). Cohen's d measures practical effect size, calculated
	    as the mean difference divided by the standard deviation of differences, with interpretation: |d| < 0.2 = negligible, 0.2-0.5 = small,
	    0.5-0.8 = medium, >0.8 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: before/after treatment measurements on same subjects, pre/post test scores, repeated measurements over time.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing paired data in first two columns; each row is one pair.
	            Rows where either value is missing or non-numeric are dropped as a whole so pairs stay aligned.
	            If provided, group1_str and group2_str will be ignored.
	        group1_str (Optional[str]): Comma-separated string of numeric values for the first measurement.
	            Example: "85.2,90.1,78.5,92.3" (pre-test scores)
	            Only used if dataframe is None or empty.
	        group2_str (Optional[str]): Comma-separated string of numeric values for the second measurement.
	            Example: "88.1,95.7,82.2,94.4" (post-test scores)
	            Only used if dataframe is None or empty.
	        alternative (str): Direction of the alternative hypothesis:
	            - "two-sided": group1 mean ≠ group2 mean (different in either direction)
	            - "less": group1 mean < group2 mean (group1 is smaller)
	            - "greater": group1 mean > group2 mean (group1 is larger)
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if p_value below this threshold.
	            Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large

	    Returns:
	        dict: On failure, a dict with a single "error" key describing the problem (this function
	        does not raise). On success, comprehensive test results with the following keys:
	            - test_type (str): Always "Paired samples t-test"
	            - t_statistic (float): The calculated t-value based on mean difference (group1 - group2) and standard error of differences
	            - p_value (float): Probability of observing this result if null hypothesis is true
	            - degrees_of_freedom (int): df = n - 1, where n is the number of paired observations
	            - cohens_d (float): Effect size measure computed from (group2 - group1). Positive means group2 > group1,
	              negative means group1 > group2. Note this sign is opposite to that of t_statistic.
	            - pooled_std (float): Standard deviation of the differences (used in Cohen's d calculation)
	            - group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
	            - group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
	            - significant (bool): True if p_value < alpha
	            - effect_size (str): Categorical interpretation of Cohen's d magnitude
	            - alternative_hypothesis (str): Echo of alternative parameter
	            - alpha (float): Echo of significance level used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	    """
	    try:
	        # Parse effect size thresholds
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError, TypeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # Reject a bad hypothesis direction up front so the caller gets a
	        # specific message rather than a generic "unexpected error".
	        if alternative not in ("two-sided", "less", "greater"):
	            return {"error": f"Invalid alternative '{alternative}'. Use 'two-sided', 'less', or 'greater'."}

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines)
	        if dataframe is not None and not dataframe.empty:
	            # Use first two columns automatically
	            if len(dataframe.columns) < 2:
	                return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}

	            # Extract and validate data from first two columns
	            try:
	                # Convert to numeric, coercing errors to NaN
	                col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
	                col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')

	                # Check if we lost too much data due to non-numeric values
	                numeric_count1 = int(col1_numeric.notna().sum())
	                numeric_count2 = int(col2_numeric.notna().sum())
	                original_count1 = len(dataframe.iloc[:, 0].dropna())
	                original_count2 = len(dataframe.iloc[:, 1].dropna())

	                if numeric_count1 < original_count1 * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 1 contains too many non-numeric values. Only {numeric_count1} out of {original_count1} values could be converted to numbers."}

	                if numeric_count2 < original_count2 * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 2 contains too many non-numeric values. Only {numeric_count2} out of {original_count2} values could be converted to numbers."}

	                # Drop incomplete ROWS, not individual cells: removing NaNs from
	                # each column separately would shift values and pair up
	                # measurements that belong to different subjects.
	                complete_rows = (col1_numeric.notna() & col2_numeric.notna()).to_numpy()
	                group1 = col1_numeric.to_numpy(dtype=float)[complete_rows].tolist()
	                group2 = col2_numeric.to_numpy(dtype=float)[complete_rows].tolist()

	            except Exception as e:
	                return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
	            try:
	                group1 = parse_numeric_input(group1_str)
	                group2 = parse_numeric_input(group2_str)
	            except ValueError as e:
	                return {"error": f"String parsing error: {str(e)}"}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}

	        # Validate extracted data - paired samples must have equal length
	        if len(group1) != len(group2):
	            return {"error": f"Paired samples must have equal length. Group1 has {len(group1)} observations, Group2 has {len(group2)} observations."}

	        if len(group1) < 2:
	            return {"error": f"Need at least 2 paired observations. Found {len(group1)} pairs."}

	        # Convert to numpy arrays for calculations
	        data1 = np.array(group1, dtype=float)
	        data2 = np.array(group2, dtype=float)

	        # Descriptive statistics, stored as builtin floats/ints so the result
	        # dict contains no NumPy scalar types.
	        desc1 = {"mean": float(np.mean(data1)), "std": float(np.std(data1, ddof=1)), "n": len(data1)}
	        desc2 = {"mean": float(np.mean(data2)), "std": float(np.std(data2, ddof=1)), "n": len(data2)}

	        # Differences are taken as group2 - group1, so the sign of Cohen's d
	        # follows the documented "positive means group2 > group1" convention.
	        differences = data2 - data1
	        mean_diff = float(np.mean(differences))
	        std_diff = float(np.std(differences, ddof=1))

	        # If every pair differs by the same amount, the t-statistic and Cohen's d
	        # divide by zero. NaN would fail every "<" comparison below and be
	        # mislabelled "large", so report the problem instead.
	        if std_diff == 0:
	            return {"error": "The paired differences have zero variance; the t-statistic and Cohen's d are undefined."}

	        # Perform paired t-test
	        t_stat, p_value = stats.ttest_rel(data1, data2, alternative=alternative)
	        t_stat, p_value = float(t_stat), float(p_value)

	        # Degrees of freedom for paired t-test
	        df = len(data1) - 1

	        # Effect size (Cohen's d for paired samples: mean difference / std of differences)
	        cohens_d = mean_diff / std_diff

	        # Interpretation using the supplied thresholds (Cohen's benchmarks by default)
	        significant = bool(p_value < alpha)
	        abs_d = abs(cohens_d)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if abs_d < small_threshold:
	            effect_size_interp = "negligible"
	        elif abs_d < medium_threshold:
	            effect_size_interp = "small"
	        elif abs_d < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": "Paired samples t-test",
	            "t_statistic": t_stat,
	            "p_value": p_value,
	            "degrees_of_freedom": df,
	            "cohens_d": cohens_d,
	            "pooled_std": std_diff,  # For paired t-test, this is std of differences
	            "group1_stats": desc1,
	            "group2_stats": desc2,
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "alternative_hypothesis": alternative,
	            "alpha": alpha,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Tool boundary: callers expect an error dict, never an exception.
	        return {"error": f"Unexpected error in paired t-test: {str(e)}"}

	def one_sample_t_test(
	    dataframe: Optional[pd.DataFrame] = None,
	    group_str: Optional[str] = None,
	    population_mean: float = 0.0,
	    alternative: str = "two-sided",
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.2,0.5,0.8"
	) -> Dict[str, Any]:
	    """
	    Accepts a single group of numeric data as comma-separated string or DataFrame column and performs a one-sample t-test
	    against a known or hypothesized population mean. This test determines whether the sample mean differs significantly
	    from the specified population mean. The test calculates a t-statistic quantifying how many standard errors the sample
	    mean is away from the hypothesized population mean. Given an alternative hypothesis (sample ≠ population, sample < population,
	    or sample > population), it outputs the p-value: the probability of observing this result (or more extreme) if the true
	    population mean equals the hypothesized value. Results are statistically significant when p-value < alpha (typically 0.05).
	    Cohen's d measures practical effect size, calculated as the difference between sample and population means divided by the
	    sample standard deviation, with interpretation: |d| < 0.2 = negligible, 0.2-0.5 = small, 0.5-0.8 = medium, >0.8 = large
	    (custom thresholds may be used).
	    EXAMPLE USE CASES: testing if sample mean differs from known standard, quality control against specification,
	    comparing sample performance against established benchmark.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing sample data in first column.
	            If provided, group_str will be ignored.
	        group_str (Optional[str]): Comma-separated string of numeric values for the sample.
	            Example: "85.2,90.1,78.5,92.3" (test scores)
	            Only used if dataframe is None or empty.
	        population_mean (float): Hypothesized or known population mean to test against.
	        alternative (str): Direction of the alternative hypothesis:
	            - "two-sided": sample mean ≠ population mean (different in either direction)
	            - "less": sample mean < population mean (sample is smaller)
	            - "greater": sample mean > population mean (sample is larger)
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if p_value below this threshold.
	            Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large

	    Returns:
	        dict: On failure, a dict with a single "error" key describing the problem (this function
	        does not raise). On success, comprehensive test results with the following keys:
	            - test_type (str): Always "One-sample t-test"
	            - t_statistic (float): The calculated t-value measuring sample mean deviation from population mean
	            - p_value (float): Probability of observing this result if null hypothesis is true
	            - degrees_of_freedom (int): df = n - 1, where n is the sample size
	            - cohens_d (float): Effect size measure. Positive means sample > population, negative means sample < population
	            - pooled_std (float): Sample standard deviation (used in Cohen's d calculation)
	            - group_stats (dict): Descriptive statistics for the sample (mean, std, n)
	            - significant (bool): True if p_value < alpha
	            - effect_size (str): Categorical interpretation of Cohen's d magnitude
	            - alternative_hypothesis (str): Echo of alternative parameter
	            - alpha (float): Echo of significance level used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	    """
	    try:
	        # Parse effect size thresholds
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError, TypeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # Reject a bad hypothesis direction up front so the caller gets a
	        # specific message rather than a generic "unexpected error".
	        if alternative not in ("two-sided", "less", "greater"):
	            return {"error": f"Invalid alternative '{alternative}'. Use 'two-sided', 'less', or 'greater'."}

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines).
	        # A non-empty DataFrame always has at least one column, so no separate
	        # column-count check is needed before reading the first column.
	        if dataframe is not None and not dataframe.empty:
	            # Extract and validate data from first column
	            try:
	                # Convert to numeric, coercing errors to NaN
	                col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')

	                # Remove NaN values and convert to list
	                group = col1_numeric.dropna().tolist()

	                # Check if we lost too much data due to non-numeric values
	                original_count = len(dataframe.iloc[:, 0].dropna())

	                if len(group) < original_count * 0.5:  # Lost more than 50% of data
	                    return {"error": f"Column 1 contains too many non-numeric values. Only {len(group)} out of {original_count} values could be converted to numbers."}

	            except Exception as e:
	                return {"error": f"Error processing DataFrame column: {str(e)}. Ensure column contains numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif group_str and group_str.strip():
	            try:
	                group = parse_numeric_input(group_str)
	            except ValueError as e:
	                return {"error": f"String parsing error: {str(e)}"}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR a comma-separated string for the sample. Do not leave input empty."}

	        # Validate extracted data
	        if len(group) < 2:
	            return {"error": f"Sample must have at least 2 observations. Found {len(group)} values."}

	        # Convert to numpy array for calculations
	        data = np.array(group, dtype=float)

	        # Descriptive statistics, stored as builtin floats/ints so the result
	        # dict contains no NumPy scalar types.
	        group_stats = {"mean": float(np.mean(data)), "std": float(np.std(data, ddof=1)), "n": len(data)}
	        sample_std = group_stats["std"]

	        # A constant sample has no spread, so the t-statistic and Cohen's d
	        # divide by zero. NaN would fail every "<" comparison below and be
	        # mislabelled "large", so report the problem instead.
	        if sample_std == 0:
	            return {"error": "The sample has zero variance; the t-statistic and Cohen's d are undefined."}

	        # Perform one-sample t-test
	        t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)
	        t_stat, p_value = float(t_stat), float(p_value)

	        # Degrees of freedom
	        df = len(data) - 1

	        # Effect size (Cohen's d for one-sample: (sample_mean - population_mean) / sample_std)
	        cohens_d = (group_stats["mean"] - population_mean) / sample_std

	        # Interpretation using the supplied thresholds (Cohen's benchmarks by default)
	        significant = bool(p_value < alpha)
	        abs_d = abs(cohens_d)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if abs_d < small_threshold:
	            effect_size_interp = "negligible"
	        elif abs_d < medium_threshold:
	            effect_size_interp = "small"
	        elif abs_d < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": "One-sample t-test",
	            "t_statistic": t_stat,
	            "p_value": p_value,
	            "degrees_of_freedom": df,
	            "cohens_d": cohens_d,
	            "pooled_std": sample_std,
	            "group_stats": group_stats,
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "alternative_hypothesis": alternative,
	            "alpha": alpha,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Tool boundary: callers expect an error dict, never an exception.
	        return {"error": f"Unexpected error in one-sample t-test: {str(e)}"}


	def one_way_anova(
	    dataframe: Optional[pd.DataFrame] = None,
	    groups_str: Optional[str] = None,
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.01,0.06,0.14"
	) -> Dict[str, Any]:
	    """
	    Accepts multiple groups of numeric data as semicolon-separated groups or DataFrame columns and performs a one-way ANOVA
	    (Analysis of Variance). This test determines whether there are statistically significant differences between the means
	    of independent groups (typically three or more; two groups are also accepted). ANOVA tests the null hypothesis that all
	    group means are equal against the alternative that at least one group mean differs from the others. The test calculates
	    an F-statistic by comparing the variance between groups to the variance within groups. A significant result
	    (p-value < alpha) indicates that at least one group differs, but does not identify which specific groups differ
	    (post-hoc tests needed for pairwise comparisons). Eta-squared (η²) measures effect size as the proportion of total
	    variance explained by group membership, with interpretation: η² < 0.01 = negligible, 0.01-0.06 = small,
	    0.06-0.14 = medium, >0.14 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: comparing means across multiple treatment conditions, testing differences between multiple
	    demographic groups, evaluating performance across several experimental conditions.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing group data in columns. All columns will be treated as
	            separate groups. If provided, groups_str will be ignored.
	        groups_str (Optional[str]): Multiple groups separated by semicolons, with each group containing comma-separated values.
	            Example: "85.2,90.1,78.5;88.1,85.7,91.2;82.3,87.4,89.1" (3 groups with their respective values)
	            Only used if dataframe is None or empty.
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if p_value
	            below this threshold. Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining eta-squared effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.01,0.06,0.14" means: <0.01=negligible, 0.01-0.06=small, 0.06-0.14=medium, >0.14=large

	    Returns:
	        dict: Comprehensive test results with the following keys (or a single "error" key describing why the
	        test could not be run):
	            - test_type (str): Always "One-way ANOVA"
	            - f_statistic (float): The calculated F-value comparing between-group to within-group variance
	            - p_value (float): Probability of observing this result if null hypothesis is true
	            - degrees_of_freedom (dict): Contains df_between (groups-1) and df_within (total_n - groups)
	            - eta_squared (float): Effect size measure (proportion of variance explained by groups)
	            - group_stats (List[dict]): Descriptive statistics for each group (mean, std, n)
	            - significant (bool): True if p_value < alpha
	            - effect_size (str): Categorical interpretation of eta-squared magnitude
	            - alpha (float): Echo of significance level used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	    """
	    try:
	        # Parse effect size thresholds; only parsing failures are treated as a format error.
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.01,0.06,0.14')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        groups = []

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines)
	        if dataframe is not None and not dataframe.empty:
	            # Use all columns as separate groups
	            if len(dataframe.columns) < 2:
	                return {"error": f"DataFrame must have at least 2 columns for ANOVA. Found {len(dataframe.columns)} columns."}

	            try:
	                # Positional access keeps this working when column labels are duplicated.
	                for col_idx in range(len(dataframe.columns)):
	                    column = dataframe.iloc[:, col_idx]
	                    group_data = pd.to_numeric(column, errors='coerce').dropna().tolist()

	                    # Reject columns where coercion discarded more than half of the non-missing cells.
	                    original_count = len(column.dropna())
	                    if len(group_data) < original_count * 0.5:
	                        return {"error": f"Column {col_idx+1} contains too many non-numeric values. Only {len(group_data)} out of {original_count} values could be converted to numbers."}

	                    if len(group_data) < 2:
	                        return {"error": f"Column {col_idx+1} must have at least 2 observations. Found {len(group_data)} values."}

	                    groups.append(group_data)

	            except Exception as e:
	                return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif groups_str and groups_str.strip():
	            try:
	                # Split by semicolon to separate groups
	                group_strings = [group.strip() for group in groups_str.split(';') if group.strip()]

	                if len(group_strings) < 2:
	                    return {"error": "ANOVA requires at least 2 groups. Please provide groups separated by semicolons (;)."}

	                for i, group_str in enumerate(group_strings):
	                    try:
	                        group_data = parse_numeric_input(group_str)
	                    except ValueError as e:
	                        return {"error": f"String parsing error for group {i+1}: {str(e)}"}
	                    if len(group_data) < 2:
	                        return {"error": f"Group {i+1} must have at least 2 observations. Found {len(group_data)} values."}
	                    groups.append(group_data)

	            except Exception as e:
	                return {"error": f"Error parsing groups string: {str(e)}. Use format 'group1_values;group2_values;group3_values' where each group contains comma-separated numbers."}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR a semicolon-separated string of groups. Do not leave input empty."}

	        # Validate we have enough groups
	        if len(groups) < 2:
	            return {"error": "ANOVA requires at least 2 groups. Please provide data for at least 2 groups."}

	        # Convert to numpy arrays for calculations
	        numpy_groups = [np.array(group, dtype=float) for group in groups]

	        # Perform ANOVA
	        f_stat, p_value = stats.f_oneway(*numpy_groups)

	        # Descriptive statistics per group, as plain Python numbers so the result serializes cleanly.
	        group_stats = [
	            {
	                "group": i + 1,
	                "mean": float(np.mean(group)),
	                "std": float(np.std(group, ddof=1)),
	                "n": len(group),
	            }
	            for i, group in enumerate(numpy_groups)
	        ]

	        # Effect size (eta-squared) = SS_between / SS_total
	        all_data = np.concatenate(numpy_groups)
	        overall_mean = np.mean(all_data)
	        ss_total = float(np.sum((all_data - overall_mean) ** 2))
	        ss_between = float(sum(len(group) * (np.mean(group) - overall_mean) ** 2 for group in numpy_groups))

	        # All-identical data has no variance to explain; report 0 rather than dividing by zero.
	        eta_squared = ss_between / ss_total if ss_total > 0 else 0.0

	        # Degrees of freedom
	        df_between = len(groups) - 1
	        df_within = len(all_data) - len(groups)

	        # Interpretation using effect size thresholds
	        significant = bool(p_value < alpha)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if eta_squared < small_threshold:
	            effect_size_interp = "negligible"
	        elif eta_squared < medium_threshold:
	            effect_size_interp = "small"
	        elif eta_squared < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": "One-way ANOVA",
	            "f_statistic": float(f_stat),
	            "p_value": float(p_value),
	            "degrees_of_freedom": {"df_between": df_between, "df_within": df_within},
	            "eta_squared": eta_squared,
	            "group_stats": group_stats,
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "alpha": alpha,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Top-level boundary: the tool always answers with a dict, never raises.
	        return {"error": f"Unexpected error in one-way ANOVA: {str(e)}"}

	def multi_way_anova(
	    dataframe: Optional[pd.DataFrame] = None,
	    dependent_var: Optional[str] = None,
	    factors: Optional[str] = None,
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.01,0.06,0.14",
	    include_interactions: bool = True,
	    max_interaction_order: Optional[int] = None,
	    sum_squares_type: int = 2
	) -> Dict[str, Any]:
	    """
	    Accepts multiple categorical factors and performs Multi-Way ANOVA to determine whether there are
	    statistically significant differences between group means when multiple factors are involved simultaneously.
	    Multi-way ANOVA extends the one-way ANOVA framework to handle complex experimental designs with multiple
	    categorical independent variables (factors), each with two or more levels. Unlike one-way ANOVA which tests
	    a single factor, multi-way ANOVA can simultaneously test: (1) main effects of each individual factor,
	    (2) interaction effects between factors, and (3) higher-order interactions. The test uses F-statistics to
	    compare variance between groups to variance within groups for each effect. Eta-squared (η²) measures effect
	    size as the proportion of total variance explained by each factor and interaction, with interpretation:
	    η² < 0.01 = negligible, 0.01-0.06 = small, 0.06-0.14 = medium, >0.14 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: 2-way ANOVA for treatment × gender effects on blood pressure, 3-way ANOVA for teaching
	    method × school type × student age on test scores, 4-way ANOVA for drug × dose × gender × age effects on recovery.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing the experimental data with factors as columns
	            and the dependent variable. All factors must be categorical.
	            If provided, dependent_var and factors parameters are required.
	        dependent_var (Optional[str]): Name of the dependent (outcome) variable column in the DataFrame.
	            Must be a continuous numeric variable.
	            Example: "test_score", "recovery_time", "blood_pressure"
	        factors (Optional[str]): Comma-separated string of factor column names from the DataFrame.
	            Format: "factor1,factor2,factor3"
	            Example: "treatment,gender,age_group" for a 3-way ANOVA
	            Each factor must be categorical with 2 or more levels.
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if
	            p_value below this threshold. Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining eta-squared effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.01,0.06,0.14" means: <0.01=negligible, 0.01-0.06=small, 0.06-0.14=medium, >0.14=large
	            These follow Cohen's conventions for eta-squared interpretation.
	        include_interactions (bool): Whether to include interaction terms in the model.
	            True (default): Tests main effects AND interactions
	            False: Tests only main effects (additive model)
	        max_interaction_order (Optional[int]): Maximum order of interactions to include in the model.
	            If None, includes all possible interactions up to the number of factors.
	            Example: For 4 factors, setting to 2 includes only 2-way interactions.
	            Values above the number of factors are clamped; values below 2 yield a main-effects-only model.
	            Useful for simplifying complex models with many factors.
	        sum_squares_type (int): Type of sum of squares calculation for the ANOVA table.
	            Type 1: Sequential (depends on order of factors)
	            Type 2: Marginal (recommended for balanced designs, default)
	            Type 3: Partial (recommended for unbalanced designs)

	    Returns:
	        dict: Comprehensive test results with the following keys (or a single "error" key describing why the
	        analysis could not be run):
	            - test_type (str): Description of the multi-way ANOVA performed (e.g., "3-way ANOVA with interactions (up to 3-way)")
	            - anova_table (pd.DataFrame): Complete ANOVA table with sum of squares, F-statistics, p-values, etc.
	            - significant_effects (List[str]): List of statistically significant main effects and interactions
	            - effect_sizes (Dict[str, float]): Eta-squared values for each effect measuring proportion of variance explained
	            - effect_interpretations (Dict[str, str]): Categorical interpretation of each effect size ("negligible", "small", "medium", "large")
	            - factor_summaries (Dict[str, dict]): Descriptive statistics for each factor level
	            - model_summary (dict): Overall model statistics (R², F-statistic, AIC, BIC, etc.)
	            - formula_used (str): The statsmodels formula string used for the analysis
	            - design_summary (dict): Information about the experimental design (balanced/unbalanced, sample sizes)
	            - alpha (float): Echo of significance level used
	            - factors_analyzed (List[str]): Echo of factors included in the analysis
	            - sum_squares_type (int): Echo of sum of squares type used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	    """
	    # Imported locally so the interaction builder does not depend on a module-level import.
	    import itertools

	    # NOTE(review): `ols` and `sm` (statsmodels) are expected to be imported at module level - confirm.

	    try:
	        # Parse effect size thresholds; only parsing failures are treated as a format error.
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.01,0.06,0.14')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # Validate inputs
	        if dataframe is None or dataframe.empty:
	            return {"error": "DataFrame cannot be None or empty"}

	        if not dependent_var:
	            return {"error": "Dependent variable name is required"}

	        if dependent_var not in dataframe.columns:
	            return {"error": f"Dependent variable '{dependent_var}' not found in DataFrame columns"}

	        if not factors:
	            return {"error": "Factor names are required. Provide as comma-separated string (e.g., 'factor1,factor2,factor3')"}

	        # Parse factors
	        try:
	            factor_list = [f.strip() for f in factors.split(',') if f.strip()]
	        except AttributeError:
	            return {"error": "Invalid factors format. Use comma-separated factor names (e.g., 'treatment,gender,age_group')"}
	        if len(factor_list) < 2:
	            return {"error": "At least 2 factors are required for multi-way ANOVA"}

	        # Check factors exist in DataFrame
	        missing_factors = [f for f in factor_list if f not in dataframe.columns]
	        if missing_factors:
	            return {"error": f"Factors not found in DataFrame: {missing_factors}"}

	        # Validate sum of squares type
	        if sum_squares_type not in [1, 2, 3]:
	            return {"error": "sum_squares_type must be 1, 2, or 3"}

	        # Clean and prepare the data
	        analysis_columns = [dependent_var] + factor_list
	        analysis_df = dataframe[analysis_columns].copy()

	        # Remove rows with missing values
	        initial_rows = len(analysis_df)
	        analysis_df = analysis_df.dropna()
	        final_rows = len(analysis_df)

	        if final_rows < initial_rows * 0.5:
	            return {"error": f"Too much missing data: only {final_rows} out of {initial_rows} rows usable"}

	        if final_rows < 20:
	            return {"error": f"Insufficient data after removing missing values: {final_rows} rows remaining (minimum 20 required)"}

	        # Validate dependent variable is numeric
	        try:
	            analysis_df[dependent_var] = pd.to_numeric(analysis_df[dependent_var])
	        except (ValueError, TypeError):
	            return {"error": f"Dependent variable '{dependent_var}' must be numeric"}

	        # Ensure factors are categorical and check levels
	        factor_level_counts = {}
	        for factor in factor_list:
	            # Drop categories that no longer occur (e.g. after dropna on an already-categorical
	            # column) so they are not counted as levels or reported as empty cells.
	            analysis_df[factor] = analysis_df[factor].astype('category').cat.remove_unused_categories()
	            unique_levels = len(analysis_df[factor].cat.categories)
	            factor_level_counts[factor] = unique_levels

	            if unique_levels < 2:
	                return {"error": f"Factor '{factor}' must have at least 2 levels. Found {unique_levels} level(s)"}

	            if unique_levels > 20:
	                return {"error": f"Factor '{factor}' has too many levels ({unique_levels}). Consider combining levels or using a different analysis method"}

	        # Check for sufficient observations per factor combination
	        try:
	            # observed=False enumerates every level combination, so unobserved cells show up
	            # with a count of 0 regardless of the pandas default.
	            cell_counts = analysis_df.groupby(factor_list, observed=False).size()
	            empty_cells = int((cell_counts == 0).sum())
	            min_cell_size = int(cell_counts.min())

	            # Empty cells are checked first: they would otherwise always be reported as the
	            # less specific "fewer than 2 observations" error.
	            if empty_cells > 0:
	                return {"error": f"Missing data: {empty_cells} factor combinations have no observations"}

	            if min_cell_size < 2:
	                return {"error": f"Some factor combinations have fewer than 2 observations. Minimum cell size: {min_cell_size}"}

	        except Exception as e:
	            return {"error": f"Error checking experimental design: {str(e)}"}

	        # Main effects are always included
	        formula_terms = [f"C({factor})" for factor in factor_list]

	        # Add interaction terms if requested; effective_order is the highest order actually included.
	        effective_order = 1
	        if include_interactions:
	            requested_order = len(factor_list) if max_interaction_order is None else max_interaction_order
	            effective_order = min(requested_order, len(factor_list))  # Don't exceed number of factors

	            for order in range(2, effective_order + 1):
	                for combination in itertools.combinations(factor_list, order):
	                    formula_terms.append(":".join(f"C({factor})" for factor in combination))

	        # Build the complete formula
	        formula = f"{dependent_var} ~ " + " + ".join(formula_terms)

	        # Fit the model
	        try:
	            model = ols(formula, data=analysis_df).fit()
	        except Exception as e:
	            return {"error": f"Model fitting failed: {str(e)}. This may indicate perfect multicollinearity or insufficient data variation"}

	        # Generate ANOVA table
	        try:
	            anova_table = sm.stats.anova_lm(model, typ=sum_squares_type)
	        except Exception as e:
	            return {"error": f"ANOVA table generation failed: {str(e)}"}

	        # Type 3 tables carry an 'Intercept' row; it is neither an effect nor part of the
	        # variance being partitioned, so it is excluded from both the total and the effects.
	        effects_table = anova_table.drop(index=['Residual', 'Intercept'], errors='ignore')
	        total_ss = float(anova_table.drop(index='Intercept', errors='ignore')['sum_sq'].sum())

	        # Effect sizes (eta-squared), their interpretation, and significance in one pass
	        small_threshold, medium_threshold, large_threshold = thresholds
	        effect_sizes = {}
	        effect_interpretations = {}
	        significant_effects = []
	        for effect_name, row in effects_table.iterrows():
	            eta_squared = float(row['sum_sq'] / total_ss) if total_ss > 0 else 0.0
	            effect_sizes[effect_name] = eta_squared

	            if eta_squared < small_threshold:
	                effect_interpretations[effect_name] = "negligible"
	            elif eta_squared < medium_threshold:
	                effect_interpretations[effect_name] = "small"
	            elif eta_squared < large_threshold:
	                effect_interpretations[effect_name] = "medium"
	            else:
	                effect_interpretations[effect_name] = "large"

	            if row['PR(>F)'] < alpha:
	                significant_effects.append(effect_name)

	        # Calculate factor summaries
	        factor_summaries = {}
	        for factor in factor_list:
	            factor_stats = analysis_df.groupby(factor, observed=False)[dependent_var].agg(['mean', 'std', 'count']).round(4)
	            factor_summaries[factor] = factor_stats.to_dict('index')

	        # Model summary statistics, as plain Python numbers
	        model_summary = {
	            "r_squared": float(model.rsquared),
	            "adj_r_squared": float(model.rsquared_adj),
	            "f_statistic": float(model.fvalue),
	            "f_pvalue": float(model.f_pvalue),
	            "aic": float(model.aic),
	            "bic": float(model.bic),
	            "df_model": float(model.df_model),
	            "df_resid": float(model.df_resid),
	            "n_observations": int(model.nobs),
	            "mse_resid": float(model.mse_resid)
	        }

	        # Design summary
	        design_summary = {
	            "n_factors": len(factor_list),
	            "factor_levels": factor_level_counts,
	            "total_possible_combinations": int(np.prod(list(factor_level_counts.values()))),
	            "observed_combinations": int((cell_counts > 0).sum()),
	            "is_balanced": bool(cell_counts.nunique() == 1),  # All cells have same count
	            "min_cell_size": min_cell_size,
	            "max_cell_size": int(cell_counts.max()),
	            "mean_cell_size": round(float(cell_counts.mean()), 2)
	        }

	        # Describe the model that was actually fitted
	        n_factors = len(factor_list)
	        test_description = f"{n_factors}-way ANOVA"
	        if include_interactions and effective_order >= 2:
	            test_description += f" with interactions (up to {effective_order}-way)"
	        else:
	            test_description += " (main effects only)"

	        return {
	            "test_type": test_description,
	            "anova_table": anova_table,
	            "significant_effects": significant_effects,
	            "effect_sizes": effect_sizes,
	            "effect_interpretations": effect_interpretations,
	            "factor_summaries": factor_summaries,
	            "model_summary": model_summary,
	            "formula_used": formula,
	            "design_summary": design_summary,
	            "alpha": alpha,
	            "factors_analyzed": factor_list,
	            "sum_squares_type": sum_squares_type,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Top-level boundary: the tool always answers with a dict, never raises.
	        return {"error": f"Unexpected error in multi-way ANOVA: {str(e)}"}

	def chi_square_test(
	    dataframe: Optional[pd.DataFrame] = None,
	    observed_str: Optional[str] = None,
	    expected_str: Optional[str] = None,
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.1,0.3,0.5"
	) -> Dict[str, Any]:
	    """
	    Accepts observed frequencies (and optionally expected frequencies) as comma-separated strings or DataFrame columns
	    and performs a chi-square goodness of fit test. This test determines whether observed categorical data frequencies
	    differ significantly from expected frequencies. If no expected frequencies are provided, the test assumes equal
	    distribution across all categories. The test calculates a chi-square statistic measuring the discrepancy between
	    observed and expected frequencies. A significant result (p-value < alpha) indicates that the observed distribution
	    differs from the expected distribution. Cramér's V measures effect size as the strength of association, with
	    interpretation: V < 0.1 = negligible, 0.1-0.3 = small, 0.3-0.5 = medium, >0.5 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: testing if dice rolls follow uniform distribution, comparing observed vs expected sales across
	    categories, analyzing survey response distributions.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing frequency data in first column (observed) and
	            optionally second column (expected). If provided, string parameters will be ignored.
	            Observed and expected values are paired row by row; a second column with no numeric
	            values is treated as "no expected frequencies given".
	        observed_str (Optional[str]): Comma-separated string of observed frequencies.
	            Example: "25,30,20,15" (frequencies for 4 categories)
	            Only used if dataframe is None or empty.
	        expected_str (Optional[str]): Comma-separated string of expected frequencies (optional).
	            Example: "22.5,22.5,22.5,22.5" (equal distribution)
	            If not provided, assumes equal distribution. Only used if dataframe is None or empty.
	            Expected frequencies must sum to the same total as the observed frequencies.
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if
	            p_value below this threshold. Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining Cramér's V effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.1,0.3,0.5" means: <0.1=negligible, 0.1-0.3=small, 0.3-0.5=medium, >0.5=large

	    Returns:
	        dict: Comprehensive test results with the following keys (or a single "error" key describing why the
	        test could not be run):
	            - test_type (str): Always "Chi-square goodness of fit test"
	            - chi_square_statistic (float): The calculated chi-square value measuring discrepancy from expected
	            - p_value (float): Probability of observing this result if null hypothesis is true
	            - degrees_of_freedom (int): df = categories - 1
	            - cramers_v (float): Effect size measure (strength of association)
	            - significant (bool): True if p_value < alpha
	            - effect_size (str): Categorical interpretation of Cramér's V magnitude
	            - alpha (float): Echo of significance level used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	    """
	    try:
	        # Parse effect size thresholds; only parsing failures are treated as a format error.
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.1,0.3,0.5')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # None means "no expected frequencies supplied"; the uniform default is filled in
	        # only after the observed data has been validated.
	        expected = None

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines)
	        if dataframe is not None and not dataframe.empty:
	            # Use first column for observed, second column for expected (if available)
	            if len(dataframe.columns) < 1:
	                return {"error": f"DataFrame must have at least 1 column. Found {len(dataframe.columns)} columns."}

	            try:
	                # Convert first column to numeric (observed frequencies)
	                raw_observed = dataframe.iloc[:, 0]
	                observed_numeric = pd.to_numeric(raw_observed, errors='coerce')
	                observed_rows = observed_numeric.notna()
	                observed = observed_numeric[observed_rows].tolist()

	                # Check if we lost too much data
	                original_count1 = len(raw_observed.dropna())
	                if len(observed) < original_count1 * 0.5:
	                    return {"error": f"Column 1 contains too many non-numeric values. Only {len(observed)} out of {original_count1} values could be converted to numbers."}

	                # Check for second column (expected frequencies)
	                if len(dataframe.columns) >= 2:
	                    expected_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
	                    expected_rows = expected_numeric.notna()

	                    # A blank second column falls through to the uniform default.
	                    if expected_rows.any():
	                        if int(expected_rows.sum()) != len(observed):
	                            return {"error": "Observed and expected columns must have the same number of valid entries."}
	                        # Pair values by row so a gap in one column cannot shift the pairing.
	                        if (expected_rows != observed_rows).any():
	                            return {"error": "Observed and expected frequencies must be provided on the same rows."}
	                        expected = expected_numeric[observed_rows].tolist()

	            except Exception as e:
	                return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif observed_str and observed_str.strip():
	            try:
	                observed = parse_numeric_input(observed_str)

	                if expected_str and expected_str.strip():
	                    expected = parse_numeric_input(expected_str)
	                    if len(observed) != len(expected):
	                        return {"error": "Observed and expected must have the same number of categories."}

	            except ValueError as e:
	                return {"error": f"String parsing error: {str(e)}"}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR a comma-separated string for observed frequencies. Do not leave input empty."}

	        # Validate extracted data
	        if len(observed) < 2:
	            return {"error": f"Need at least 2 categories for chi-square test. Found {len(observed)} categories."}

	        if expected is None:
	            # Equal distribution across all categories
	            total = sum(observed)
	            expected = [total / len(observed)] * len(observed)

	        # Check for non-negative frequencies
	        if any(x < 0 for x in observed) or any(x < 0 for x in expected):
	            return {"error": "Frequencies cannot be negative."}

	        # Check for zero expected frequencies
	        if any(x <= 0 for x in expected):
	            return {"error": "Expected frequencies must be greater than zero."}

	        observed_array = np.array(observed, dtype=float)
	        expected_array = np.array(expected, dtype=float)

	        # The goodness-of-fit statistic is only meaningful when both sets of frequencies
	        # describe the same total count; report that explicitly instead of as an
	        # "unexpected" failure from the underlying routine.
	        if not np.isclose(observed_array.sum(), expected_array.sum(), rtol=1e-8, atol=0.0):
	            return {"error": f"Observed and expected frequencies must sum to the same total. Observed total: {observed_array.sum()}, expected total: {expected_array.sum()}."}

	        # Perform chi-square test
	        chi2_stat, p_value = stats.chisquare(observed_array, expected_array)

	        # Degrees of freedom
	        df = len(observed) - 1

	        # Effect size (Cramér's V for goodness of fit)
	        n = float(observed_array.sum())
	        cramers_v = float(np.sqrt(chi2_stat / (n * df))) if df > 0 and n > 0 else 0.0

	        # Interpretation using effect size thresholds
	        significant = bool(p_value < alpha)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if cramers_v < small_threshold:
	            effect_size_interp = "negligible"
	        elif cramers_v < medium_threshold:
	            effect_size_interp = "small"
	        elif cramers_v < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": "Chi-square goodness of fit test",
	            "chi_square_statistic": float(chi2_stat),
	            "p_value": float(p_value),
	            "degrees_of_freedom": df,
	            "cramers_v": cramers_v,
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "alpha": alpha,
	            "effect_thresholds": thresholds
	        }

	    except Exception as e:
	        # Top-level boundary: the tool always answers with a dict, never raises.
	        return {"error": f"Unexpected error in chi-square test: {str(e)}"}


	def correlation_test(
	    dataframe: Optional[pd.DataFrame] = None,
	    group1_str: Optional[str] = None,
	    group2_str: Optional[str] = None,
	    method: str = "pearson",
	    alpha: float = 0.05,
	    effect_thresholds: str = "0.1,0.3,0.5"
	) -> Dict[str, Any]:
	    """
	    Accepts two variables as comma-separated strings or DataFrame columns and performs correlation analysis.
	    This test determines the strength and direction of the linear relationship between two continuous variables.
	    Pearson correlation measures linear relationships, Spearman correlation measures monotonic relationships
	    (rank-based), and Kendall's tau is robust to outliers and suitable for small samples. The test calculates
	    a correlation coefficient ranging from -1 (perfect negative correlation) to +1 (perfect positive correlation),
	    with 0 indicating no linear relationship. A significant result (p-value < alpha) indicates that the observed
	    correlation is statistically different from zero. Effect size interpretation: |r| < 0.1 = negligible,
	    0.1-0.3 = small, 0.3-0.5 = medium, >0.5 = large (custom thresholds may be used).
	    EXAMPLE USE CASES: examining relationship between height and weight, analyzing correlation between study time
	    and test scores, investigating association between variables in research.

	    Args:
	        dataframe (Optional[pd.DataFrame]): DataFrame containing two variables in first two columns.
	            If provided, group1_str and group2_str will be ignored.
	        group1_str (Optional[str]): Comma-separated string of numeric values for the first variable (X).
	            Example: "5.2,6.1,4.8,7.3" (hours studied)
	            Only used if dataframe is None or empty.
	        group2_str (Optional[str]): Comma-separated string of numeric values for the second variable (Y).
	            Example: "78,85,72,92" (test scores)
	            Only used if dataframe is None or empty.
	        method (str): Correlation method to use (case-insensitive):
	            - "pearson": Pearson product-moment correlation (linear relationships)
	            - "spearman": Spearman rank correlation (monotonic relationships)
	            - "kendall": Kendall's tau (robust to outliers, good for small samples)
	        alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if
	            p_value below this threshold. Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
	        effect_thresholds (str): Three comma-separated values defining correlation effect size boundaries.
	            Format: "small_threshold,medium_threshold,large_threshold"
	            Default "0.1,0.3,0.5" means: <0.1=negligible, 0.1-0.3=small, 0.3-0.5=medium, >0.5=large

	    Returns:
	        dict: Comprehensive test results with the following keys (or a single "error" key describing why the
	        test could not be run, e.g. when one variable is constant):
	            - test_type (str): Type of correlation test performed
	            - correlation_coefficient (float): The calculated correlation coefficient (-1 to +1)
	            - p_value (float): Probability of observing this result if null hypothesis (no correlation) is true
	            - sample_size (int): Number of paired observations
	            - significant (bool): True if p_value < alpha
	            - effect_size (str): Categorical interpretation of correlation magnitude
	            - method (str): Echo of correlation method used
	            - alpha (float): Echo of significance level used
	            - effect_thresholds (List[float]): Echo of effect size thresholds used
	            - group1_stats (dict): Descriptive statistics for first variable (mean, std, n)
	            - group2_stats (dict): Descriptive statistics for second variable (mean, std, n)
	    """
	    try:
	        # Parse effect size thresholds; only parsing failures are treated as a format error.
	        try:
	            thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
	        except (ValueError, AttributeError):
	            return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.1,0.3,0.5')"}
	        if len(thresholds) != 3:
	            return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}

	        # Method 1: DataFrame input (preferred for LLMs and data pipelines)
	        if dataframe is not None and not dataframe.empty:
	            # Use first two columns
	            if len(dataframe.columns) < 2:
	                return {"error": f"DataFrame must have at least 2 columns for correlation. Found {len(dataframe.columns)} columns."}

	            try:
	                # Convert to numeric, coercing errors to NaN
	                col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
	                col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')

	                # Keep only rows where both values are present so the pairs stay aligned
	                valid_mask = col1_numeric.notna() & col2_numeric.notna()
	                group1 = col1_numeric[valid_mask].tolist()
	                group2 = col2_numeric[valid_mask].tolist()

	                # Check if we lost too much data
	                original_count = len(dataframe)
	                if len(group1) < original_count * 0.5:
	                    return {"error": f"Too many non-numeric values in the data. Only {len(group1)} out of {original_count} rows could be used."}

	            except Exception as e:
	                return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}

	        # Method 2: String input (preferred for humans and simple use cases)
	        elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
	            try:
	                group1 = parse_numeric_input(group1_str)
	                group2 = parse_numeric_input(group2_str)
	            except ValueError as e:
	                return {"error": f"String parsing error: {str(e)}"}

	            if len(group1) != len(group2):
	                return {"error": f"Variables must have the same number of observations. Variable 1 has {len(group1)}, Variable 2 has {len(group2)}."}

	        else:
	            return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both variables. Do not leave inputs empty."}

	        # Validate extracted data
	        if len(group1) < 3:
	            return {"error": f"Need at least 3 paired observations for correlation. Found {len(group1)} pairs."}

	        # Choose correlation method
	        correlation_methods = {
	            "pearson": (stats.pearsonr, "Pearson correlation"),
	            "spearman": (stats.spearmanr, "Spearman rank correlation"),
	            "kendall": (stats.kendalltau, "Kendall's tau correlation"),
	        }
	        method_lower = str(method).strip().lower()
	        if method_lower not in correlation_methods:
	            return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}
	        correlate, test_name = correlation_methods[method_lower]

	        data1 = np.array(group1, dtype=float)
	        data2 = np.array(group2, dtype=float)

	        # A constant variable makes the coefficient undefined (NaN); NaN fails every "<"
	        # comparison below and would otherwise be labelled a "large" effect.
	        if data1.max() == data1.min() or data2.max() == data2.min():
	            return {"error": "Correlation is undefined when a variable is constant (zero variance). Provide data with at least two distinct values per variable."}

	        # Perform correlation analysis
	        corr_coef, p_value = correlate(data1, data2)
	        corr_coef = float(corr_coef)
	        p_value = float(p_value)

	        # Calculate descriptive statistics
	        desc1 = {"mean": float(np.mean(data1)), "std": float(np.std(data1, ddof=1)), "n": len(data1)}
	        desc2 = {"mean": float(np.mean(data2)), "std": float(np.std(data2, ddof=1)), "n": len(data2)}

	        # Interpretation using effect size thresholds
	        significant = bool(p_value < alpha)
	        abs_corr = abs(corr_coef)
	        small_threshold, medium_threshold, large_threshold = thresholds
	        if abs_corr < small_threshold:
	            effect_size_interp = "negligible"
	        elif abs_corr < medium_threshold:
	            effect_size_interp = "small"
	        elif abs_corr < large_threshold:
	            effect_size_interp = "medium"
	        else:
	            effect_size_interp = "large"

	        return {
	            "test_type": test_name,
	            "correlation_coefficient": corr_coef,
	            "p_value": p_value,
	            "sample_size": len(data1),
	            "significant": significant,
	            "effect_size": effect_size_interp,
	            "method": method_lower,
	            "alpha": alpha,
	            "effect_thresholds": thresholds,
	            "group1_stats": desc1,
	            "group2_stats": desc2
	        }

	    except Exception as e:
	        # Top-level boundary: the tool always answers with a dict, never raises.
	        return {"error": f"Unexpected error in correlation test: {str(e)}"}

	# SHARED UTILITY FUNCTIONS (Hidden from MCP)
	def load_uploaded_file(file_path, has_header_flag):
	    """Load an uploaded CSV/Excel file and return ``(dataframe, preview)``.

	    Only the first two columns of the file are kept. Both are coerced to
	    numeric values (anything unparseable becomes NaN) and rows in which both
	    values are NaN are dropped.

	    Args:
	        file_path: Path of the uploaded file, or None when nothing is uploaded.
	        has_header_flag: Truthy when the first row of the file holds column names.

	    Returns:
	        tuple: ``(df, preview)`` where ``df`` is the cleaned two-column
	        DataFrame and ``preview`` its first 10 rows. ``(None, None)`` when no
	        file is given; ``(None, error_df)`` with a single ``'Error'`` column
	        when the file cannot be used.
	    """
	    if file_path is None:
	        return None, None

	    try:
	        # header=0 -> first row is the header; header=None -> no header row
	        header_param = 0 if has_header_flag else None

	        # Compare the extension case-insensitively so "DATA.CSV" is accepted too
	        lowered_path = str(file_path).lower()
	        if lowered_path.endswith('.csv'):
	            df = pd.read_csv(file_path, header=header_param)
	        elif lowered_path.endswith(('.xlsx', '.xls')):
	            df = pd.read_excel(file_path, header=header_param)
	        else:
	            return None, pd.DataFrame({'Error': ['Unsupported file format']})

	        if len(df.columns) < 2:
	            return None, pd.DataFrame({'Error': ['File must have at least 2 columns']})

	        # Take only first two columns
	        df_subset = df.iloc[:, :2].copy()

	        if has_header_flag and not str(df_subset.columns[0]).startswith('Unnamed'):
	            # Keep the file's own names (as strings) unless pandas auto-generated them
	            df_subset.columns = [str(name) for name in df_subset.columns]
	        else:
	            # Use default names
	            df_subset.columns = ['Group1', 'Group2']

	        # Build new numeric columns instead of assigning through .iloc:
	        # an in-place .iloc assignment keeps an object-dtype column as object,
	        # whereas apply() returns properly typed numeric columns.
	        df_subset = df_subset.apply(pd.to_numeric, errors='coerce')

	        # Remove rows where both values are NaN
	        df_subset = df_subset.dropna(how='all')

	        # Full frame for processing, first 10 rows for display
	        return df_subset, df_subset.head(10)
	    except Exception as e:
	        # UI boundary: report the failure in the preview instead of raising
	        error_df = pd.DataFrame({'Error': [f"Failed to load file: {str(e)}"]})
	        return None, error_df

	def toggle_input_method(method):
	    """Return visibility updates for the (file section, text section) pair.

	    The file section is shown only when ``method`` is "File Upload"; for any
	    other value the text section is shown instead.
	    """
	    show_file_section = method == "File Upload"
	    return (
	        gr.update(visible=show_file_section),
	        gr.update(visible=not show_file_section),
	    )

	def clear_all():
	    """Return the default value for every two-group form output, in wiring order.

	    The order matches the ``outputs`` list of the "Clear All" button:
	    input_method, loaded_dataframe, data_preview, group1_str, group2_str,
	    alternative, alpha, effect_thresholds, output.
	    """
	    defaults = [
	        "File Upload",   # input_method
	        None,            # loaded_dataframe
	        None,            # data_preview
	        "",              # group1_str
	        "",              # group2_str
	        "two-sided",     # alternative
	        0.05,            # alpha
	        "0.2,0.5,0.8",   # effect_thresholds
	        {},              # output
	    ]
	    return tuple(defaults)

	def load_example():
	    """Return a small two-group demo data set for the t-test tabs.

	    Returns:
	        tuple: ("File Upload", example DataFrame, its first 10 rows for the
	        preview, "" for group1_str, "" for group2_str).
	    """
	    treatment_scores = [85.2, 90.1, 78.5, 92.3, 88.7, 86.4, 89.2]
	    control_scores = [88.1, 85.7, 91.2, 87.4, 89.3, 90.8, 86.9]
	    example_df = pd.DataFrame({
	        'Treatment': treatment_scores,
	        'Control': control_scores,
	    })
	    # Text boxes are blanked so the DataFrame is the only populated input
	    return "File Upload", example_df, example_df.head(10), "", ""

	# COMPONENT FACTORY FUNCTIONS
	def create_input_components():
	    """Build the data-entry widgets shared by the two-group test tabs.

	    Creates, in this order: the input-method radio, the file-upload section
	    (visible by default) and the text-input section (hidden by default).

	    Returns:
	        tuple: (input_method, file_section, text_section, file_upload,
	        has_header, data_preview, group1_str, group2_str)
	    """
	    method_choices = ["File Upload", "Text Input"]
	    accepted_extensions = [".csv", ".xlsx", ".xls"]

	    # Radio that selects which of the two sections below is shown
	    input_method = gr.Radio(
	        label="Choose Input Method",
	        info="Select how you want to provide your data",
	        choices=method_choices,
	        value=method_choices[0],
	    )

	    # --- File upload section -------------------------------------------
	    with gr.Group(visible=True) as file_section:
	        gr.Markdown("### File Upload")
	        gr.Markdown("Upload CSV or Excel file - first two columns will be used as Group 1 and Group 2")

	        with gr.Row():
	            file_upload = gr.File(
	                type="filepath",
	                file_types=accepted_extensions,
	                label="Upload CSV/Excel File",
	            )
	            has_header = gr.Checkbox(
	                value=True,
	                label="File has header row",
	                info="Check if first row contains column names",
	            )

	        # Read-only preview of whatever was loaded from the file
	        data_preview = gr.Dataframe(
	            row_count=5,
	            interactive=False,
	            label="Data Preview (first two columns)",
	        )

	    # --- Text input section --------------------------------------------
	    with gr.Group(visible=False) as text_section:
	        gr.Markdown("### Text Input")
	        gr.Markdown("Enter comma-separated numbers for each group")

	        group1_str = gr.Textbox(
	            label="Group 1 Data",
	            info="Comma-separated numbers (e.g., test scores for condition A)",
	            placeholder="85.2,90.1,78.5,92.3,88.7",
	        )
	        group2_str = gr.Textbox(
	            label="Group 2 Data",
	            info="Comma-separated numbers (e.g., test scores for condition B)",
	            placeholder="88.1,85.7,91.2,87.4,89.3",
	        )

	    return (
	        input_method, file_section, text_section, file_upload,
	        has_header, data_preview, group1_str, group2_str,
	    )

	def create_parameter_components():
	    """Build the test-parameter widgets shared by the two-group test tabs.

	    Returns:
	        tuple: (alternative, alpha, effect_thresholds) - a dropdown, a number
	        field limited to [0, 1] and a text box, laid out in a single row.
	    """
	    hypothesis_choices = ["two-sided", "less", "greater"]

	    gr.Markdown("### Test Parameters")
	    with gr.Row():
	        alternative = gr.Dropdown(
	            label="Alternative Hypothesis",
	            info="two-sided: groups differ; less: group1 < group2; greater: group1 > group2",
	            choices=hypothesis_choices,
	            value=hypothesis_choices[0],
	        )
	        alpha = gr.Number(
	            label="Significance Level (α)",
	            info="Probability threshold for statistical significance (typically 0.05)",
	            value=0.05,
	            minimum=0,
	            maximum=1,
	            step=0.01,
	        )
	        effect_thresholds = gr.Textbox(
	            label="Effect Size Thresholds",
	            info="Cohen's d boundaries: small,medium,large (Cohen's canonical values)",
	            value="0.2,0.5,0.8",
	        )

	    return alternative, alpha, effect_thresholds

	def create_t_test_tab(test_function, test_name, description):
	    """
	    Build one complete two-group t-test tab: widgets, buttons and event wiring.

	    Args:
	        test_function: Statistical function wired directly to the run button.
	            It is called with (dataframe, group1_str, group2_str, alternative,
	            alpha, effect_thresholds).
	        test_name: Tab title; also used in the run button label.
	        description: Markdown text shown at the top of the tab.

	    Returns:
	        dict: The created components and the DataFrame state, keyed by name.
	    """

	    with gr.TabItem(test_name):
	        gr.Markdown(description)

	        # Shared data-entry widgets
	        (
	            input_method, file_section, text_section, file_upload,
	            has_header, data_preview, group1_str, group2_str,
	        ) = create_input_components()

	        # Shared parameter widgets
	        alternative, alpha, effect_thresholds = create_parameter_components()

	        # Action buttons
	        with gr.Row():
	            run_button = gr.Button(f"Run {test_name}", variant="primary", scale=1)
	            clear_button = gr.Button("Clear All", variant="secondary", scale=1)

	        # Result display
	        output = gr.JSON(label="Statistical Test Results")

	        # Example data section
	        with gr.Row():
	            gr.Markdown("### Quick Examples")
	            example_button = gr.Button("Load Example Data", variant="outline")

	        # Holds the DataFrame parsed from the uploaded file (or the example)
	        loaded_dataframe = gr.State(value=None)

	        # ---------------- EVENT HANDLERS ----------------
	        # Switch the visible section when the radio changes (UI helper, hidden from MCP)
	        input_method.change(
	            fn=toggle_input_method,
	            inputs=input_method,
	            outputs=[file_section, text_section],
	            show_api=False,
	        )

	        # Re-parse the file when either the file or the header flag changes
	        # (UI helpers, hidden from MCP). Registration order: file first, flag second.
	        for register_change in (file_upload.change, has_header.change):
	            register_change(
	                fn=load_uploaded_file,
	                inputs=[file_upload, has_header],
	                outputs=[loaded_dataframe, data_preview],
	                show_api=False,
	            )

	        # Main statistical call. show_api is deliberately left at its default
	        # so this endpoint stays exposed to MCP.
	        run_inputs = [
	            loaded_dataframe,    # dataframe
	            group1_str,          # group1_str
	            group2_str,          # group2_str
	            alternative,         # alternative
	            alpha,               # alpha
	            effect_thresholds,   # effect_thresholds
	        ]
	        run_button.click(fn=test_function, inputs=run_inputs, outputs=output)

	        # Reset the form (UI helper, hidden from MCP)
	        clear_button.click(
	            fn=clear_all,
	            outputs=[
	                input_method, loaded_dataframe, data_preview,
	                group1_str, group2_str, alternative,
	                alpha, effect_thresholds, output,
	            ],
	            show_api=False,
	        )

	        # Fill the form with demo data (UI helper, hidden from MCP)
	        example_button.click(
	            fn=load_example,
	            outputs=[
	                input_method, loaded_dataframe, data_preview,
	                group1_str, group2_str,
	            ],
	            show_api=False,
	        )

	    # Hand the components back for external reference if needed
	    return dict(
	        input_method=input_method,
	        file_upload=file_upload,
	        has_header=has_header,
	        data_preview=data_preview,
	        group1_str=group1_str,
	        group2_str=group2_str,
	        alternative=alternative,
	        alpha=alpha,
	        effect_thresholds=effect_thresholds,
	        run_button=run_button,
	        clear_button=clear_button,
	        example_button=example_button,
	        output=output,
	        loaded_dataframe=loaded_dataframe,
	    )

	def create_one_sample_t_test_tab():
	    """Build the One-Sample T-Test tab: widgets, buttons and event wiring.

	    The run button calls ``one_sample_t_test`` with (dataframe, group_str,
	    population_mean, alternative, alpha, effect_thresholds).
	    """
	    method_choices = ["File Upload", "Text Input"]
	    hypothesis_choices = ["two-sided", "less", "greater"]

	    # Reset values for the "Clear All" button, in the order of its outputs
	    def clear_one_sample():
	        return (
	            "File Upload",   # input_method
	            None,            # loaded_dataframe
	            None,            # data_preview
	            "",              # group_str
	            0.0,             # population_mean
	            "two-sided",     # alternative
	            0.05,            # alpha
	            "0.2,0.5,0.8",   # effect_thresholds
	            {},              # output
	        )

	    # Demo values: switches to text input and drops any loaded DataFrame
	    def load_one_sample_example():
	        example_data = "100,105,98,102,97,103,99,101,96,104"
	        return "Text Input", None, None, example_data, 100.0

	    with gr.TabItem("One-Sample T-Test"):
	        gr.Markdown("Test a sample against a known population mean")

	        # Input method selector
	        input_method = gr.Radio(
	            label="Choose Input Method",
	            info="Select how you want to provide your data",
	            choices=method_choices,
	            value=method_choices[0],
	        )

	        # File upload section (shown by default)
	        with gr.Group(visible=True) as file_section:
	            gr.Markdown("### File Upload")
	            gr.Markdown("Upload CSV or Excel file - first column will be used as sample data")

	            with gr.Row():
	                file_upload = gr.File(
	                    type="filepath",
	                    file_types=[".csv", ".xlsx", ".xls"],
	                    label="Upload CSV/Excel File",
	                )
	                has_header = gr.Checkbox(
	                    value=True,
	                    label="File has header row",
	                    info="Check if first row contains column names",
	                )

	            # Read-only preview of the loaded data
	            data_preview = gr.Dataframe(
	                row_count=5,
	                interactive=False,
	                label="Data Preview (first column)",
	            )

	        # Text input section (hidden by default)
	        with gr.Group(visible=False) as text_section:
	            gr.Markdown("### Text Input")
	            gr.Markdown("Enter comma-separated numbers for your sample")

	            group_str = gr.Textbox(
	                label="Sample Data",
	                info="Comma-separated numbers (e.g., test scores, measurements)",
	                placeholder="85.2,90.1,78.5,92.3,88.7",
	            )

	        # Test parameters
	        gr.Markdown("### Test Parameters")
	        with gr.Row():
	            population_mean = gr.Number(
	                label="Population Mean (μ₀)",
	                info="Known or hypothesized population mean to test against",
	                value=0.0,
	            )
	            alternative = gr.Dropdown(
	                label="Alternative Hypothesis",
	                info="two-sided: sample ≠ population; less: sample < population; greater: sample > population",
	                choices=hypothesis_choices,
	                value=hypothesis_choices[0],
	            )

	        with gr.Row():
	            alpha = gr.Number(
	                label="Significance Level (α)",
	                info="Probability threshold for statistical significance (typically 0.05)",
	                value=0.05,
	                minimum=0,
	                maximum=1,
	                step=0.01,
	            )
	            effect_thresholds = gr.Textbox(
	                label="Effect Size Thresholds",
	                info="Cohen's d boundaries: small,medium,large",
	                value="0.2,0.5,0.8",
	            )

	        # Action buttons
	        with gr.Row():
	            run_button = gr.Button("Run One-Sample T-Test", variant="primary", scale=1)
	            clear_button = gr.Button("Clear All", variant="secondary", scale=1)

	        # Result display
	        output = gr.JSON(label="Statistical Test Results")

	        # Example data section
	        with gr.Row():
	            gr.Markdown("### Quick Examples")
	            example_button = gr.Button("Load Example Data", variant="outline")

	        # Holds the DataFrame parsed from the uploaded file
	        loaded_dataframe = gr.State(value=None)

	        # ---------------- EVENT HANDLERS ----------------
	        # Switch the visible section when the radio changes
	        input_method.change(
	            fn=toggle_input_method,
	            inputs=input_method,
	            outputs=[file_section, text_section],
	            show_api=False,
	        )

	        # Re-parse the file when either the file or the header flag changes.
	        # NOTE(review): load_uploaded_file keeps the first two columns and
	        # rejects files with fewer than two, while this tab's text says only
	        # the first column is used - confirm single-column files are meant
	        # to be rejected here.
	        for register_change in (file_upload.change, has_header.change):
	            register_change(
	                fn=load_uploaded_file,
	                inputs=[file_upload, has_header],
	                outputs=[loaded_dataframe, data_preview],
	                show_api=False,
	            )

	        # Main statistical call - show_api left at default so it is exposed to MCP
	        run_inputs = [
	            loaded_dataframe,    # dataframe
	            group_str,           # group_str
	            population_mean,     # population_mean
	            alternative,         # alternative
	            alpha,               # alpha
	            effect_thresholds,   # effect_thresholds
	        ]
	        run_button.click(fn=one_sample_t_test, inputs=run_inputs, outputs=output)

	        # Reset the form
	        clear_button.click(
	            fn=clear_one_sample,
	            outputs=[
	                input_method, loaded_dataframe, data_preview,
	                group_str, population_mean, alternative,
	                alpha, effect_thresholds, output,
	            ],
	            show_api=False,
	        )

	        # Fill the form with demo data
	        example_button.click(
	            fn=load_one_sample_example,
	            outputs=[input_method, loaded_dataframe, data_preview, group_str, population_mean],
	            show_api=False,
	        )


	def create_anova_tab():
	    """Build the One-Way ANOVA tab: widgets, buttons and event wiring.

	    The run button calls ``one_way_anova`` with (dataframe, groups_str, alpha,
	    effect_thresholds).
	    """
	    method_choices = ["File Upload", "Text Input"]

	    # Reset values for the "Clear All" button, in the order of its outputs
	    def clear_anova():
	        return (
	            "File Upload",      # input_method
	            None,               # loaded_dataframe
	            None,               # data_preview
	            "",                 # groups_str
	            0.05,               # alpha
	            "0.01,0.06,0.14",   # effect_thresholds
	            {},                 # output
	        )

	    # Demo values: three groups of four measurements, entered as text
	    def load_anova_example():
	        example_data = "85.2,90.1,78.5,92.3;88.1,85.7,91.2,87.4;82.3,87.4,89.1,83.7"
	        return "Text Input", None, None, example_data

	    with gr.TabItem("One-Way ANOVA"):
	        gr.Markdown("Compare means across multiple independent groups")

	        # Input method selector
	        input_method = gr.Radio(
	            label="Choose Input Method",
	            info="Select how you want to provide your data",
	            choices=method_choices,
	            value=method_choices[0],
	        )

	        # File upload section (shown by default)
	        with gr.Group(visible=True) as file_section:
	            gr.Markdown("### File Upload")
	            gr.Markdown("Upload CSV or Excel file - each column will be treated as a separate group")

	            with gr.Row():
	                file_upload = gr.File(
	                    type="filepath",
	                    file_types=[".csv", ".xlsx", ".xls"],
	                    label="Upload CSV/Excel File",
	                )
	                has_header = gr.Checkbox(
	                    value=True,
	                    label="File has header row",
	                    info="Check if first row contains column names",
	                )

	            # Read-only preview of the loaded data
	            data_preview = gr.Dataframe(
	                row_count=5,
	                interactive=False,
	                label="Data Preview (all columns as groups)",
	            )

	        # Text input section (hidden by default)
	        with gr.Group(visible=False) as text_section:
	            gr.Markdown("### Text Input")
	            gr.Markdown("Enter groups separated by semicolons (;) with comma-separated values within each group")

	            groups_str = gr.Textbox(
	                label="Groups Data",
	                info="Format: group1_values;group2_values;group3_values (e.g., treatment A;treatment B;control)",
	                placeholder="85.2,90.1,78.5;88.1,85.7,91.2;82.3,87.4,89.1",
	                lines=3,
	            )

	            gr.Markdown("Example: `85.2,90.1,78.5;88.1,85.7,91.2;82.3,87.4,89.1` represents 3 groups with their respective measurements")

	        # Test parameters
	        gr.Markdown("### Test Parameters")
	        with gr.Row():
	            alpha = gr.Number(
	                label="Significance Level (α)",
	                info="Probability threshold for statistical significance (typically 0.05)",
	                value=0.05,
	                minimum=0,
	                maximum=1,
	                step=0.01,
	            )
	            effect_thresholds = gr.Textbox(
	                label="Effect Size Thresholds",
	                info="Eta-squared (η²) boundaries: small,medium,large",
	                value="0.01,0.06,0.14",
	            )

	        # Action buttons
	        with gr.Row():
	            run_button = gr.Button("Run One-Way ANOVA", variant="primary", scale=1)
	            clear_button = gr.Button("Clear All", variant="secondary", scale=1)

	        # Result display
	        output = gr.JSON(label="Statistical Test Results")

	        # Interpretation note
	        gr.Markdown("""
	        ### Post-Hoc Note
	        If ANOVA shows significant differences (p < α), consider running post-hoc tests to identify which specific groups differ from each other.
	        """)

	        # Example data section
	        with gr.Row():
	            gr.Markdown("### Quick Examples")
	            example_button = gr.Button("Load Example Data", variant="outline")

	        # Holds the DataFrame parsed from the uploaded file
	        loaded_dataframe = gr.State(value=None)

	        # ---------------- EVENT HANDLERS ----------------
	        # Switch the visible section when the radio changes
	        input_method.change(
	            fn=toggle_input_method,
	            inputs=input_method,
	            outputs=[file_section, text_section],
	            show_api=False,
	        )

	        # Re-parse the file when either the file or the header flag changes.
	        # NOTE(review): load_uploaded_file keeps only the first two columns,
	        # while this tab's text says every column is a group - confirm whether
	        # files with more than two groups are meant to be truncated.
	        for register_change in (file_upload.change, has_header.change):
	            register_change(
	                fn=load_uploaded_file,
	                inputs=[file_upload, has_header],
	                outputs=[loaded_dataframe, data_preview],
	                show_api=False,
	            )

	        # Main statistical call - show_api left at default so it is exposed to MCP
	        run_inputs = [
	            loaded_dataframe,    # dataframe
	            groups_str,          # groups_str
	            alpha,               # alpha
	            effect_thresholds,   # effect_thresholds
	        ]
	        run_button.click(fn=one_way_anova, inputs=run_inputs, outputs=output)

	        # Reset the form
	        clear_button.click(
	            fn=clear_anova,
	            outputs=[
	                input_method, loaded_dataframe, data_preview,
	                groups_str, alpha, effect_thresholds, output,
	            ],
	            show_api=False,
	        )

	        # Fill the form with demo data
	        example_button.click(
	            fn=load_anova_example,
	            outputs=[input_method, loaded_dataframe, data_preview, groups_str],
	            show_api=False,
	        )

	def create_multi_way_anova_tab():
	    """Build the Multi-Way ANOVA tab: widgets, buttons and event wiring.

	    The run button calls ``multi_way_anova`` with (dataframe, dependent_var,
	    factors, alpha, effect_thresholds, include_interactions,
	    max_interaction_order, sum_squares_type).

	    The "Dependent Variable" dropdown starts without choices; they are filled
	    from the column names of the loaded file (or of the example data) through
	    ``gr.update(choices=..., value=...)``.
	    """

	    with gr.TabItem("Multi-Way ANOVA"):
	        gr.Markdown("Compare means across multiple categorical factors simultaneously")

	        # Input method selector (file upload is the only option on this tab)
	        input_method = gr.Radio(
	            choices=["File Upload"],
	            value="File Upload",
	            label="Input Method",
	            info="Multi-way ANOVA requires structured data - file upload recommended"
	        )

	        # File upload input section
	        with gr.Group(visible=True) as file_section:
	            gr.Markdown("### File Upload")
	            gr.Markdown("Upload CSV or Excel file with dependent variable and multiple categorical factors")

	            with gr.Row():
	                file_upload = gr.File(
	                    label="Upload CSV/Excel File",
	                    file_types=[".csv", ".xlsx", ".xls"],
	                    type="filepath"
	                )
	                has_header = gr.Checkbox(
	                    label="File has header row",
	                    value=True,
	                    info="Check if first row contains column names"
	                )

	            # Display loaded data preview
	            data_preview = gr.Dataframe(
	                label="Data Preview",
	                interactive=False,
	                row_count=10
	            )

	        # Variable specification
	        gr.Markdown("### Variable Specification")
	        with gr.Row():
	            dependent_var = gr.Dropdown(
	                label="Dependent Variable",
	                info="Select the continuous outcome variable",
	                interactive=True
	            )
	            factors = gr.Textbox(
	                label="Factors (comma-separated)",
	                placeholder="treatment,gender,age_group",
	                info="Enter factor column names separated by commas",
	                lines=2
	            )

	        # Advanced options
	        gr.Markdown("### Analysis Options")
	        with gr.Row():
	            include_interactions = gr.Checkbox(
	                label="Include Interactions",
	                value=True,
	                info="Test for interaction effects between factors"
	            )
	            max_interaction_order = gr.Number(
	                label="Max Interaction Order",
	                value=None,
	                minimum=2,
	                step=1,
	                info="Maximum interaction order (leave empty for all interactions)"
	            )

	        with gr.Row():
	            sum_squares_type = gr.Dropdown(
	                choices=[1, 2, 3],
	                value=2,
	                label="Sum of Squares Type",
	                info="Type 2 for balanced, Type 3 for unbalanced designs"
	            )
	            alpha = gr.Number(
	                value=0.05,
	                minimum=0,
	                maximum=1,
	                step=0.01,
	                label="Significance Level (α)",
	                info="Probability threshold for statistical significance"
	            )

	        with gr.Row():
	            effect_thresholds = gr.Textbox(
	                value="0.01,0.06,0.14",
	                label="Effect Size Thresholds",
	                info="Eta-squared boundaries: small,medium,large"
	            )

	        # Action buttons
	        with gr.Row():
	            run_button = gr.Button("Run Multi-Way ANOVA", variant="primary", scale=1)
	            clear_button = gr.Button("Clear All", variant="secondary", scale=1)

	        # Output display
	        output = gr.JSON(label="Multi-Way ANOVA Results")

	        # Information section
	        with gr.Accordion("Multi-Way ANOVA Information", open=False):
	            gr.Markdown("""
	            ### What is Multi-Way ANOVA?

	            Multi-way ANOVA extends one-way ANOVA to handle multiple categorical factors simultaneously:

	            Main Effects: How each factor independently affects the outcome
	            Interaction Effects: How factors work together (non-additively)

	            ### Example Designs:
	            - 2-way: Treatment (A,B,C) × Gender (Male,Female) → 6 combinations
	            - 3-way: Drug (A,B) × Dose (Low,High) × Age (Young,Old) → 8 combinations
	            - 4-way: Method (A,B) × School (Public,Private) × Gender (M,F) × Grade (1st,2nd) → 16 combinations

	            ### Requirements:
	            - All factors must be categorical (not continuous)
	            - Dependent variable must be continuous
	            - At least 2 observations per factor combination
	            - Independence, normality, and equal variances assumptions
	            """)

	        # Example data section
	        with gr.Row():
	            gr.Markdown("### Quick Examples")
	            example_button = gr.Button("Load Example Data", variant="outline")

	        # State management: full DataFrame handed to multi_way_anova
	        loaded_dataframe = gr.State(value=None)

	        def load_multi_way_file(file_path, has_header_flag):
	            """Load the uploaded file.

	            Returns (dataframe, preview, dropdown update). The third item
	            replaces the dropdown's *choices* with the file's column names
	            and clears its selection; on failure the choices are emptied.
	            """
	            if file_path is None:
	                return None, None, gr.update(choices=[], value=None)

	            try:
	                # header=0 -> first row is the header; header=None -> no header row
	                header_param = 0 if has_header_flag else None

	                # Case-insensitive extension check so "DATA.CSV" is accepted too
	                lowered_path = str(file_path).lower()
	                if lowered_path.endswith('.csv'):
	                    df = pd.read_csv(file_path, header=header_param)
	                elif lowered_path.endswith(('.xlsx', '.xls')):
	                    df = pd.read_excel(file_path, header=header_param)
	                else:
	                    error_df = pd.DataFrame({'Error': ['Unsupported file format']})
	                    return None, error_df, gr.update(choices=[], value=None)

	                # Set column names if no header
	                if not has_header_flag:
	                    df.columns = [f'Column_{i+1}' for i in range(len(df.columns))]

	                column_options = list(df.columns)
	                preview = df.head(15)
	                # Update the dropdown's choices; passing the raw list would be
	                # interpreted as the dropdown's *value*.
	                return df, preview, gr.update(choices=column_options, value=None)

	            except Exception as e:
	                # UI boundary: report the failure in the preview instead of raising
	                error_df = pd.DataFrame({'Error': [f"Failed to load file: {str(e)}"]})
	                return None, error_df, gr.update(choices=[], value=None)

	        def clear_multi_way():
	            """Return reset values, one per entry of the clear button's outputs."""
	            return (
	                None,                                  # loaded_dataframe
	                None,                                  # data_preview
	                gr.update(choices=[], value=None),     # dependent_var (choices + value)
	                "",                                    # factors
	                True,                                  # include_interactions
	                None,                                  # max_interaction_order
	                2,                                     # sum_squares_type
	                0.05,                                  # alpha
	                "0.01,0.06,0.14",                      # effect_thresholds
	                {}                                     # output
	            )

	        def load_multi_way_example():
	            """Generate a 3 x 2 x 2 demo data set with 15 observations per cell."""
	            # Local generator seeded with 42: produces the same numbers as
	            # seeding the global NumPy RNG, without altering global state.
	            rng = np.random.RandomState(42)

	            treatments = ['Control', 'Treatment_A', 'Treatment_B']
	            genders = ['Male', 'Female']
	            ages = ['Young', 'Old']

	            # Additive effects applied on top of the base score
	            base_score = 50
	            treatment_effects = {'Control': 0, 'Treatment_A': 8, 'Treatment_B': 12}
	            gender_effects = {'Male': 3, 'Female': -3}
	            age_effects = {'Young': 5, 'Old': -5}
	            n_per_cell = 15

	            data = []
	            for treatment in treatments:
	                for gender in genders:
	                    for age in ages:
	                        # Interaction: Treatment_B works better for older patients
	                        interaction_effect = 0
	                        if treatment == 'Treatment_B' and age == 'Old':
	                            interaction_effect = 6

	                        mean_score = (
	                            base_score
	                            + treatment_effects[treatment]
	                            + gender_effects[gender]
	                            + age_effects[age]
	                            + interaction_effect
	                        )
	                        scores = rng.normal(mean_score, 6, n_per_cell)

	                        for score in scores:
	                            data.append({
	                                'test_score': round(score, 2),
	                                'treatment': treatment,
	                                'gender': gender,
	                                'age_group': age
	                            })

	            df = pd.DataFrame(data)
	            preview = df.head(15)
	            column_options = list(df.columns)

	            # One update sets both the dropdown's choices and its selection
	            dropdown_update = gr.update(choices=column_options, value='test_score')
	            return df, preview, dropdown_update, 'treatment,gender,age_group'

	        # EVENT HANDLERS

	        # File upload handlers
	        file_upload.change(
	            fn=load_multi_way_file,
	            inputs=[file_upload, has_header],
	            outputs=[loaded_dataframe, data_preview, dependent_var],
	            show_api=False
	        )

	        has_header.change(
	            fn=load_multi_way_file,
	            inputs=[file_upload, has_header],
	            outputs=[loaded_dataframe, data_preview, dependent_var],
	            show_api=False
	        )

	        # MAIN STATISTICAL FUNCTION CALL - Exposed to MCP!
	        run_button.click(
	            fn=multi_way_anova,
	            inputs=[
	                loaded_dataframe,        # dataframe
	                dependent_var,           # dependent_var
	                factors,                 # factors
	                alpha,                   # alpha
	                effect_thresholds,       # effect_thresholds
	                include_interactions,    # include_interactions
	                max_interaction_order,   # max_interaction_order
	                sum_squares_type         # sum_squares_type
	            ],
	            outputs=output
	        )

	        # Clear form handler (each component appears once in outputs)
	        clear_button.click(
	            fn=clear_multi_way,
	            outputs=[
	                loaded_dataframe, data_preview, dependent_var,
	                factors, include_interactions, max_interaction_order,
	                sum_squares_type, alpha, effect_thresholds, output
	            ],
	            show_api=False
	        )

	        # Example data handler (each component appears once in outputs)
	        example_button.click(
	            fn=load_multi_way_example,
	            outputs=[loaded_dataframe, data_preview, dependent_var, factors],
	            show_api=False
	        )

	def create_chi_square_tab():
	    """Build the Chi-Square Test tab: widgets, buttons and event wiring.

	    The run button calls ``chi_square_test`` with (dataframe, observed_str,
	    expected_str, alpha, effect_thresholds).
	    """
	    method_choices = ["File Upload", "Text Input"]

	    # Reset values for the "Clear All" button, in the order of its outputs
	    def clear_chi_square():
	        return (
	            "File Upload",   # input_method
	            None,            # loaded_dataframe
	            None,            # data_preview
	            "",              # observed_str
	            "",              # expected_str
	            0.05,            # alpha
	            "0.1,0.3,0.5",   # effect_thresholds
	            {},              # output
	        )

	    # Demo values: four categories against an equal expected distribution
	    def load_chi_square_example():
	        observed_example = "25,30,20,15"
	        expected_example = "22.5,22.5,22.5,22.5"
	        return "Text Input", None, None, observed_example, expected_example

	    with gr.TabItem("Chi-Square Test"):
	        gr.Markdown("Test if observed frequencies differ from expected frequencies")

	        # Input method selector
	        input_method = gr.Radio(
	            label="Choose Input Method",
	            info="Select how you want to provide your data",
	            choices=method_choices,
	            value=method_choices[0],
	        )

	        # File upload section (shown by default)
	        with gr.Group(visible=True) as file_section:
	            gr.Markdown("### File Upload")
	            gr.Markdown("Upload CSV or Excel file - first column: observed frequencies, second column: expected frequencies (optional)")

	            with gr.Row():
	                file_upload = gr.File(
	                    type="filepath",
	                    file_types=[".csv", ".xlsx", ".xls"],
	                    label="Upload CSV/Excel File",
	                )
	                has_header = gr.Checkbox(
	                    value=True,
	                    label="File has header row",
	                    info="Check if first row contains column names",
	                )

	            # Read-only preview of the loaded data
	            data_preview = gr.Dataframe(
	                row_count=5,
	                interactive=False,
	                label="Data Preview (observed and expected frequencies)",
	            )

	        # Text input section (hidden by default)
	        with gr.Group(visible=False) as text_section:
	            gr.Markdown("### Text Input")
	            gr.Markdown("Enter comma-separated frequency values")

	            observed_str = gr.Textbox(
	                label="Observed Frequencies",
	                info="Comma-separated observed frequencies for each category",
	                placeholder="25,30,20,15",
	            )

	            expected_str = gr.Textbox(
	                label="Expected Frequencies (Optional)",
	                info="Comma-separated expected frequencies. Leave empty for equal distribution",
	                placeholder="22.5,22.5,22.5,22.5",
	            )

	        # Test parameters
	        gr.Markdown("### Test Parameters")
	        with gr.Row():
	            alpha = gr.Number(
	                label="Significance Level (α)",
	                info="Probability threshold for statistical significance (typically 0.05)",
	                value=0.05,
	                minimum=0,
	                maximum=1,
	                step=0.01,
	            )
	            effect_thresholds = gr.Textbox(
	                label="Effect Size Thresholds",
	                info="Cramér's V boundaries: small,medium,large",
	                value="0.1,0.3,0.5",
	            )

	        # Action buttons
	        with gr.Row():
	            run_button = gr.Button("Run Chi-Square Test", variant="primary", scale=1)
	            clear_button = gr.Button("Clear All", variant="secondary", scale=1)

	        # Result display
	        output = gr.JSON(label="Statistical Test Results")

	        # Example data section
	        with gr.Row():
	            gr.Markdown("### Quick Examples")
	            example_button = gr.Button("Load Example Data", variant="outline")

	        # Holds the DataFrame parsed from the uploaded file
	        loaded_dataframe = gr.State(value=None)

	        # ---------------- EVENT HANDLERS ----------------
	        # Switch the visible section when the radio changes
	        input_method.change(
	            fn=toggle_input_method,
	            inputs=input_method,
	            outputs=[file_section, text_section],
	            show_api=False,
	        )

	        # Re-parse the file when either the file or the header flag changes.
	        # NOTE(review): load_uploaded_file rejects files with fewer than two
	        # columns, while this tab's text calls the second (expected) column
	        # optional - confirm observed-only files are meant to be rejected.
	        for register_change in (file_upload.change, has_header.change):
	            register_change(
	                fn=load_uploaded_file,
	                inputs=[file_upload, has_header],
	                outputs=[loaded_dataframe, data_preview],
	                show_api=False,
	            )

	        # Main statistical call - show_api left at default so it is exposed to MCP
	        run_inputs = [
	            loaded_dataframe,    # dataframe
	            observed_str,        # observed_str
	            expected_str,        # expected_str
	            alpha,               # alpha
	            effect_thresholds,   # effect_thresholds
	        ]
	        run_button.click(fn=chi_square_test, inputs=run_inputs, outputs=output)

	        # Reset the form
	        clear_button.click(
	            fn=clear_chi_square,
	            outputs=[
	                input_method, loaded_dataframe, data_preview,
	                observed_str, expected_str, alpha, effect_thresholds, output,
	            ],
	            show_api=False,
	        )

	        # Fill the form with demo data
	        example_button.click(
	            fn=load_chi_square_example,
	            outputs=[input_method, loaded_dataframe, data_preview, observed_str, expected_str],
	            show_api=False,
	        )


	def create_correlation_tab():
	"""Create a complete correlation analysis tab with all components and handlers."""

	with gr.TabItem("Correlation Test"):
	gr.Markdown("Analyze the relationship between two continuous variables")

	# Input method selector
	input_method = gr.Radio(
	choices=["File Upload", "Text Input"],
	value="File Upload",
	label="Choose Input Method",
	info="Select how you want to provide your data"
	)

	# File upload input section
	with gr.Group(visible=True) as file_section:
	gr.Markdown("### File Upload")
	gr.Markdown("Upload CSV or Excel file - first two columns will be used as the two variables")

	with gr.Row():
	file_upload = gr.File(
	label="Upload CSV/Excel File",
	file_types=[".csv", ".xlsx", ".xls"],
	type="filepath"
	)
	has_header = gr.Checkbox(
	label="File has header row",
	value=True,
	info="Check if first row contains column names"
	)

	# Display loaded data preview
	data_preview = gr.Dataframe(
	label="Data Preview (first two columns as variables)",
	interactive=False,
	row_count=5
	)

	# Text input section
	with gr.Group(visible=False) as text_section:
	gr.Markdown("### Text Input")
	gr.Markdown("Enter comma-separated values for each variable")

	group1_str = gr.Textbox(
	placeholder="5.2,6.1,4.8,7.3,5.9",
	label="Variable 1 (X)",
	info="Comma-separated numbers (e.g., hours studied, height, age)"
	)

	group2_str = gr.Textbox(
	placeholder="78,85,72,92,81",
	label="Variable 2 (Y)",
	info="Comma-separated numbers (e.g., test scores, weight, income)"
	)

	# Test parameters
	gr.Markdown("### Test Parameters")
	with gr.Row():
	method = gr.Dropdown(
	choices=["pearson", "spearman", "kendall"],
	value="pearson",
	label="Correlation Method",
	info="pearson: linear relationships; spearman: monotonic; kendall: robust to outliers"
	)
	alpha = gr.Number(
	value=0.05,
	minimum=0,
	maximum=1,
	step=0.01,
	label="Significance Level (α)",
	info="Probability threshold for statistical significance (typically 0.05)"
	)

	with gr.Row():
	effect_thresholds = gr.Textbox(
	value="0.1,0.3,0.5",
	label="Effect Size Thresholds",
	info="Correlation coefficient boundaries: small,medium,large"
	)

	# Action buttons
	with gr.Row():
	run_button = gr.Button("Run Correlation Test", variant="primary", scale=1)
	clear_button = gr.Button("Clear All", variant="secondary", scale=1)

	# Output display
	output = gr.JSON(label="Statistical Test Results")

	# Example data section
	with gr.Row():
	gr.Markdown("### Quick Examples")
	example_button = gr.Button("Load Example Data", variant="outline")

	# State management
	loaded_dataframe = gr.State(value=None)

	# EVENT HANDLERS
	# Toggle between input methods
	input_method.change(
	fn=toggle_input_method,
	inputs=input_method,
	outputs=[file_section, text_section],
	show_api=False
	)

	# File upload handlers
	file_upload.change(
	fn=load_uploaded_file,
	inputs=[file_upload, has_header],
	outputs=[loaded_dataframe, data_preview],
	show_api=False
	)

	has_header.change(
	fn=load_uploaded_file,
	inputs=[file_upload, has_header],
	outputs=[loaded_dataframe, data_preview],
	show_api=False
	)

	# MAIN STATISTICAL FUNCTION CALL - Exposed to MCP!
	run_button.click(
	fn=correlation_test,
	inputs=[
	loaded_dataframe, # dataframe
	group1_str, # group1_str
	group2_str, # group2_str
	method, # method
	alpha, # alpha
	effect_thresholds # effect_thresholds
	],
	outputs=output
	)

	# Clear form handler
	def clear_correlation():
	    """Return the reset values for the correlation form.

	    The values are positional and line up with the ``outputs`` list of
	    ``clear_button.click``: input_method, loaded_dataframe, data_preview,
	    group1_str, group2_str, method, alpha, effect_thresholds, output.

	    Returns:
	        tuple: Nine values, one per output component, in the order above.
	    """
	    # input_method, loaded_dataframe, data_preview
	    source_reset = ("File Upload", None, None)
	    # group1_str, group2_str
	    text_reset = ("", "")
	    # method, alpha, effect_thresholds (same values the widgets start with)
	    parameter_reset = ("pearson", 0.05, "0.1,0.3,0.5")
	    # A new empty dict is created on every call for the JSON output.
	    return (*source_reset, *text_reset, *parameter_reset, {})

	clear_button.click(
	fn=clear_correlation,
	outputs=[
	input_method, loaded_dataframe, data_preview,
	group1_str, group2_str, method, alpha, effect_thresholds, output
	],
	show_api=False
	)

	# Example data handler
	def load_correlation_example():
	    """Return example values that fill the text-input form.

	    The values are positional and line up with the ``outputs`` list of
	    ``example_button.click``: input_method, loaded_dataframe, data_preview,
	    group1_str, group2_str.

	    Returns:
	        tuple: ``("Text Input", None, None, <x csv>, <y csv>)``.
	    """
	    # None is sent to both loaded_dataframe and data_preview.
	    cleared = None
	    x_values_csv = "5.2,6.1,4.8,7.3,5.9,6.8,4.5,7.1"
	    y_values_csv = "78,85,72,92,81,89,70,88"
	    return ("Text Input", cleared, cleared, x_values_csv, y_values_csv)

	example_button.click(
	fn=load_correlation_example,
	outputs=[input_method, loaded_dataframe, data_preview, group1_str, group2_str],
	show_api=False
	)

	def create_t_test_interface():
	    """Build the top-level Gradio app that hosts every statistical-test tab.

	    Despite the function name, this is not limited to t-tests. Inside a
	    single ``gr.Tabs`` container it calls the tab builders in this order:
	    Student's t-test, Welch's t-test, paired t-test, one-sample t-test,
	    ANOVA, multi-way ANOVA, chi-square and correlation.

	    Returns:
	        gr.Blocks: The assembled (not yet launched) Blocks app.
	    """
	    with gr.Blocks(title="T-Test Analysis", theme=gr.themes.Soft()) as demo:

	        gr.Markdown("""
	        # Statistical Analysis MCP
	        """)

	        with gr.Tabs():
	            # Return values of the tab builders are intentionally discarded:
	            # nothing in this function reads them.

	            # The three two-group t-tests share one builder and differ only
	            # in the test function, tab title and description.
	            create_t_test_tab(
	                test_function=student_t_test,
	                test_name="Student's T-Test",
	                description="t-test between independent groups assuming equal population variances"
	            )
	            create_t_test_tab(
	                test_function=welch_t_test,
	                test_name="Welch's T-Test",
	                description="t-test between independent groups that does not assume equal population variances"
	            )
	            create_t_test_tab(
	                test_function=paired_t_test,
	                test_name="Paired T-Test",
	                description="t-test between paired groups"
	            )

	            # The remaining tests each have a dedicated builder.
	            create_one_sample_t_test_tab()
	            create_anova_tab()
	            create_multi_way_anova_tab()
	            create_chi_square_tab()
	            create_correlation_tab()

	    return demo

	# Main execution
	if __name__ == "__main__":
	    # Build the full tabbed interface only when run as a script, so that
	    # importing this module does not construct or start the UI.
	    demo = create_t_test_interface()
	    # mcp_server=True is forwarded to Gradio's launch(); presumably this is
	    # what exposes the event handlers not marked show_api=False as MCP tools.
	    demo.launch(mcp_server=True)