|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
import gradio as gr |
|
import matplotlib.pyplot as plt |
|
from math import sqrt, pi |
|
from scipy.stats import skewnorm |
|
|
|
def theoretical_stats_skewnorm(alpha, mu, sigma):
    """
    Summarise the Skew-Normal(alpha, loc=mu, scale=sigma) distribution.

    Mean, variance, skewness and excess kurtosis are taken from the
    distribution's ``stats`` method; the quartiles and the median come from
    its quantile function (``ppf``). The mode has no closed form here, so it
    is approximated as the grid point with the highest density on a
    4001-point grid spanning mean +/- 6 standard deviations.

    Returns a dict with the keys ``mean``, ``median``, ``mode``,
    ``variance``, ``std_dev``, ``IQR``, ``range`` (always ``inf``),
    ``skewness``, ``kurtosis`` (Pearson, i.e. excess + 3) and
    ``excess_kurtosis``. All values are plain Python floats.
    """
    # Freeze the parameters once instead of repeating them on every call.
    dist = skewnorm(alpha, loc=mu, scale=sigma)

    mean, variance, skewness, excess = (
        float(moment) for moment in dist.stats(moments="mvsk")
    )
    std_dev = float(np.sqrt(variance))

    # Quantiles: lower quartile, median, upper quartile.
    lower_q, median, upper_q = (
        float(dist.ppf(prob)) for prob in (0.25, 0.5, 0.75)
    )

    # Numeric mode: densest point of a fine grid around the mean.
    grid = np.linspace(mean - 6 * std_dev, mean + 6 * std_dev, 4001)
    densest = int(np.argmax(dist.pdf(grid)))
    mode = float(grid[densest])

    summary = {}
    summary["mean"] = mean
    summary["median"] = median
    summary["mode"] = mode
    summary["variance"] = variance
    summary["std_dev"] = std_dev
    summary["IQR"] = upper_q - lower_q
    summary["range"] = float("inf")  # the support is the whole real line
    summary["skewness"] = skewness
    summary["kurtosis"] = excess + 3.0
    summary["excess_kurtosis"] = excess
    return summary
|
|
|
def sample_stats(sample):
    """
    Compute descriptive statistics for a one-dimensional sample.

    Samples with fewer than two observations get a fixed, degenerate
    summary (zero spread, kurtosis 3) whose location entries are the single
    value, or NaN when the sample is empty.

    Otherwise the result holds the mean, median, a histogram-based mode
    estimate (midpoint of the fullest bin), the unbiased variance
    (``ddof=1``) and its square root, the inter-quartile range, the range,
    and moment-based skewness / kurtosis built from the biased central
    moments. The returned dict uses the keys ``mean``, ``median``,
    ``mode``, ``variance``, ``std_dev``, ``IQR``, ``range``, ``skewness``,
    ``kurtosis`` and ``excess_kurtosis``.
    """
    size = len(sample)

    # Guard clause: nothing meaningful can be estimated from 0 or 1 points.
    if size < 2:
        only = float(sample[0]) if size == 1 else float("nan")
        return {
            "mean": only,
            "median": only,
            "mode": only,
            "variance": 0.0,
            "std_dev": 0.0,
            "IQR": 0.0,
            "range": 0.0,
            "skewness": 0.0,
            "kurtosis": 3.0,
            "excess_kurtosis": 0.0,
        }

    data = np.asarray(sample, dtype=float)
    centre = float(np.mean(data))
    middle = float(np.median(data))

    # Mode estimate: centre of the most populated histogram bin, using
    # roughly sqrt(n) bins clamped to the interval [5, 50].
    n_bins = min(50, max(5, int(np.sqrt(size))))
    counts, edges = np.histogram(data, bins=n_bins)
    peak = int(np.argmax(counts))
    mode_est = float((edges[peak] + edges[peak + 1]) / 2.0)

    variance = float(np.var(data, ddof=1))
    std_dev = float(np.sqrt(variance))

    lower_q, upper_q = np.percentile(data, [25, 75])
    spread = float(np.max(data) - np.min(data))

    # Biased central moments of order 2, 3 and 4.
    deviations = data - centre
    m2 = np.mean(deviations ** 2)
    m3 = np.mean(deviations ** 3)
    m4 = np.mean(deviations ** 4)

    # A sample without spread has no defined shape; report Normal-like values.
    skew, kurt = (0.0, 3.0) if m2 <= 0 else (m3 / (m2 ** 1.5), m4 / (m2 ** 2))

    return {
        "mean": centre,
        "median": middle,
        "mode": mode_est,
        "variance": variance,
        "std_dev": std_dev,
        "IQR": upper_q - lower_q,
        "range": spread,
        "skewness": skew,
        "kurtosis": kurt,
        "excess_kurtosis": kurt - 3.0,
    }
|
|
|
def format_stats_block(title, d):
    """
    Render a statistics dict as a Markdown bullet list under a bold title.

    ``d`` must provide the keys ``mean``, ``median``, ``mode``,
    ``variance``, ``std_dev``, ``IQR``, ``range``, ``skewness``,
    ``kurtosis`` and ``excess_kurtosis``. Every value is printed with six
    significant digits, except that a range of positive infinity is shown
    as the infinity symbol. Returns the lines joined with newlines.
    """
    span = d["range"]
    range_text = f"{span:.6g}" if span != float("inf") else "∞"

    # (label, already-formatted value) pairs in display order.
    rows = (
        ("Mean", f"{d['mean']:.6g}"),
        ("Median", f"{d['median']:.6g}"),
        ("Mode", f"{d['mode']:.6g}"),
        ("Variance", f"{d['variance']:.6g}"),
        ("Std Dev", f"{d['std_dev']:.6g}"),
        ("IQR", f"{d['IQR']:.6g}"),
        ("Range", range_text),
        ("Skewness", f"{d['skewness']:.6g}"),
        ("Kurtosis", f"{d['kurtosis']:.6g}"),
        ("Excess Kurtosis", f"{d['excess_kurtosis']:.6g}"),
    )

    out = [f"**{title}**"]
    out.extend(f"- {label}: {text}" for label, text in rows)
    return "\n".join(out)
|
|
|
def _kde_on_grid(grid, sample, bandwidth, max_elements=2_000_000):
    """Evaluate a Gaussian kernel density estimate of ``sample`` on ``grid``.

    The grid is processed in row blocks so that no intermediate matrix
    holds more than about ``max_elements`` values; each grid point is still
    averaged over the full sample in a single ``np.mean`` call.
    """
    points = np.asarray(sample, dtype=float).reshape(1, -1)
    rows_per_block = max(1, max_elements // max(1, points.size))
    norm = sqrt(2 * pi) * bandwidth
    density = np.empty(grid.shape[0], dtype=float)
    for start in range(0, grid.shape[0], rows_per_block):
        stop = start + rows_per_block
        z = (grid[start:stop].reshape(-1, 1) - points) / bandwidth
        density[start:stop] = np.mean(np.exp(-0.5 * z**2) / norm, axis=1)
    return density


def render(alpha, mu, sigma, n, seed, x_min, x_max, bins, show_hist, overlay_empirical_pdf):
    """
    Draw the Skew-Normal PDF with optional sample layers and build the stats text.

    Parameters
    ----------
    alpha, mu, sigma : shape, location and scale of the Skew-Normal; ``sigma``
        is clamped to at least 1e-6.
    n, seed : sample size and seed for the random sample (both cast to int).
    x_min, x_max : plot window. When either bound is missing (``None``),
        non-finite, or the window is empty/inverted, the window falls back to
        theoretical mean +/- 4 standard deviations.
    bins : number of histogram bins (cast to int).
    show_hist : draw the density-normalised sample histogram.
    overlay_empirical_pdf : draw a Gaussian kernel density estimate of the
        sample with the bandwidth ``1.06 * std * n**(-1/5)``.

    Returns
    -------
    (fig, stats_md) : the matplotlib figure and a Markdown string holding the
        theoretical block followed by the sample block.
    """
    sigma = max(1e-6, sigma)

    # Computed once; it feeds both the fallback window and the stats panel.
    theo = theoretical_stats_skewnorm(alpha, mu, sigma)

    # A missing or NaN bound would otherwise raise on comparison or yield an
    # unusable grid, so treat it like an invalid window.
    window_ok = (
        x_min is not None
        and x_max is not None
        and np.isfinite(x_min)
        and np.isfinite(x_max)
        and x_min < x_max
    )
    if not window_ok:
        centre, spread = theo["mean"], max(1e-9, theo["std_dev"])
        x_min, x_max = centre - 4 * spread, centre + 4 * spread

    x = np.linspace(x_min, x_max, 800)
    y = skewnorm.pdf(x, alpha, loc=mu, scale=sigma)

    rng = np.random.default_rng(int(seed))
    sample = skewnorm.rvs(alpha, loc=mu, scale=sigma, size=int(n), random_state=rng)
    samp = sample_stats(sample)

    left = format_stats_block("Theoretical (Skew-Normal)", theo)
    right = format_stats_block("Sample (from sliders)", samp)
    stats_md = left + "\n\n" + right

    fig, ax = plt.subplots(figsize=(8, 4.5), dpi=120)
    try:
        ax.plot(x, y, label="Theoretical PDF (Skew-Normal)")

        if show_hist:
            ax.hist(sample, bins=int(bins), density=True, alpha=0.5, label="Sample histogram")

        if overlay_empirical_pdf and sample.size > 0:
            bw = 1.06 * max(1e-8, samp["std_dev"]) * (len(sample) ** (-1/5))
            bw = max(bw, 1e-6)
            # Blocked evaluation: a dense 800 x n matrix would need several
            # gigabytes at the largest sample sizes.
            kde = _kde_on_grid(x, sample, bw)
            ax.plot(x, kde, linestyle="--", label="Empirical density (KDE-like)")

        ax.set_title("Skew-Normal & Normal Explorer (α=0 gives Normal)")
        ax.set_xlabel("x")
        ax.set_ylabel("density")
        ax.legend(loc="best")
        ax.grid(True, linestyle="--")
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # without this each call would leak one figure. The Figure object
        # itself is still returned to the caller.
        plt.close(fig)

    return fig, stats_md
|
|
|
# ---------------------------------------------------------------------------
# Gradio user interface.
# Components are created in layout order: controls in the left column, the
# plot and the statistics text in the wider right column.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Skew-Normal & Normal Explorer") as demo:
    gr.Markdown("# Skew-Normal & Normal Explorer")
    gr.Markdown(
        "Slide **α (skewness)** to skew left/right. **α=0 → Normal(μ, σ²)**. "
        "Adjust μ, σ, n, and window. Compare theoretical vs sample stats."
    )

    with gr.Row():
        # Left column: distribution and sampling controls.
        with gr.Column(scale=1):
            alpha = gr.Slider(
                minimum=-15.0, maximum=15.0, value=0.0, step=0.1,
                label="Skewness (α)",
            )
            mu = gr.Slider(
                minimum=-10.0, maximum=10.0, value=0.0, step=0.1,
                label="Location (μ)",
            )
            sigma = gr.Slider(
                minimum=0.1, maximum=10.0, value=1.0, step=0.1,
                label="Scale (σ)",
            )
            n = gr.Slider(
                minimum=10, maximum=200000, value=2000, step=10,
                label="Sample size (n)",
            )
            seed = gr.Slider(
                minimum=0, maximum=99999, value=42, step=1,
                label="Random seed",
            )

            # Collapsed by default: plot window and optional layers.
            with gr.Accordion("Plot window & layers", open=False):
                x_min = gr.Number(value=-5.0, label="x min")
                x_max = gr.Number(value=5.0, label="x max")
                bins = gr.Slider(
                    minimum=5, maximum=200, value=40, step=1,
                    label="Histogram bins",
                )
                show_hist = gr.Checkbox(value=True, label="Show sample histogram")
                overlay_empirical_pdf = gr.Checkbox(
                    value=False,
                    label="Overlay empirical density (KDE-like)",
                )

        # Right column: rendered figure and the statistics text.
        with gr.Column(scale=2):
            plot = gr.Plot(label="Curve / Histogram")
            stats = gr.Markdown(label="Descriptive Statistics")

    # The order of ``inputs`` must match the parameter order of ``render``.
    inputs = [alpha, mu, sigma, n, seed, x_min, x_max, bins, show_hist, overlay_empirical_pdf]
    outputs = [plot, stats]

    # Draw once when the page loads, then again whenever any control changes.
    demo.load(fn=render, inputs=inputs, outputs=outputs)
    for component in inputs:
        component.change(fn=render, inputs=inputs, outputs=outputs)


if __name__ == "__main__":
    demo.launch()
|
|
|
|