| """ | |
| ======================================================= | |
| Comparison of LDA and PCA 2D projection of Iris dataset | |
| ======================================================= | |
| The Iris dataset represents 3 kind of Iris flowers (Setosa, Versicolour | |
| and Virginica) with 4 attributes: sepal length, sepal width, petal length | |
| and petal width. | |
| Principal Component Analysis (PCA) applied to this data identifies the | |
| combination of attributes (principal components, or directions in the | |
| feature space) that account for the most variance in the data. Here we | |
| plot the different samples on the 2 first principal components. | |
| Linear Discriminant Analysis (LDA) tries to identify attributes that | |
| account for the most variance *between classes*. In particular, | |
| LDA, in contrast to PCA, is a supervised method, using known class labels. | |
| """ | |
import matplotlib.pyplot as plt
import gradio as gr

from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# load data
iris = datasets.load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names
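# X has shape (150, 4): 150 samples with 4 features; y holds the class label (0, 1 or 2)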
# fit PCA
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)
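# fit(X).transform(X) is equivalent to fit_transform(X); note that PCA never sees the labels y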
# fit LDA
lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)
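# LDA is supervised, so fit() also takes y; with 3 classes it yields at most
# n_classes - 1 = 2 discriminant components, which is why n_components=2 works here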
# Percentage of variance explained for each component
print(
    "explained variance ratio (first two components): %s"
    % str(pca.explained_variance_ratio_)
)
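# For symmetry, the same can be printed for the LDA axes (a sketch, assuming
# scikit-learn's LinearDiscriminantAnalysis sets explained_variance_ratio_
# under the default "svd" solver, as recent versions do)
print(
    "explained variance ratio (LDA components): %s"
    % str(lda.explained_variance_ratio_)
)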
def plot_lda_pca():
    # draw the PCA and LDA projections side by side; using two axes avoids
    # plotting both scatters on top of each other in a single set of axes
    fig, (ax_pca, ax_lda) = plt.subplots(1, 2, figsize=(10, 5), facecolor="w")
    colors = ["navy", "turquoise", "darkorange"]
    lw = 2
    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        ax_pca.scatter(
            X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=0.8, lw=lw, label=target_name
        )
    ax_pca.legend(loc="best", shadow=False, scatterpoints=1)
    ax_pca.set_title("PCA of IRIS dataset")
    for color, i, target_name in zip(colors, [0, 1, 2], target_names):
        ax_lda.scatter(
            X_r2[y == i, 0], X_r2[y == i, 1], alpha=0.8, color=color, label=target_name
        )
    ax_lda.legend(loc="best", shadow=False, scatterpoints=1)
    ax_lda.set_title("LDA of IRIS dataset")
    return fig
| title = "2-D projection of Iris dataset using LDA and PCA" | |
| with gr.Blocks(title=title) as demo: | |
| gr.Markdown(f"# {title}") | |
| gr.Markdown(" This example shows how one can use Prinicipal Components Analysis (PCA) and Factor Analysis (FA) for model selection by observing the likelihood of a held-out dataset with added noise <br>" | |
| " The number of samples (n_samples) will determine the number of data points to produce. <br>" | |
| " The number of components (n_components) will determine the number of components each method will fit to, and will affect the likelihood of the held-out set. <br>" | |
| " The number of features (n_components) determine the number of features the toy dataset X variable will have. <br>" | |
| " For further details please see the sklearn docs:" | |
| ) | |
| gr.Markdown(" **[Demo is based on sklearn docs found here](https://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_vs_lda.html#sphx-glr-auto-examples-decomposition-plot-pca-vs-lda-py)** <br>") | |
| gr.Markdown(" **Dataset** : A toy dataset with corrupted with homoscedastic noise (noise variance is the same for each feature) or heteroscedastic noise (noise variance is the different for each feature) . <br>") | |
| gr.Markdown(" Different number of features and number of components affect how well the low rank space is recovered. <br>" | |
| " Larger Depth trying to overfit and learn even the finner details of the data.<br>" | |
| ) | |
    btn = gr.Button(value="Run")
    btn.click(plot_lda_pca, outputs=gr.Plot(label="PCA vs LDA clustering"))

demo.launch()