georgeek committed on
Commit
de2b822
·
1 Parent(s): 9953ce9
Tutor.py ADDED
@@ -0,0 +1,168 @@
+ import streamlit as st
+ import importlib
+ import os
+ import json
+ from datetime import datetime
+ from dotenv import load_dotenv
+ from openai import OpenAI
+
+ # Load environment variables from .env file
+ load_dotenv()
+
+ # Set page title and layout
+ st.set_page_config(page_title="Data Science Tutor", layout="wide")
+
+ # Hide Streamlit's default page navigation menu
+ st.markdown("""
+     <style>
+     /* Hide the default "Pages" menu in the top-left sidebar */
+     [data-testid="stSidebarNav"] {
+         display: none;
+     }
+     </style>
+ """, unsafe_allow_html=True)
+
+ # Sidebar with image above the CRISP-DM steps
+ st.sidebar.image(
+     "data2.jpeg",  # Replace with your file path or URL
+     use_container_width=True
+ )
+
+ # Sidebar navigation
+ st.sidebar.title("CRISP-DM Steps")
+ sections = {
+     "Main Page": None,
+     "1. Business Understanding": "1_Business_understanding",
+     "2. Data Understanding": "2_Data_understanding",
+     "3. Data Preparation": "3_Data_preparation",
+     "4. Feature Engineering": "4_Feature_engineering",
+     "5. Modeling": "5_Modeling",
+     "6. Evaluation": "6_Evaluation",
+     "7. Deployment & Testing": "7_Deployment",
+     "8. ML, Deep Learning & Transformers": "8_Models"
+ }
+
+ # By default, make the first item (Main Page) selected.
+ selected_section = st.sidebar.radio("Select a topic:", list(sections.keys()), index=0)
+
+ # If the user selects "Main Page", just show the introduction content.
+ if sections[selected_section] is None:
+     st.title("🚀 Welcome to the Data Science Tutor!")
+     st.markdown(
+         """
+         <div style="color: #2FA4E7; margin-top: 1rem;">
+         <h2>About This App</h2>
+         <p>
+         This application is designed to guide you through the CRISP-DM process
+         for data science projects. Each section in the sidebar highlights a
+         different step in the process, providing structured lessons, best
+         practices, and hands-on examples.
+         </p>
+         <h3>App Sections</h3>
+         <ul>
+         <li><strong>1. Business Understanding</strong> – Clarify project objectives, requirements, and success criteria.</li>
+         <li><strong>2. Data Understanding</strong> – Explore data sources, structures, and initial insights.</li>
+         <li><strong>3. Data Preparation</strong> – Clean, integrate, and transform the data for modeling.</li>
+         <li><strong>4. Feature Engineering</strong> – Engineer and select relevant features for better models.</li>
+         <li><strong>5. Modeling</strong> – Develop, train, and tune predictive models.</li>
+         <li><strong>6. Evaluation</strong> – Assess performance metrics and refine models.</li>
+         <li><strong>7. Deployment & Testing</strong> – Deploy models into production environments and validate.</li>
+         <li><strong>8. ML, Deep Learning & Transformers</strong> – Delve deeper into advanced methods and architectures.</li>
+         </ul>
+         </div>
+         """,
+         unsafe_allow_html=True
+     )
+ else:
+     # Otherwise, load the selected module from the pages folder
+     module_name = f"pages.{sections[selected_section]}"
+     module = importlib.import_module(module_name)
+     module.run()
+
+ # OpenAI API section
+ st.sidebar.title("Ask AI")
+ api_key = os.getenv("OPENAI_API_KEY")
+ if not api_key:
+     api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")
+
+ client = OpenAI(api_key=api_key)  # use the key from .env or the sidebar input
+
+ # Sidebar checkboxes that narrow the chat context to specific focus areas
+ st.sidebar.title("Focus Areas")
+ focus_areas = [
+     "Data Cleaning & Wrangling",
+     "Feature Engineering & Selection",
+     "Model Selection & Tuning",
+     "Interpretability & Explainability",
+     "Model Deployment & Monitoring"
+ ]
+ selected_focus_areas = [area for area in focus_areas if st.sidebar.checkbox(area)]
+
+ # Main chat section
+ st.title("Data Science Tutor Chat")
+ st.image("https://miro.medium.com/v2/resize:fit:100/format:webp/1*NfE0G4nEj4xX7Z_8dSx83g.png")
+
+ # Initialize conversation in the session state
+ if "messages" not in st.session_state:
+     st.session_state["messages"] = [
+         {"role": "assistant", "content": "How can I assist you with Data Science today?"}
+     ]
+
+ # Track whether the one-time context prompt has been added
+ if "context_prompt_added" not in st.session_state:
+     st.session_state["context_prompt_added"] = False
+
+ st.write("---")
+ st.subheader("Chat")
+
+ for msg in st.session_state["messages"]:
+     st.chat_message(msg["role"]).write(msg["content"])
+
+ if prompt := st.chat_input("Enter your question here:"):
+     # Add context to the messages if any focus areas are selected
+     focus_context = ""
+     if selected_focus_areas:
+         focus_context = f"Focus on {', '.join(selected_focus_areas)} in your response."
+
+     # Add context based on the selected section
+     section_context = f"The user is currently viewing the {selected_section} section. "
+
+     # If the context prompt hasn't been added yet, build & inject it once;
+     # otherwise, just add the user's raw question.
+     if not st.session_state["context_prompt_added"]:
+         st.session_state["messages"].append({"role": "user", "content": f"{section_context}{prompt}\n{focus_context}"})
+         st.session_state["context_prompt_added"] = True
+     else:
+         st.session_state["messages"].append({"role": "user", "content": f"{section_context}{prompt}"})
+
+     # Display the latest user message in the chat
+     st.chat_message("user").write(st.session_state["messages"][-1]["content"])
+
+     # Now call GPT-4 with the entire conversation
+     completion = client.chat.completions.create(
+         model="gpt-4",
+         messages=st.session_state["messages"]
+     )
+     response_text = completion.choices[0].message.content.strip()
+
+     st.session_state["messages"].append({"role": "assistant", "content": response_text})
+     st.chat_message("assistant").write(response_text)
+
+     # Log the conversation to a JSON file
+     log_entry = {
+         "timestamp": datetime.now().isoformat(),
+         "user_query": prompt,
+         "assistant_response": response_text,
+         "focus_areas": selected_focus_areas,
+         "selected_section": selected_section
+     }
+     log_file_path = os.path.join("logs", "conversation_logs.json")
+     os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+     if os.path.exists(log_file_path):
+         with open(log_file_path, "r") as log_file:
+             logs = json.load(log_file)
+     else:
+         logs = []
+     logs.append(log_entry)
+     with open(log_file_path, "w") as log_file:
+         json.dump(logs, log_file, indent=4)
AmesHousing.tsv.csv ADDED
The diff for this file is too large to render.
 
LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
README.md CHANGED
@@ -1,6 +1,6 @@
---
title: LLM Tutor
- emoji: 🐨
+ emoji: 👨‍🎓
colorFrom: indigo
colorTo: indigo
sdk: streamlit
@@ -11,4 +11,102 @@
short_description: Learn ML/LLM
---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Data Science Tutor
+
+ ## Overview
+
+ The **Data Science Tutor** application is designed to guide users through the CRISP-DM process for data science projects. Each section in the sidebar highlights a different step in the process, providing structured lessons, best practices, and hands-on examples. The application also includes an AI-powered chat feature to assist users with their data science queries.
+
+ ## Project Structure
+
+ The project is organized into the following main components:
+
+ ### 1. Main Application (`Tutor.py`)
+
+ The main application file that sets up the Streamlit interface, including the sidebar navigation, the chat functionality, and dynamic loading of page content based on the selected section (see the sketch after the pages list below).
+
+ ### 2. Pages Directory (`pages/`)
+
+ Contains individual Python scripts for each section of the CRISP-DM process. Each script includes detailed content, explanations, and quizzes related to its respective topic.
+
+ - `1_Business_understanding.py`: Covers the Business Understanding phase.
+ - `2_Data_understanding.py`: Covers the Data Understanding phase.
+ - `3_Data_preparation.py`: Covers the Data Preparation phase.
+ - `4_Feature_engineering.py`: Covers Feature Engineering.
+ - `5_Modeling.py`: Covers the Modeling phase.
+ - `6_Evaluation.py`: Covers the Evaluation phase.
+ - `7_Deployment.py`: Covers Deployment and Testing.
+ - `8_Models.py`: Covers ML, Deep Learning, and Transformers.
+
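+ Each page module exposes a `run()` entry point; `Tutor.py` maps the sidebar label to a module name and imports it dynamically. A minimal sketch of that contract (simplified from `Tutor.py`; the `load_page` helper and the one-entry `sections` dict are illustrative, not code in the repo):
+
+ ```python
+ import importlib
+
+ sections = {"1. Business Understanding": "1_Business_understanding"}  # label -> module name
+
+ def load_page(label: str) -> None:
+     # Import pages/<module>.py and hand control to its run() entry point.
+     module = importlib.import_module(f"pages.{sections[label]}")
+     module.run()
+ ```
+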
+ ### 3. Chat Functionality
+
+ The application includes an AI-powered chat feature that allows users to ask questions related to data science. The model's responses are tailored to the selected section and focus areas, as sketched below.
+
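+ Concretely, `Tutor.py` prepends the current section to each user question, and adds the selected focus areas once per conversation, before sending the full message history to the OpenAI API. A simplified sketch of that prompt assembly (the `build_user_message` helper is illustrative; the variable names follow `Tutor.py`):
+
+ ```python
+ def build_user_message(prompt, selected_section, selected_focus_areas, first_turn):
+     # The section context is always prepended; the focus-area hint is added only once.
+     section_context = f"The user is currently viewing the {selected_section} section. "
+     focus_context = ""
+     if first_turn and selected_focus_areas:
+         focus_context = f"\nFocus on {', '.join(selected_focus_areas)} in your response."
+     return f"{section_context}{prompt}{focus_context}"
+ ```
+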
+ ### 4. Focus Areas
+
+ Users can select specific focus areas from the sidebar to further refine the context of their queries. The focus areas include:
+ - Data Cleaning & Wrangling
+ - Feature Engineering & Selection
+ - Model Selection & Tuning
+ - Interpretability & Explainability
+ - Model Deployment & Monitoring
+
+ ## Installation
+
+ To run the application locally, follow these steps:
+
+ 1. Clone the repository:
+    ```sh
+    git clone https://github.com/your-username/LLM-Tutor.git
+    cd LLM-Tutor
+    ```
+
+ 2. Create a virtual environment and activate it:
+    ```sh
+    python -m venv venv
+    source venv/bin/activate  # On Windows, use `venv\Scripts\activate`
+    ```
+
+ 3. Install the required dependencies:
+    ```sh
+    pip install -r requirements.txt
+    ```
+
+ 4. Set up your OpenAI API key:
+    - Create a `.env` file in the root directory of the project.
+    - Add your OpenAI API key to the `.env` file:
+      ```
+      OPENAI_API_KEY=your_openai_api_key
+      ```
+
+ 5. Run the application:
+    ```sh
+    streamlit run Tutor.py
+    ```
+
+ ## Usage
+
+ - **Select a CRISP-DM Step**: Use the sidebar to navigate through the steps of the CRISP-DM process.
+ - **Ask AI**: Enter your OpenAI API key in the sidebar and ask questions related to data science.
+ - **Focus Areas**: Select specific focus areas to refine the context of your queries.
+ - **Interactive Content**: Each section includes detailed explanations, key concepts, and quizzes to test your understanding.
+
+ ## License
+
+ This project is licensed under the MIT License. See the LICENSE file for more details.
+
+ ## Contributing
+
+ Contributions are welcome! Please read the CONTRIBUTING file for guidelines on how to contribute to this project.
+
+ ## Acknowledgements
+
+ - [Streamlit](https://streamlit.io/)
+ - [OpenAI](https://www.openai.com/)
+ - [CRISP-DM](https://www.sv-europe.com/crisp-dm-methodology/)
+
+ ---
+
+ pinned: false
+ license: mit
+ short_description: Learn ML/LLM
data2.jpeg ADDED
logs/conversation_logs.json ADDED
The diff for this file is too large to render.
 
pages/1_Business_understanding.py ADDED
@@ -0,0 +1,67 @@
+ import streamlit as st
+
+ def run():
+     st.title("1. Business Understanding")
+
+     st.write("## Overview")
+     st.write("""
+     The Business Understanding phase is the first step in the CRISP-DM process. It involves understanding the project objectives and requirements from a business perspective, and then converting this knowledge into a data mining problem definition and a preliminary plan.
+     """)
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - **Business Goals**: Clearly define the problem (e.g., predict customer churn).
+     - **Constraints**: Time, budget, available data, legal considerations.
+     - **Success Metrics**: Accuracy, revenue impact, efficiency gains.
+     """)
+
+     st.write("## Introduction")
+     st.write("""
+     Business Understanding is crucial for the success of any data science project. It ensures that the project is aligned with the business objectives and that the results will be actionable and valuable to the organization.
+     """)
+
+     st.header("Objectives")
+     st.write("""
+     - **Understand the Business Objectives**: Gain a clear understanding of the business goals and how they translate into data mining goals.
+     - **Assess the Situation**: Evaluate the current situation, including resources, constraints, and risks.
+     - **Determine Data Mining Goals**: Define specific data mining goals that align with the business objectives.
+     - **Produce a Project Plan**: Develop a detailed project plan that outlines the steps, resources, and timeline for the project.
+     """)
+
+     st.header("Key Activities")
+     st.write("""
+     - **Identify Business Objectives and Constraints**: Work with stakeholders to identify the key business objectives and any constraints that may impact the project.
+     - **Define Success Criteria**: Establish clear criteria for success, including key performance indicators (KPIs) and metrics.
+     - **Develop a Project Plan**: Create a comprehensive project plan that includes a timeline, resource allocation, and risk management strategy.
+     """)
+
+     st.write("## Detailed Steps")
+     st.write("""
+     1. **Determine Business Objectives**:
+        - Interview stakeholders to understand their goals and expectations.
+        - Identify the key business questions that need to be answered.
+     2. **Assess the Situation**:
+        - Conduct a SWOT analysis (Strengths, Weaknesses, Opportunities, Threats).
+        - Review existing resources, including data, tools, and expertise.
+     3. **Define Data Mining Goals**:
+        - Translate business objectives into specific data mining goals.
+        - Ensure that the goals are measurable and achievable.
+     4. **Produce a Project Plan**:
+        - Outline the project phases, tasks, and deliverables.
+        - Allocate resources and assign responsibilities.
+        - Develop a risk management plan to address potential challenges.
+     """)
+
+     st.write("## Quiz: Conceptual Questions")
+     q1 = st.radio("What is the main purpose of the Business Understanding phase?", ["Define project goals", "Collect data", "Build models"])
+     if q1 == "Define project goals":
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Incorrect. The main purpose is to define project goals.")
+
+     st.write("## Learning Resources")
+     st.markdown("""
+     - 📘 [CRISP-DM Guide](https://www.sv-europe.com/crisp-dm-methodology/)
+     - 🎓 [Understanding Business Objectives](https://www.datasciencecentral.com/profiles/blogs/understanding-business-objectives-in-data-science)
+     - 🔬 [Business Understanding in Data Science](https://towardsdatascience.com/business-understanding-in-data-science-1a1d5e8b1c3d)
+     """)
pages/2_Data_understanding.py ADDED
@@ -0,0 +1,70 @@
+ import streamlit as st
+
+ def run():
+     st.title("2. Data Understanding")
+
+     st.write("## Overview")
+     st.write("""
+     Data Understanding is the second phase of the CRISP-DM process. It involves collecting initial data, describing the data, exploring the data, and verifying data quality.
+     """)
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - **Data Collection**: Gathering data from various sources.
+     - **Data Description**: Summarizing the main characteristics of the data.
+     - **Data Exploration**: Using statistical and visualization techniques to understand the data.
+     - **Data Quality Verification**: Ensuring the data is accurate, complete, and reliable.
+     """)
+
+     st.write("## Introduction")
+     st.write("""
+     The Data Understanding phase is crucial for identifying potential issues with the data and gaining insights that will inform the subsequent phases of the CRISP-DM process.
+     """)
+
+     st.header("Objectives")
+     st.write("""
+     - **Collect Initial Data**: Gather data from various sources to get a comprehensive dataset.
+     - **Describe the Data**: Summarize the main characteristics of the data, including its structure and content.
+     - **Explore the Data**: Use statistical and visualization techniques to identify patterns, trends, and anomalies.
+     - **Verify Data Quality**: Assess the quality of the data to ensure it is suitable for analysis.
+     """)
+
+     st.header("Key Activities")
+     st.write("""
+     - **Data Collection**: Gather data from internal and external sources.
+     - **Data Description**: Generate summary statistics and visualizations to describe the data.
+     - **Data Exploration**: Perform exploratory data analysis (EDA) to uncover patterns and relationships.
+     - **Data Quality Verification**: Check for missing values, outliers, and inconsistencies in the data.
+     """)
+
+     st.write("## Detailed Steps")
+     st.write("""
+     1. **Collect Initial Data**:
+        - Identify relevant data sources.
+        - Extract data from various sources and consolidate it into a single dataset.
+     2. **Describe the Data**:
+        - Generate summary statistics (e.g., mean, median, standard deviation).
+        - Create visualizations (e.g., histograms, box plots) to describe the data distribution.
+     3. **Explore the Data**:
+        - Perform exploratory data analysis (EDA) to identify patterns, trends, and anomalies.
+        - Use visualization tools (e.g., scatter plots, heatmaps) to explore relationships between variables.
+     4. **Verify Data Quality**:
+        - Check for missing values and handle them appropriately.
+        - Identify and address outliers and inconsistencies in the data.
+        - Assess the overall quality of the data to ensure it is suitable for analysis.
+     """)
+
+ st.write("## Quiz: Conceptual Questions")
58
+ q1 = st.radio("What is the main purpose of the Data Understanding phase?", ["Collect data", "Describe data", "Explore data", "All of the above"])
59
+ if q1 == "All of the above":
60
+ st.success("βœ… Correct!")
61
+ else:
62
+ st.error("❌ Incorrect. The main purpose is to collect, describe, and explore data.")
63
+
64
+ st.write("## Learning Resources")
65
+ st.markdown("""
66
+ - πŸ“˜ [CRISP-DM Guide](https://www.sv-europe.com/crisp-dm-methodology/)
67
+ - πŸŽ“ [Data Understanding in Data Science](https://towardsdatascience.com/data-understanding-in-data-science-1a1d5e8b1c3d)
68
+ - πŸ”¬ [Exploratory Data Analysis (EDA)](https://www.analyticsvidhya.com/blog/2021/06/exploratory-data-analysis-eda-a-step-by-step-guide/)
69
+ """)
70
+
pages/3_Data_preparation.py ADDED
@@ -0,0 +1,62 @@
+ import streamlit as st
+ import pandas as pd
+
+ def run():
+     st.title("3. Data Preparation")
+     st.header("Introduction")
+     st.write("""
+     Data Preparation involves cleaning and transforming raw data into a format suitable for analysis.
+     """)
+     st.header("Objectives")
+     st.write("""
+     - Clean the data.
+     - Transform the data.
+     - Integrate data from multiple sources.
+     """)
+     st.header("Key Activities")
+     st.write("""
+     - Data cleaning.
+     - Data transformation.
+     - Data integration.
+     """)
+
+     st.write("## Overview")
+     st.write("Cleaning and transforming data for better model performance.")
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - :red[**Handling Missing Values**]: Fill with mean/median or drop rows.
+     - **Feature Engineering**: Creating new features for better modeling.
+     - **Scaling**: Normalization and standardization for consistency.
+     """)
+
+     file = st.file_uploader("Upload a dataset", type=["csv"])
+     if file:
+         df = pd.read_csv(file)
+         option = st.radio("Choose a method to handle missing values", ["Fill with Mean", "Fill with Median", "Drop Rows"])
+         if option == "Fill with Mean":
+             df.fillna(df.mean(numeric_only=True), inplace=True)
+         elif option == "Fill with Median":
+             df.fillna(df.median(numeric_only=True), inplace=True)
+         elif option == "Drop Rows":
+             df.dropna(inplace=True)
+
+     st.write("## Quiz: Conceptual Questions")
+     q1 = st.radio("What is feature engineering?", ["Data visualization", "Creating new features", "Data storage"])
+     if q1 == "Creating new features":
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Incorrect.")
+
+     st.write("## Code-Based Quiz")
+     code_input = st.text_area("Write a function to normalize a column", value="def normalize(col):\n    return (col - col.min()) / (col.max() - col.min())")
+     if "col.max() - col.min()" in code_input:
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Try again.")
+
+     st.write("## Learning Resources")
+     st.markdown("""
+     - 📝 [Data Cleaning with Pandas](https://realpython.com/pandas-data-cleaning/)
+     """)
pages/4_Feature_engineering.py ADDED
@@ -0,0 +1,78 @@
+ import streamlit as st
+ from sklearn.preprocessing import MinMaxScaler
+ import pandas as pd
+
+ def run():
+     st.title("4. Feature Engineering")
+
+     st.write("## Overview")
+     st.write("""
+     Feature Engineering is the process of using domain knowledge to create features (input variables) that make machine learning algorithms work better. It involves transforming raw data into meaningful features that improve the performance of machine learning models.
+     """)
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - **Feature Creation**: Generating new features from existing data.
+     - **Feature Transformation**: Modifying features to make them more suitable for modeling.
+     - **Feature Selection**: Identifying the most relevant features for the model.
+     - **Feature Scaling**: Normalizing or standardizing features to ensure they are on a similar scale.
+     """)
+
+     st.write("## Introduction")
+     st.write("""
+     Feature Engineering is a crucial step in the data science process. It can significantly impact the performance of machine learning models by providing them with the right input variables. Effective feature engineering requires a deep understanding of the data and the problem domain.
+     """)
+
+     st.header("Objectives")
+     st.write("""
+     - **Create New Features**: Generate new features that capture important information from the data.
+     - **Transform Existing Features**: Modify existing features to make them more suitable for modeling.
+     - **Select Relevant Features**: Identify and select the most relevant features for the model.
+     - **Scale Features**: Normalize or standardize features to ensure they are on a similar scale.
+     """)
+
+     st.header("Key Activities")
+     st.write("""
+     - **Feature Creation**: Generate new features from existing data using domain knowledge.
+     - **Feature Transformation**: Apply mathematical transformations to features to improve their suitability for modeling.
+     - **Feature Selection**: Use statistical techniques to identify the most relevant features for the model.
+     - **Feature Scaling**: Normalize or standardize features to ensure they are on a similar scale.
+     """)
+
+     st.write("## Detailed Steps")
+     st.write("""
+     1. **Feature Creation**:
+        - Generate new features from existing data using domain knowledge.
+        - Combine multiple features to create new ones (e.g., ratios, differences).
+     2. **Feature Transformation**:
+        - Apply mathematical transformations (e.g., log, square root) to features.
+        - Encode categorical variables using techniques like one-hot encoding or label encoding.
+     3. **Feature Selection**:
+        - Use statistical techniques (e.g., correlation, mutual information) to identify relevant features.
+        - Apply dimensionality reduction techniques (e.g., PCA) to reduce the number of features.
+     4. **Feature Scaling**:
+        - Normalize features to a range (e.g., 0 to 1) using MinMaxScaler.
+        - Standardize features to have a mean of 0 and a standard deviation of 1 using StandardScaler.
+     """)
+
+
58
+ st.write("## Quiz: Conceptual Questions")
59
+ q1 = st.radio("What is the main purpose of feature engineering?", ["Improve model accuracy", "Reduce dataset size", "Make data harder to interpret"])
60
+ if q1 == "Improve model accuracy":
61
+ st.success("βœ… Correct!")
62
+ else:
63
+ st.error("❌ Incorrect. The main purpose is to improve model accuracy.")
64
+
65
+ st.write("## Code-Based Quiz")
66
+ code_input = st.text_area("Write a function to normalize a dataset using MinMaxScaler",
67
+ value="from sklearn.preprocessing import MinMaxScaler\n\ndef normalize_data(df):\n scaler = MinMaxScaler()\n return pd.DataFrame(scaler.fit_transform(df), columns=df.columns)")
68
+ if "MinMaxScaler" in code_input:
69
+ st.success("βœ… Correct!")
70
+ else:
71
+ st.error("❌ Try again.")
72
+
73
+ st.write("## Learning Resources")
74
+ st.markdown("""
75
+ - πŸ“˜ [Feature Engineering for Machine Learning](https://towardsdatascience.com/feature-engineering-for-machine-learning-3a5e293a5114)
76
+ - πŸŽ“ [Scikit-learn Feature Engineering Guide](https://scikit-learn.org/stable/modules/feature_extraction.html)
77
+ - πŸ”¬ [Feature Engineering Techniques](https://www.analyticsvidhya.com/blog/2021/10/a-comprehensive-guide-on-feature-engineering/)
78
+ """)
pages/5_Modeling.py ADDED
@@ -0,0 +1,54 @@
+ import streamlit as st
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.metrics import accuracy_score
+
+ def run():
+     st.title("5. Modeling")
+     st.write("## Overview")
+     st.write("Building and training machine learning models to make predictions.")
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - **Model Selection**: Choose the model based on the problem (e.g., Classification, Regression).
+     - **Training Data**: The subset used to train the model.
+     - **Test Data**: The subset used to evaluate the model's performance.
+     """)
+
+     file = st.file_uploader("Upload a dataset for modeling", type=["csv"])
+     if file:
+         df = pd.read_csv(file)
+         target = st.selectbox("Select the target variable", df.columns)
+         features = st.multiselect("Select the feature columns", df.columns)
+
+         if target and features:
+             X = df[features]
+             y = df[target]
+             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+             model = LogisticRegression(max_iter=1000)  # higher iteration cap to help convergence
+             model.fit(X_train, y_train)
+             y_pred = model.predict(X_test)
+             accuracy = accuracy_score(y_test, y_pred)
+
+             st.write(f"Accuracy: {accuracy * 100:.2f}%")
+
+     st.write("## Quiz: Conceptual Questions")
+     q1 = st.radio("What is overfitting?", ["Model too simple", "Model too complex", "Data too large"])
+     if q1 == "Model too complex":
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Incorrect.")
+
+     st.write("## Code-Based Quiz")
+     code_input = st.text_area("Write a function to split data into train and test sets", value="def split_data(df, target):\n    X = df.drop(columns=[target])\n    y = df[target]\n    return train_test_split(X, y, test_size=0.2, random_state=42)")
+     if "train_test_split" in code_input:
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Try again.")
+
+     st.write("## Learning Resources")
+     st.markdown("""
+     - 📖 [Introduction to Machine Learning with Python](https://www.oreilly.com/library/view/introduction-to-machine/9781449369880/)
+     """)
pages/6_Evaluation.py ADDED
@@ -0,0 +1,62 @@
+ import streamlit as st
+ from sklearn.metrics import confusion_matrix, classification_report
+
+ def run():
+     st.title("6. Evaluation")
+     st.header("Introduction")
+     st.write("""
+     Model Evaluation is the process of assessing the performance of a machine learning model using various metrics.
+     """)
+     st.header("Objectives")
+     st.write("""
+     - Assess model performance.
+     - Compare different models.
+     - Select the best model.
+     """)
+     st.header("Key Activities")
+     st.write("""
+     - Model validation.
+     - Performance metrics calculation.
+     - Model comparison.
+     """)
+
+     st.write("## Overview")
+     st.write("Assessing model performance using appropriate evaluation metrics.")
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("### Confusion Matrix")
+     st.write("""
+     A confusion matrix is a table used to evaluate the performance of a classification model. It shows the number of true positives, true negatives, false positives, and false negatives. This helps in understanding how well the model is performing in terms of correctly and incorrectly classified instances.
+     """)
+
+     st.markdown("### Precision, Recall, F1-Score")
+     st.write("""
+     - **Precision**: This metric measures the accuracy of the positive predictions. It is the ratio of true positive predictions to the total predicted positives (true positives + false positives). High precision indicates a low false positive rate.
+     - **Recall**: Also known as sensitivity, this metric measures the ability of the model to identify all relevant instances. It is the ratio of true positive predictions to the total actual positives (true positives + false negatives). High recall indicates a low false negative rate.
+     - **F1-Score**: This is the harmonic mean of precision and recall. It provides a single metric that balances both precision and recall, especially useful when you need to balance the two.
+     """)
+
+     st.markdown("### ROC-AUC")
+     st.write("""
+     - **ROC (Receiver Operating Characteristic) Curve**: This is a graphical representation of the model's performance across different threshold values. It plots the true positive rate (recall) against the false positive rate.
+     - **AUC (Area Under the Curve)**: This metric summarizes the ROC curve into a single value. It represents the likelihood that the model will rank a randomly chosen positive instance higher than a randomly chosen negative one. An AUC of 1 indicates a perfect model, while an AUC of 0.5 indicates a model with no discriminative power.
+     """)
+ q1 = st.radio("Which metric is used for evaluating a classification model?", ["Accuracy", "Mean Squared Error", "All of the above"])
47
+ if q1 == "All of the above":
48
+ st.success("βœ… Correct!")
49
+ else:
50
+ st.error("❌ Incorrect.")
51
+
52
+ st.write("## Code-Based Quiz")
53
+ code_input = st.text_area("Write a function to calculate the confusion matrix", value="def confusion_mat(y_true, y_pred):\n return confusion_matrix(y_true, y_pred)")
54
+ if "confusion_matrix" in code_input:
55
+ st.success("βœ… Correct!")
56
+ else:
57
+ st.error("❌ Try again.")
58
+
59
+ st.write("## Learning Resources")
60
+ st.markdown("""
61
+ - πŸŽ“ [Evaluation Metrics in Machine Learning](https://scikit-learn.org/stable/modules/model_evaluation.html)
62
+ """)
pages/7_Deployment.py ADDED
@@ -0,0 +1,43 @@
+ import streamlit as st
+
+ def run():
+     st.title("7. Deployment & Testing")
+     st.header("Introduction")
+     st.write("""
+     Model Deployment is the process of integrating a machine learning model into a production environment where it can make predictions on new data.
+     """)
+     st.header("Objectives")
+     st.write("""
+     - Integrate the model into production.
+     - Monitor model performance.
+     - Update the model as needed.
+     """)
+
+     st.write("## Overview")
+     st.write("Deploying the model and testing its real-world performance.")
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - **Deployment**: Making the model available for use (e.g., via an API).
+     - **Testing**: Ensuring the model works in production environments.
+     - **Model Monitoring**: Continuously tracking model performance in real-time.
+     """)
+
+ st.write("## Quiz: Conceptual Questions")
27
+ q1 = st.radio("Which of the following is part of deployment?", ["Model Training", "Model Versioning", "Model Testing"])
28
+ if q1 == "Model Versioning":
29
+ st.success("βœ… Correct!")
30
+ else:
31
+ st.error("❌ Incorrect.")
32
+
33
+ st.write("## Code-Based Quiz")
34
+ code_input = st.text_area("Write code to save a model using joblib", value="import joblib\njoblib.dump(model, 'model.pkl')")
35
+ if "joblib.dump" in code_input:
36
+ st.success("βœ… Correct!")
37
+ else:
38
+ st.error("❌ Try again.")
39
+
40
+ st.write("## Learning Resources")
41
+ st.markdown("""
42
+ - πŸ“˜ [Machine Learning Model Deployment](https://towardsdatascience.com/deploying-machine-learning-models-using-flask-285dbddedbfa)
43
+ """)
pages/8_Models.py ADDED
@@ -0,0 +1,48 @@
+ import streamlit as st
+ from sklearn.linear_model import LogisticRegression
+ import torch
+ from transformers import pipeline
+
+
+ def run():
+     st.title("8. Machine Learning, Deep Learning & Transformers")
+     st.write("## Overview")
+     st.write("Learn about different machine learning models, deep learning models, and transformers.")
+
+     st.write("## Key Concepts & Explanations")
+     st.markdown("""
+     - **Machine Learning Models**: Supervised, unsupervised, and reinforcement learning.
+     - **Deep Learning**: Neural networks with many layers, used for complex tasks like image recognition.
+     - **Transformers**: A powerful model architecture used in natural language processing (NLP) tasks.
+     """)
+
+     # ML Example: Logistic Regression
+     st.write("### Example: Logistic Regression")
+     st.write("We'll use logistic regression to classify some sample data.")
+     model = LogisticRegression()
+     # (Insert a sample dataset and training procedure here)
+
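+     # One possible fill-in for the placeholder above (illustrative only:
+     # a synthetic dataset stands in for real sample data).
+     from sklearn.datasets import make_classification
+     X, y = make_classification(n_samples=100, n_features=4, random_state=42)
+     model.fit(X, y)
+     st.write(f"Training accuracy on synthetic data: {model.score(X, y):.2f}")
+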
+     # Deep Learning Example: Using Pretrained Transformers
+     st.write("### Example: Transformer Model")
+     nlp = pipeline("sentiment-analysis")
+     st.write(nlp("I love machine learning!"))
+
+     st.write("## Quiz: Conceptual Questions")
+     q1 = st.radio("What is a transformer model used for?", ["Text classification", "Image processing", "Time series analysis"])
+     if q1 == "Text classification":
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Incorrect.")
+
+     st.write("## Code-Based Quiz")
+     code_input = st.text_area("Write code to create a simple neural network using PyTorch", value="import torch\nimport torch.nn as nn\n\nclass SimpleNN(nn.Module):\n    def __init__(self):\n        super(SimpleNN, self).__init__()")
+     if "super(SimpleNN" in code_input:
+         st.success("✅ Correct!")
+     else:
+         st.error("❌ Try again.")
+
+     st.write("## Learning Resources")
+     st.markdown("""
+     - 📚 [Deep Learning with PyTorch](https://pytorch.org/tutorials/)
+     - 🌐 [Transformers Library Documentation](https://huggingface.co/docs/transformers/)
+     """)
pages/ML_Algorithms/decision_trees.py ADDED
@@ -0,0 +1,4 @@
+ import streamlit as st
+
+ def run():
+     st.title("TBD")
pages/ML_Algorithms/logistic_regression.py ADDED
@@ -0,0 +1,4 @@
+ import streamlit as st
+
+ def run():
+     st.title("TBD")
pages/ML_Algorithms/neural_networks.py ADDED
@@ -0,0 +1,4 @@
+ import streamlit as st
+
+ def run():
+     st.title("TBD")
pages/ML_Algorithms/random_forest.py ADDED
@@ -0,0 +1,4 @@
+ import streamlit as st
+
+ def run():
+     st.title("TBD")
pages/ML_Algorithms/svm.py ADDED
@@ -0,0 +1,4 @@
+ import streamlit as st
+
+ def run():
+     st.title("TBD")
pages/__pycache__/1_Business_understanding.cpython-311.pyc ADDED
Binary file (4.88 kB)

pages/__pycache__/2_Data_understanding.cpython-311.pyc ADDED
Binary file (3.75 kB)

pages/__pycache__/3_Algorithms.cpython-311.pyc ADDED
Binary file (1.3 kB)

pages/__pycache__/3_Data_preparation.cpython-311.pyc ADDED
Binary file (3.82 kB)

pages/__pycache__/4_Data_ingestion.cpython-311.pyc ADDED
Binary file (1.17 kB)

pages/__pycache__/4_Feature_engineering.cpython-311.pyc ADDED
Binary file (5.75 kB)

pages/__pycache__/5_Data_preparation.cpython-311.pyc ADDED
Binary file (1.13 kB)

pages/__pycache__/5_Modeling.cpython-311.pyc ADDED
Binary file (3.67 kB)

pages/__pycache__/6_Evaluation.cpython-311.pyc ADDED
Binary file (5.02 kB)

pages/__pycache__/7_Deployment.cpython-311.pyc ADDED
Binary file (2.77 kB)

pages/__pycache__/8_Models.cpython-311.pyc ADDED
Binary file (3.18 kB)

sidebar.png ADDED