{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.11","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":2020493,"sourceType":"datasetVersion","datasetId":1209379},{"sourceId":72929774,"sourceType":"kernelVersion"},{"sourceId":115003,"sourceType":"modelInstanceVersion","isSourceIdPinned":true,"modelInstanceId":96587,"modelId":120770}],"dockerImageVersionId":31012,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:51.929874Z","iopub.execute_input":"2025-05-20T14:53:51.930385Z","iopub.status.idle":"2025-05-20T14:53:51.949747Z","shell.execute_reply.started":"2025-05-20T14:53:51.930349Z","shell.execute_reply":"2025-05-20T14:53:51.948340Z"}},"outputs":[{"name":"stdout","text":"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\n","output_type":"stream"}],"execution_count":7},{"cell_type":"code","source":"import pandas as pd\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.metrics.pairwise import cosine_similarity","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:51.952143Z","iopub.execute_input":"2025-05-20T14:53:51.952474Z","iopub.status.idle":"2025-05-20T14:53:53.869665Z","shell.execute_reply.started":"2025-05-20T14:53:51.952449Z","shell.execute_reply":"2025-05-20T14:53:53.868593Z"}},"outputs":[],"execution_count":8},{"cell_type":"code","source":"df = 
pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\ndf","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:53.870648Z","iopub.execute_input":"2025-05-20T14:53:53.871163Z","iopub.status.idle":"2025-05-20T14:53:53.945876Z","shell.execute_reply.started":"2025-05-20T14:53:53.871138Z","shell.execute_reply":"2025-05-20T14:53:53.945061Z"}},"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":" product_name \\\n0 The Ordinary Natural Moisturising Factors + HA... \n1 CeraVe Facial Moisturising Lotion SPF 25 52ml \n2 The Ordinary Hyaluronic Acid 2% + B5 Hydration... \n3 AMELIORATE Transforming Body Lotion 200ml \n4 CeraVe Moisturising Cream 454g \n... ... \n1133 Elemis Life Elixirs Embrace Bath and Shower El... \n1134 Love Boo Splendidly Soothing Bath Soak (250ml) \n1135 Elemis Life Elixirs Fortitude Bath and Shower ... \n1136 Connock London Kukui Oil Soothing Bath & Showe... \n1137 Weleda Baby Calendula Cream Bath (200ml) \n\n product_url product_type \\\n0 https://www.lookfantastic.com/the-ordinary-nat... Moisturiser \n1 https://www.lookfantastic.com/cerave-facial-mo... Moisturiser \n2 https://www.lookfantastic.com/the-ordinary-hya... Moisturiser \n3 https://www.lookfantastic.com/ameliorate-trans... Moisturiser \n4 https://www.lookfantastic.com/cerave-moisturis... Moisturiser \n... ... ... \n1133 https://www.lookfantastic.com/elemis-life-elix... Bath Oil \n1134 https://www.lookfantastic.com/love-boo-splendi... Bath Oil \n1135 https://www.lookfantastic.com/elemis-life-elix... Bath Oil \n1136 https://www.lookfantastic.com/connock-london-k... Bath Oil \n1137 https://www.lookfantastic.com/weleda-baby-cale... Bath Oil \n\n clean_ingreds price \n0 ['capric triglyceride', 'cetyl alcohol', 'prop... £5.20 \n1 ['homosalate', 'glycerin', 'octocrylene', 'eth... £13.00 \n2 ['sodium hyaluronate', 'sodium hyaluronate', '... £6.20 \n3 ['ammonium lactate', 'c12-15', 'glycerin', 'pr... 
£22.50 \n4 ['glycerin', 'cetearyl alcohol', 'capric trigl... £16.00 \n... ... ... \n1133 ['prunus amygdalus dulcis', 'tipa-laureth sulf... £55.00 \n1134 ['sodium lauroyl', 'sodium cocoamphoacetate', ... £10.99 \n1135 ['prunus amygdalus dulcis', 'tipa-laureth sulf... £55.00 \n1136 ['capric triglyceride', 'peg-40 sorbitan perol... £36.00 \n1137 ['prunus amygdalus dulcis', 'sesamium indicum ... £13.95 \n\n[1138 rows x 5 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
product_nameproduct_urlproduct_typeclean_ingredsprice
0The Ordinary Natural Moisturising Factors + HA...https://www.lookfantastic.com/the-ordinary-nat...Moisturiser['capric triglyceride', 'cetyl alcohol', 'prop...£5.20
1CeraVe Facial Moisturising Lotion SPF 25 52mlhttps://www.lookfantastic.com/cerave-facial-mo...Moisturiser['homosalate', 'glycerin', 'octocrylene', 'eth...£13.00
2The Ordinary Hyaluronic Acid 2% + B5 Hydration...https://www.lookfantastic.com/the-ordinary-hya...Moisturiser['sodium hyaluronate', 'sodium hyaluronate', '...£6.20
3AMELIORATE Transforming Body Lotion 200mlhttps://www.lookfantastic.com/ameliorate-trans...Moisturiser['ammonium lactate', 'c12-15', 'glycerin', 'pr...£22.50
4CeraVe Moisturising Cream 454ghttps://www.lookfantastic.com/cerave-moisturis...Moisturiser['glycerin', 'cetearyl alcohol', 'capric trigl...£16.00
..................
1133Elemis Life Elixirs Embrace Bath and Shower El...https://www.lookfantastic.com/elemis-life-elix...Bath Oil['prunus amygdalus dulcis', 'tipa-laureth sulf...£55.00
1134Love Boo Splendidly Soothing Bath Soak (250ml)https://www.lookfantastic.com/love-boo-splendi...Bath Oil['sodium lauroyl', 'sodium cocoamphoacetate', ...£10.99
1135Elemis Life Elixirs Fortitude Bath and Shower ...https://www.lookfantastic.com/elemis-life-elix...Bath Oil['prunus amygdalus dulcis', 'tipa-laureth sulf...£55.00
1136Connock London Kukui Oil Soothing Bath & Showe...https://www.lookfantastic.com/connock-london-k...Bath Oil['capric triglyceride', 'peg-40 sorbitan perol...£36.00
1137Weleda Baby Calendula Cream Bath (200ml)https://www.lookfantastic.com/weleda-baby-cale...Bath Oil['prunus amygdalus dulcis', 'sesamium indicum ...£13.95
\n

1138 rows × 5 columns

\n
"},"metadata":{}}],"execution_count":9},{"cell_type":"code","source":"print(\"Available columns:\", df.columns.tolist())\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:53.947489Z","iopub.execute_input":"2025-05-20T14:53:53.947808Z","iopub.status.idle":"2025-05-20T14:53:53.952728Z","shell.execute_reply.started":"2025-05-20T14:53:53.947780Z","shell.execute_reply":"2025-05-20T14:53:53.951707Z"}},"outputs":[{"name":"stdout","text":"Available columns: ['product_name', 'product_url', 'product_type', 'clean_ingreds', 'price']\n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"df['combined_features'] = (\n df['product_type'].fillna('') + ' ' +\n df['clean_ingreds'].fillna('')\n)\ndf","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:53.954980Z","iopub.execute_input":"2025-05-20T14:53:53.955280Z","iopub.status.idle":"2025-05-20T14:53:53.983348Z","shell.execute_reply.started":"2025-05-20T14:53:53.955258Z","shell.execute_reply":"2025-05-20T14:53:53.982467Z"}},"outputs":[{"execution_count":11,"output_type":"execute_result","data":{"text/plain":" product_name \\\n0 The Ordinary Natural Moisturising Factors + HA... \n1 CeraVe Facial Moisturising Lotion SPF 25 52ml \n2 The Ordinary Hyaluronic Acid 2% + B5 Hydration... \n3 AMELIORATE Transforming Body Lotion 200ml \n4 CeraVe Moisturising Cream 454g \n... ... \n1133 Elemis Life Elixirs Embrace Bath and Shower El... \n1134 Love Boo Splendidly Soothing Bath Soak (250ml) \n1135 Elemis Life Elixirs Fortitude Bath and Shower ... \n1136 Connock London Kukui Oil Soothing Bath & Showe... \n1137 Weleda Baby Calendula Cream Bath (200ml) \n\n product_url product_type \\\n0 https://www.lookfantastic.com/the-ordinary-nat... Moisturiser \n1 https://www.lookfantastic.com/cerave-facial-mo... Moisturiser \n2 https://www.lookfantastic.com/the-ordinary-hya... Moisturiser \n3 https://www.lookfantastic.com/ameliorate-trans... 
Moisturiser \n4 https://www.lookfantastic.com/cerave-moisturis... Moisturiser \n... ... ... \n1133 https://www.lookfantastic.com/elemis-life-elix... Bath Oil \n1134 https://www.lookfantastic.com/love-boo-splendi... Bath Oil \n1135 https://www.lookfantastic.com/elemis-life-elix... Bath Oil \n1136 https://www.lookfantastic.com/connock-london-k... Bath Oil \n1137 https://www.lookfantastic.com/weleda-baby-cale... Bath Oil \n\n clean_ingreds price \\\n0 ['capric triglyceride', 'cetyl alcohol', 'prop... £5.20 \n1 ['homosalate', 'glycerin', 'octocrylene', 'eth... £13.00 \n2 ['sodium hyaluronate', 'sodium hyaluronate', '... £6.20 \n3 ['ammonium lactate', 'c12-15', 'glycerin', 'pr... £22.50 \n4 ['glycerin', 'cetearyl alcohol', 'capric trigl... £16.00 \n... ... ... \n1133 ['prunus amygdalus dulcis', 'tipa-laureth sulf... £55.00 \n1134 ['sodium lauroyl', 'sodium cocoamphoacetate', ... £10.99 \n1135 ['prunus amygdalus dulcis', 'tipa-laureth sulf... £55.00 \n1136 ['capric triglyceride', 'peg-40 sorbitan perol... £36.00 \n1137 ['prunus amygdalus dulcis', 'sesamium indicum ... £13.95 \n\n combined_features \n0 Moisturiser ['capric triglyceride', 'cetyl alc... \n1 Moisturiser ['homosalate', 'glycerin', 'octocr... \n2 Moisturiser ['sodium hyaluronate', 'sodium hya... \n3 Moisturiser ['ammonium lactate', 'c12-15', 'gl... \n4 Moisturiser ['glycerin', 'cetearyl alcohol', '... \n... ... \n1133 Bath Oil ['prunus amygdalus dulcis', 'tipa-lau... \n1134 Bath Oil ['sodium lauroyl', 'sodium cocoamphoa... \n1135 Bath Oil ['prunus amygdalus dulcis', 'tipa-lau... \n1136 Bath Oil ['capric triglyceride', 'peg-40 sorbi... \n1137 Bath Oil ['prunus amygdalus dulcis', 'sesamium... \n\n[1138 rows x 6 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
product_nameproduct_urlproduct_typeclean_ingredspricecombined_features
0The Ordinary Natural Moisturising Factors + HA...https://www.lookfantastic.com/the-ordinary-nat...Moisturiser['capric triglyceride', 'cetyl alcohol', 'prop...£5.20Moisturiser ['capric triglyceride', 'cetyl alc...
1CeraVe Facial Moisturising Lotion SPF 25 52mlhttps://www.lookfantastic.com/cerave-facial-mo...Moisturiser['homosalate', 'glycerin', 'octocrylene', 'eth...£13.00Moisturiser ['homosalate', 'glycerin', 'octocr...
2The Ordinary Hyaluronic Acid 2% + B5 Hydration...https://www.lookfantastic.com/the-ordinary-hya...Moisturiser['sodium hyaluronate', 'sodium hyaluronate', '...£6.20Moisturiser ['sodium hyaluronate', 'sodium hya...
3AMELIORATE Transforming Body Lotion 200mlhttps://www.lookfantastic.com/ameliorate-trans...Moisturiser['ammonium lactate', 'c12-15', 'glycerin', 'pr...£22.50Moisturiser ['ammonium lactate', 'c12-15', 'gl...
4CeraVe Moisturising Cream 454ghttps://www.lookfantastic.com/cerave-moisturis...Moisturiser['glycerin', 'cetearyl alcohol', 'capric trigl...£16.00Moisturiser ['glycerin', 'cetearyl alcohol', '...
.....................
1133Elemis Life Elixirs Embrace Bath and Shower El...https://www.lookfantastic.com/elemis-life-elix...Bath Oil['prunus amygdalus dulcis', 'tipa-laureth sulf...£55.00Bath Oil ['prunus amygdalus dulcis', 'tipa-lau...
1134Love Boo Splendidly Soothing Bath Soak (250ml)https://www.lookfantastic.com/love-boo-splendi...Bath Oil['sodium lauroyl', 'sodium cocoamphoacetate', ...£10.99Bath Oil ['sodium lauroyl', 'sodium cocoamphoa...
1135Elemis Life Elixirs Fortitude Bath and Shower ...https://www.lookfantastic.com/elemis-life-elix...Bath Oil['prunus amygdalus dulcis', 'tipa-laureth sulf...£55.00Bath Oil ['prunus amygdalus dulcis', 'tipa-lau...
1136Connock London Kukui Oil Soothing Bath & Showe...https://www.lookfantastic.com/connock-london-k...Bath Oil['capric triglyceride', 'peg-40 sorbitan perol...£36.00Bath Oil ['capric triglyceride', 'peg-40 sorbi...
1137Weleda Baby Calendula Cream Bath (200ml)https://www.lookfantastic.com/weleda-baby-cale...Bath Oil['prunus amygdalus dulcis', 'sesamium indicum ...£13.95Bath Oil ['prunus amygdalus dulcis', 'sesamium...
\n

1138 rows × 6 columns

\n
"},"metadata":{}}],"execution_count":11},{"cell_type":"code","source":"vectorizer = TfidfVectorizer()\nX = vectorizer.fit_transform(df['combined_features'])\nX\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:53.984282Z","iopub.execute_input":"2025-05-20T14:53:53.984568Z","iopub.status.idle":"2025-05-20T14:53:54.077714Z","shell.execute_reply.started":"2025-05-20T14:53:53.984547Z","shell.execute_reply":"2025-05-20T14:53:54.076776Z"}},"outputs":[{"execution_count":12,"output_type":"execute_result","data":{"text/plain":""},"metadata":{}}],"execution_count":12},{"cell_type":"code","source":"def recommend_products(user_query, top_n=5):\n    \"\"\"Return the top_n products whose TF-IDF features are most similar to user_query.\n\n    The raw query is vectorised with the fitted global vectorizer and compared with every\n    row of the global matrix X by cosine similarity. The matching rows of df are returned\n    best match first.\n    \"\"\"\n    user_vec = vectorizer.transform([user_query])\n    sim_scores = cosine_similarity(user_vec, X)[0]\n    # Rank best-first and then slice. The earlier argsort()[-top_n:] form returned every\n    # product for top_n=0, because a [-0:] slice selects the whole array.\n    ranked_indices = sim_scores.argsort()[::-1]\n    top_indices = ranked_indices[:max(int(top_n), 0)]\n    return df.iloc[top_indices][['product_name', 'product_type', 'clean_ingreds', 'price']]\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:54.078792Z","iopub.execute_input":"2025-05-20T14:53:54.079836Z","iopub.status.idle":"2025-05-20T14:53:54.084917Z","shell.execute_reply.started":"2025-05-20T14:53:54.079809Z","shell.execute_reply":"2025-05-20T14:53:54.083870Z"}},"outputs":[],"execution_count":13},{"cell_type":"code","source":"# NOTE(review): the product_type values shown earlier are spelled 'Moisturiser', so the\n# American spelling 'moisturizer' in this query cannot match a product_type token.\nquery = \"moisturizer for dry skin with hyaluronic acid\"\nrecommendations = recommend_products(query, top_n=5)\n\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:54.085880Z","iopub.execute_input":"2025-05-20T14:53:54.086238Z","iopub.status.idle":"2025-05-20T14:53:54.111817Z","shell.execute_reply.started":"2025-05-20T14:53:54.086178Z","shell.execute_reply":"2025-05-20T14:53:54.110709Z"}},"outputs":[],"execution_count":14},{"cell_type":"code","source":"print(\"Top product recommendations for:\", 
query)\nprint(recommendations.to_string(index=False))","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:54.112782Z","iopub.execute_input":"2025-05-20T14:53:54.113071Z","iopub.status.idle":"2025-05-20T14:53:54.122632Z","shell.execute_reply.started":"2025-05-20T14:53:54.113050Z","shell.execute_reply":"2025-05-20T14:53:54.121570Z"}},"outputs":[{"name":"stdout","text":"Top product recommendations for: moisturizer for dry skin with hyaluronic acid\n product_name product_type clean_ingreds price\n GLAMGLOW Supertoner 200ml Toner ['alcohol denat', 'propanediol', 'mandelic acid', 'glycolic acid', 'salicylic acid', 'lactic acid', 'tartaric acid', 'pyruvic acid', 'charcoal powder', 'eucalyptus globulus', 'eucalyptus globulus', 'hamamelis virginiana', 'algae extract', 'salix alba extract', 'nylon 12', 'aloe barbadenis extract', 'mentha piperita extract', 'sodium hydroxide', 'citric acid', 'parfum', 'limonene', 'linalool', 'disodium edta', 'phenoxyethanol', 'ci 77019', 'titanium dioxide', 'ci 77499'] £29.00\n La Roche-Posay Effaclar Clarifying Lotion 200ml Toner ['alcohol denat', 'glycerin', 'sodium citrate', 'propylene glycol', 'castor oil', 'disodium edta', 'capryloyl salicylic acid', 'citric acid', 'salicylic acid', 'parfum'] £12.50\n The Organic Pharmacy Four Acid Peel Serum 30ml Serum ['hamamelis virginiana', 'citrus grandis', 'passiflora quadrangularis fruit extract', 'glycerin', 'sodium lactate', 'citrus limon juice extract', 'alcohol', 'algin', 'ananas sativas fruit extract', 'vitis vinifera extract', 'dehydroacetic acid', 'benzyl alcohol', 'chondrus crispus extract', 'xanthan gum', 'potassium sorbate', 'sodium bisulfite', 'sorbic acid', 'limonene', 'citral', 'linalool', 'glycolic acid', 'lactic acid', 'citric acid', 'tartaric acid'] £30.00\n Avene Face Essentials Cleansing Foam 150ml Cleanser ['sodium cocoamphoacetate', 'sodium cocoyl glutamate', 'lactic acid', 'citric acid', 'disodium edta', 'parfum', 'glutamic acid', 'propylene glycol', 
'sodium benzoate', 'sodium chloride'] £13.50\nMurad Age Reform Aha/Bha Exfoliating Cleanser (200ml) Cleanser ['sodium laureth sulfate', 'cocamidopropyl betaine', 'simmondsia chinensis leaf extract', 'acrylates copolymer', 'glycol stearate', 'butylene glycol', 'sodium pca', 'dipotassium glycyrrhizate', 'sodium ascorbyl phosphate', 'glycolic acid', 'lactic acid', 'salicylic acid', 'polyquaternium-4', 'sodium hydroxide', 'sodium chloride', 'citric acid', 'disodium edta', 'phenoxyethanol', 'methylparaben', 'propylparaben'] £38.00\n","output_type":"stream"}],"execution_count":15},{"cell_type":"code","source":"import pandas as pd\nimport spacy\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.metrics.pairwise import cosine_similarity\n\n# Load dataset\ndf = pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\n\n# Load spaCy English model\nnlp = spacy.load(\"en_core_web_sm\")\n\n# Custom cleaner using spaCy (lemmatise + remove stopwords)\ndef clean_text(text):\n    \"\"\"Lower-case text and return its alphabetic, non-stopword lemmas joined by single spaces.\"\"\"\n    doc = nlp(text.lower())\n    return \" \".join([token.lemma_ for token in doc if token.is_alpha and not token.is_stop])\n\n# Apply text cleaning\ndf['clean_ingreds_cleaned'] = df['clean_ingreds'].fillna('').apply(clean_text)\ndf['product_type_cleaned'] = df['product_type'].fillna('').apply(clean_text)\n\n# Give weights to ingredients (more important) vs product type.\n# The ingredient text is repeated together with a trailing space: repeating the bare string\n# would glue the last word of the first copy to the first word of the second copy.\ndf['combined_features'] = df['product_type_cleaned'] + ' ' + (df['clean_ingreds_cleaned'] + ' ') * 2\n\n# Vectorization\nvectorizer = TfidfVectorizer()\nX = vectorizer.fit_transform(df['combined_features'])\n\n# Recommendation Function\ndef recommend_products_advanced(user_query, top_n=5):\n    \"\"\"Return the top_n products most similar to the cleaned user_query, best match first.\n\n    Prints a warning when the best cosine similarity is below 0.1; the rows are still returned.\n    \"\"\"\n    user_query_clean = clean_text(user_query)\n    user_vec = vectorizer.transform([user_query_clean])\n    sim_scores = cosine_similarity(user_vec, X)[0]\n\n    if sim_scores.max() < 0.1:\n        print(\"⚠️ Warning: No strong matches found. Try using different keywords.\")\n\n    # Rank best-first and then slice, so that top_n=0 yields no rows\n    # (an argsort()[-0:] slice would select every product).\n    top_indices = sim_scores.argsort()[::-1][:max(int(top_n), 0)]\n    return df.iloc[top_indices][['product_name', 'product_type', 'clean_ingreds', 'price']]\n\n# Test It\nquery = \"hydrating night cream with ceramides and niacinamide for dry skin\"\nrecommendations = recommend_products_advanced(query)\nprint(\"Recommendations for:\", query)\nprint(recommendations.to_string(index=False))\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-20T14:53:54.123596Z","iopub.execute_input":"2025-05-20T14:53:54.123857Z"}},"outputs":[],"execution_count":null},{"cell_type":"code","source":"def severity_score(ingredients):\n    \"\"\"Count how many of the listed skin-condition terms occur in ingredients (tested with the in operator).\"\"\"\n    severe_terms = ['acne', 'eczema', 'rosacea', 'melasma', 'psoriasis']\n    return sum(1 for term in severe_terms if term in ingredients)","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# NOTE(review): unfinished draft. It stores the scores in df['similarity_score'] but returns\n# None and never uses top_n, sort_by or max_price; the next cell holds the complete version.\ndef recommend_products_advanced(\n    user_query,\n    top_n=10,\n    sort_by='similarity',  # options: 'similarity', 'price', 'alphabetical', 'severity'\n    max_price=None\n):\n    user_query_clean = clean_text(user_query)\n    user_vec = vectorizer.transform([user_query_clean])\n    sim_scores = cosine_similarity(user_vec, X).flatten()\n\n    df['similarity_score'] = sim_scores\ndf","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"import pandas as pd\nimport spacy\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.metrics.pairwise import cosine_similarity\n\n# Load dataset\ndf = pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\n# Prices are stored as text such as '£5.20'. Strip currency symbols and thousands separators\n# first: pd.to_numeric(..., errors='coerce') turns any value that still carries them into NaN.\ndf['price'] = pd.to_numeric(df['price'].replace('[£$,]', '', regex=True), errors='coerce')\n\n# Load spaCy NLP model\nnlp = 
spacy.load(\"en_core_web_sm\")\n\n# Clean input text (lemmatise + stopword removal)\ndef clean_text(text):\n    \"\"\"Lower-case str(text) and return its alphabetic, non-stopword lemmas joined by single spaces.\"\"\"\n    doc = nlp(str(text).lower())\n    return \" \".join([token.lemma_ for token in doc if token.is_alpha and not token.is_stop])\n\n# Preprocess relevant text columns\ndf['clean_ingreds_cleaned'] = df['clean_ingreds'].fillna('').apply(clean_text)\ndf['product_type_cleaned'] = df['product_type'].fillna('').apply(clean_text)\n# Repeat the ingredient text with a trailing space so the two copies stay separate tokens.\ndf['combined_features'] = df['product_type_cleaned'] + ' ' + (df['clean_ingreds_cleaned'] + ' ') * 2\n\n# Vectorize\nvectorizer = TfidfVectorizer()\nX = vectorizer.fit_transform(df['combined_features'])\n\n# Define custom skin severity scoring (simple rule-based)\ndef severity_score(ingredients):\n    \"\"\"Count how many of the listed condition terms occur in ingredients (tested with the in operator).\"\"\"\n    severe_terms = ['acne', 'eczema', 'rosacea', 'melasma', 'psoriasis', 'inflammation']\n    return sum(1 for term in severe_terms if term in ingredients)\n\ndf['severity_score'] = df['clean_ingreds_cleaned'].apply(severity_score)\n\n# 🧠 Enhanced Recommendation Function\ndef recommend_products_advanced(\n    user_query,\n    top_n=10,\n    sort_by='similarity',  # Options: similarity, price, alphabetical, severity\n    max_price=None\n):\n    \"\"\"Rank products for user_query by TF-IDF cosine similarity and return the first top_n rows.\n\n    The scores are written to df['similarity_score']. When max_price is given, rows priced above\n    it (and rows with a NaN price) are dropped. sort_by selects the ordering; an unrecognised\n    value leaves the rows unsorted.\n    \"\"\"\n    # Clean the user input\n    user_query_clean = clean_text(user_query)\n    user_vec = vectorizer.transform([user_query_clean])\n    sim_scores = cosine_similarity(user_vec, X).flatten()\n\n    # Attach similarity scores\n    df['similarity_score'] = sim_scores\n\n    # Apply price filtering if specified\n    if max_price is not None:\n        filtered_df = df[df['price'] <= max_price]\n    else:\n        filtered_df = df.copy()\n\n    # Sorting options. Price and severity use the similarity score as a tie-breaker, so the\n    # query still decides which of the equally ranked products come first.\n    if sort_by == 'similarity':\n        sorted_df = filtered_df.sort_values(by='similarity_score', ascending=False)\n    elif sort_by == 'price':\n        sorted_df = filtered_df.sort_values(by=['price', 'similarity_score'], ascending=[True, False])\n    elif sort_by == 'alphabetical':\n        sorted_df = filtered_df.sort_values(by='product_name')\n    elif sort_by == 'severity':\n        sorted_df = filtered_df.sort_values(by=['severity_score', 'similarity_score'], ascending=[False, False])\n    else:\n        sorted_df = filtered_df\n\n    return sorted_df[['product_name', 'product_type', 'clean_ingreds', 'price', 'similarity_score']].head(top_n)\n\n# 🔍 Example usage\nquery = \"hydrating cream for acne with niacinamide\"\nrecommendations = recommend_products_advanced(query, top_n=5, sort_by='price', max_price=2500)\n\nprint(\"🔎 Top Matches For:\", query)\nprint(recommendations.to_string(index=False))\n# Show the frame already computed above instead of running the whole query a second time.\nrecommendations\n\n","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"import pandas as pd\n\n# Load your dataset\ndf = pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\n# Strip currency symbols before converting; raw strings such as '£5.20' would coerce to NaN.\ndf['price'] = pd.to_numeric(df['price'].replace('[£$,]', '', regex=True), errors='coerce')\n\n# (Reapply text cleaning function and other prep if not already run)\nimport spacy\nnlp = spacy.load(\"en_core_web_sm\")\n\ndef clean_text(text):\n    \"\"\"Lower-case str(text) and return its alphabetic, non-stopword lemmas joined by single spaces.\"\"\"\n    doc = nlp(str(text).lower())\n    return \" \".join([token.lemma_ for token in doc if token.is_alpha and not token.is_stop])\n","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"# Create combined cleaned text\n# The trailing space keeps the end of the first copy apart from the start of the second copy.\ndf['combined_text'] = (df['product_type'].fillna('') + ' ' + df['clean_ingreds'].fillna('') + ' ') * 2\ndf['combined_text_cleaned'] = df['combined_text'].apply(clean_text)\n","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"from sentence_transformers import SentenceTransformer, util\n\n# Load pre-trained Sentence-BERT model\nsbert_model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight & effective\n\n# Compute sentence embeddings for product descriptions\n# The trailing space keeps the end of the first copy apart from the start of the second copy.\ndf['combined_text'] = (df['product_type'].fillna('') + ' ' + df['clean_ingreds'].fillna('') + ' ') * 2\ndf['combined_text_cleaned'] = df['combined_text'].apply(clean_text)\nproduct_embeddings = sbert_model.encode(df['combined_text_cleaned'].tolist(), convert_to_tensor=True)\n\n# Enhanced Recommendation Function using SBERT\ndef recommend_with_embeddings(\n    user_query,\n    top_n=10,\n    sort_by='semantic',  # New default\n    max_price=None\n):\n    \"\"\"Rank products by SBERT cosine similarity to the cleaned user_query and return the first top_n rows.\n\n    The scores are written to df['semantic_score']. When max_price is given, rows priced above\n    it (and rows with a NaN price) are dropped. An unrecognised sort_by leaves the rows unsorted.\n    \"\"\"\n    user_query_cleaned = clean_text(user_query)\n    query_embedding = sbert_model.encode(user_query_cleaned, convert_to_tensor=True)\n\n    # Semantic similarity\n    cosine_scores = util.pytorch_cos_sim(query_embedding, product_embeddings)[0]\n\n    df['semantic_score'] = cosine_scores.cpu().numpy()\n\n    # Apply price filter\n    if max_price is not None:\n        filtered_df = df[df['price'] <= max_price]\n    else:\n        filtered_df = df.copy()\n\n    # df was reloaded from the CSV for the SBERT cells, so it may have no 'severity_score'\n    # column. Derive it from the cleaned text (using severity_score from an earlier cell)\n    # instead of failing with a KeyError.\n    if sort_by == 'severity' and 'severity_score' not in filtered_df.columns:\n        filtered_df = filtered_df.assign(\n            severity_score=filtered_df['combined_text_cleaned'].apply(severity_score)\n        )\n\n    # Sorting\n    if sort_by == 'semantic':\n        sorted_df = filtered_df.sort_values(by='semantic_score', ascending=False)\n    elif sort_by == 'price':\n        sorted_df = filtered_df.sort_values(by='price', ascending=True)\n    elif sort_by == 'severity':\n        sorted_df = filtered_df.sort_values(by='severity_score', ascending=False)\n    elif sort_by == 'alphabetical':\n        sorted_df = filtered_df.sort_values(by='product_name')\n    else:\n        sorted_df = filtered_df\n\n    return sorted_df[['product_name', 'product_type', 'clean_ingreds', 'price', 'semantic_score']].head(top_n)\n\n# 🔍 Example Usage\nquery = \"hydrating cream for acne with niacinamide\"\nrecommendations = recommend_with_embeddings(query, top_n=5, sort_by='semantic', max_price=2500)\n\nprint(\"🔎 Semantic Top Matches For:\", query)\nprint(recommendations.to_string(index=False))\n","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"import pandas as pd\nimport spacy\nfrom sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics.pairwise import cosine_similarity\n\n# Load dataset\ndf = pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\n# Strip currency symbols before converting; raw strings such as '£5.20' would coerce to NaN.\ndf['price'] = pd.to_numeric(df['price'].replace('[£$,]', '', regex=True), errors='coerce')\n\n# Load spaCy NLP model\nnlp = spacy.load(\"en_core_web_sm\")\n\n# Clean input text (group 
together + stopword removal)\ndef clean_text(text):\n doc = nlp(str(text).lower())\n return \" \".join([token.lemma_ for token in doc if token.is_alpha and not token.is_stop])\n\n# Preprocess relevant text columns\ndf['clean_ingreds_cleaned'] = df['clean_ingreds'].fillna('').apply(clean_text)\ndf['product_type_cleaned'] = df['product_type'].fillna('').apply(clean_text)\ndf['combined_features'] = df['product_type_cleaned'] + ' ' + df['clean_ingreds_cleaned'] * 2\n\n# Severity scoring\ndef severity_score(ingredients):\n severe_terms = ['acne', 'eczema', 'rosacea', 'melasma', 'psoriasis', 'inflammation']\n return sum([1 for word in severe_terms if word in ingredients])\n\ndf['severity_score'] = df['clean_ingreds_cleaned'].apply(severity_score)\n\n# Ingredient presence matrix\ningred_vectorizer = CountVectorizer(binary=True, max_features=100)\ningred_matrix = ingred_vectorizer.fit_transform(df['clean_ingreds_cleaned'])\ningred_df = pd.DataFrame(ingred_matrix.toarray(), columns=ingred_vectorizer.get_feature_names_out())\n\n# Combine features\ndf_model = pd.concat([df.reset_index(drop=True), ingred_df], axis=1)\nfeature_cols = ingred_vectorizer.get_feature_names_out().tolist() + ['price', 'severity_score']\n\n# Simulate training labels (relevance) using cosine similarity from TF-IDF\ntfidf = TfidfVectorizer()\nX_tfidf = tfidf.fit_transform(df['combined_features'])\n\ndef recommend_products_tree_model(\n user_query,\n top_n=10,\n max_price=None,\n sort_by='model' # Can also use 'price' or 'severity'\n):\n # Clean and transform query\n user_query_clean = clean_text(user_query)\n user_query_vec = tfidf.transform([user_query_clean])\n \n # Simulated relevance as label\n df_model['sim_label'] = cosine_similarity(user_query_vec, X_tfidf).flatten()\n\n # Train a Random Forest model on current query\n X = df_model[feature_cols].fillna(0)\n y = df_model['sim_label']\n model = RandomForestRegressor(n_estimators=100, random_state=42)\n model.fit(X, y)\n\n # Predict relevance\n 
df_model['predicted_relevance'] = model.predict(X)\n\n # Filter by price if needed\n if max_price is not None:\n filtered_df = df_model[df_model['price'] <= max_price]\n else:\n filtered_df = df_model\n\n # Sorting\n if sort_by == 'model':\n sorted_df = filtered_df.sort_values(by='predicted_relevance', ascending=False)\n elif sort_by == 'price':\n sorted_df = filtered_df.sort_values(by='price', ascending=True)\n elif sort_by == 'severity':\n sorted_df = filtered_df.sort_values(by='severity_score', ascending=False)\n else:\n sorted_df = filtered_df\n\n return sorted_df[['product_name', 'product_type', 'clean_ingreds', 'price', 'predicted_relevance']].head(top_n)\n\n# 🔍 Example usage\nquery = \"hydrating cream for acne with niacinamide\"\nrecommendations = recommend_products_tree_model(query, top_n=5, max_price=2500, sort_by='model')\n\nprint(\"🔎 Tree Model Top Matches For:\", query)\nprint(recommendations.to_string(index=False))\n","metadata":{"trusted":true},"outputs":[],"execution_count":null},{"cell_type":"code","source":"import pandas as pd\nimport spacy\nfrom sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics.pairwise import cosine_similarity\n\n# Load dataset\ndf = pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\ndf['price'] = pd.to_numeric(df['price'], errors='coerce')\n\n# Load spaCy model\nnlp = spacy.load(\"en_core_web_sm\")\n\n# Text cleaning function\ndef clean_text(text):\n doc = nlp(str(text).lower())\n return \" \".join([token.lemma_ for token in doc if token.is_alpha and not token.is_stop])\n\n# Apply cleaning\ndf['clean_ingreds_cleaned'] = df['clean_ingreds'].fillna('').apply(clean_text)\ndf['product_type_cleaned'] = df['product_type'].fillna('').apply(clean_text)\ndf['combined_features'] = df['product_type_cleaned'] + ' ' + df['clean_ingreds_cleaned'] * 2\n\n# Severity score\ndef severity_score(ingredients):\n 
def _prompt_number(prompt, cast, default):
    """Ask for a number with `prompt`; return `default` on empty or unparsable input."""
    raw = input(prompt).strip()
    if not raw:
        return default
    try:
        return cast(raw)
    except ValueError:
        # Only parsing failures are tolerated; KeyboardInterrupt (e.g. the user
        # interrupting the prompt) must propagate instead of being swallowed.
        return default


def recommend_user_controlled():
    """Interactively recommend products for a free-text skincare query.

    Prompts for a query, the number of results, an optional maximum price and
    a sort order, then prints the top matches. Nothing is returned.

    Relies on names defined at module level in the same cell: ``clean_text``,
    ``tfidf``, ``X_tfidf``, ``df_model`` and ``feature_cols``. As in the
    original code, the columns ``sim_label`` and ``predicted_relevance`` are
    (re)written on the shared ``df_model`` frame on every call.

    This single definition replaces two same-named definitions that used to
    live in the cell (the second one silently shadowed the first after the
    first had already been called).
    """
    # 🌟 Get user input
    print("Enter your skincare query (e.g., 'moisturizer for acne with salicylic acid'):")
    user_query = input("Query: ")

    top_n = _prompt_number("How many recommendations do you want? (default 5): ", int, 5)
    if top_n <= 0:
        # head() with a non-positive count would show nothing, or (for negative
        # values) nearly the whole table; fall back to the advertised default.
        top_n = 5

    # None means "no price limit".
    max_price = _prompt_number("Enter max price (or press enter to skip): ", float, None)

    # An empty answer silently selects the documented default; only a
    # non-empty, unknown option triggers the warning.
    sort_by = input("Sort by (model / price / severity)? (default = model): ").strip().lower() or 'model'
    if sort_by not in ('model', 'price', 'severity'):
        print(f"⚠️ Invalid sort option '{sort_by}', defaulting to 'model'.")
        sort_by = 'model'

    # ✅ Preprocess query
    user_vec = tfidf.transform([clean_text(user_query)])

    # Simulated labels for tree model training: TF-IDF cosine similarity
    # between the query and every product.
    df_model['sim_label'] = cosine_similarity(user_vec, X_tfidf).flatten()
    X = df_model[feature_cols].fillna(0)
    y = df_model['sim_label']

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Predict relevance
    df_model['predicted_relevance'] = model.predict(X)

    # Filter by price only when a limit was actually given. A NaN price fails
    # every `<=` comparison, so the former unconditional `price <= inf` filter
    # removed all NaN-priced rows even when the user skipped the limit.
    if max_price is None:
        filtered_df = df_model
        print(f"\n🔎 Found {len(filtered_df)} products (no price limit) for query: '{user_query}'")
    else:
        filtered_df = df_model[df_model['price'] <= max_price]
        print(f"\n🔎 Found {len(filtered_df)} products under price {max_price} for query: '{user_query}'")

    if filtered_df.empty:
        print("⚠️ No matching products. Try increasing price or changing the query.")
        return

    # Sort: (column, ascending) per validated option.
    sort_column, ascending = {
        'model': ('predicted_relevance', False),
        'price': ('price', True),
        'severity': ('severity_score', False),
    }[sort_by]
    sorted_df = filtered_df.sort_values(by=sort_column, ascending=ascending)

    # Show result
    results = sorted_df[['product_name', 'product_type', 'clean_ingreds', 'price', 'predicted_relevance']].head(top_n)
    print("\n✅ Top Recommendations:")
    print(results.to_string(index=False))
def get_skincare_recommendations(filepath):
    """Prompt for a skincare concern and print matching products from a CSV.

    Parameters
    ----------
    filepath : str
        Path of a CSV with at least the columns ``product_name``,
        ``product_type``, ``clean_ingreds`` and ``price``.

    The function reads four answers from ``input()`` (concern, number of
    results, optional maximum price, sort column), filters the products whose
    ``clean_ingreds`` mention an ingredient associated with the concern, and
    prints the top rows. It returns ``None``.

    The result formatting and printing used to sit at module level, outside
    the function, where ``top_recommendations`` does not exist (NameError).
    It now lives inside the function and runs only when there are results.
    """
    import ast

    # Load and clean data
    df = pd.read_csv(filepath)
    df['price'] = df['price'].replace('[£$,]', '', regex=True).astype(float)

    # --- USER INPUT SECTION ---
    concern = input("Enter your skincare concern (e.g., 'acne', 'dryness', 'aging', etc.): ").strip().lower()

    try:
        num_results = int(input("How many recommendations do you want? (default 5): ") or 5)
    except ValueError:
        num_results = 5

    try:
        max_price_input = input("Enter max price (or press enter to skip): ").strip()
        max_price = float(max_price_input) if max_price_input else None
    except ValueError:
        max_price = None

    sort_by = input("Sort by (price / product_name / product_type)? (default = price): ").strip().lower() or "price"

    # Ingredient filters for common concerns
    concern_ingredients = {
        "acne": ["salicylic acid", "benzoyl peroxide", "niacinamide", "azelaic acid", "tea tree"],
        "dryness": ["hyaluronic acid", "glycerin", "ceramide", "squalane"],
        "aging": ["retinol", "peptides", "vitamin c", "niacinamide"],
        "sensitivity": ["allantoin", "panthenol", "madecassoside", "centella"],
        "pigmentation": ["vitamin c", "kojic acid", "azelaic acid", "niacinamide"],
    }

    # Select ingredients for concern or fallback to raw keyword
    ingredients = concern_ingredients.get(concern, [concern])
    pattern = '|'.join(ingredients)

    # Filter by ingredients
    filtered = df[df['clean_ingreds'].str.contains(pattern, case=False, na=False)]

    # Apply price filter
    if max_price is not None:
        filtered = filtered[filtered['price'] <= max_price]

    # Sort and get top results (unknown sort columns fall back to price)
    if sort_by in filtered.columns:
        filtered = filtered.sort_values(by=sort_by)
    else:
        filtered = filtered.sort_values(by="price")

    # .copy() so the display-only edits below never touch a view of `df`.
    top_recommendations = filtered.head(num_results)[
        ['product_name', 'product_type', 'clean_ingreds', 'price']
    ].copy()

    if top_recommendations.empty:
        print("\n❌ No matching products found based on your criteria.")
        return

    def _first_three(raw):
        """Shorten a stringified ingredient list to its first three entries."""
        if not (isinstance(raw, str) and raw.startswith('[')):
            return raw
        # SECURITY: this text comes from the CSV. The previous code passed it
        # to eval(), which would execute arbitrary expressions; literal_eval
        # only accepts Python literals. Unparsable text is shown unchanged.
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return raw
        if not isinstance(parsed, (list, tuple)):
            return raw
        return ', '.join(str(item) for item in parsed[:3]) + "..."

    # Truncate ingredients for cleaner display
    top_recommendations['clean_ingreds'] = top_recommendations['clean_ingreds'].apply(_first_three)

    # Round price for neatness
    top_recommendations['price'] = top_recommendations['price'].round(2)

    # Clean print (the heading is printed once; it used to appear twice)
    print("\n🔍 Top Recommendations:\n")
    for _, row in top_recommendations.iterrows():
        print(f"🧴 Product: {row['product_name']}")
        print(f"📦 Type   : {row['product_type']}")
        print(f"🧪 Key Ingredients: {row['clean_ingreds']}")
        print(f"💰 Price  : £{row['price']}\n")
\u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m )\n","\u001b[0;31mNameError\u001b[0m: name 'top_recommendations' is not defined"],"ename":"NameError","evalue":"name 'top_recommendations' is not defined","output_type":"error"}],"execution_count":9},{"cell_type":"code","source":"import pandas as pd\n\ndef get_skincare_recommendations(filepath):\n # Load and clean data\n df = pd.read_csv(filepath)\n df['price'] = df['price'].replace('[£$,]', '', regex=True).astype(float)\n\n concern = input(\"Enter your skincare concern (e.g., 'acne', 'dryness', 'aging', etc.): \").strip().lower()\n \n try:\n num_results = int(input(\"How many recommendations do you want? (default 5): \") or 5)\n except ValueError:\n num_results = 5\n\n try:\n max_price_input = input(\"Enter max price (or press enter to skip): \").strip()\n max_price = float(max_price_input) if max_price_input else None\n except ValueError:\n max_price = None\n\n sort_by = input(\"Sort by (price / product_name / product_type)? 
(default = price): \").strip().lower() or \"price\"\n\n concern_ingredients = {\n \"acne\": [\"salicylic acid\", \"benzoyl peroxide\", \"niacinamide\", \"azelaic acid\", \"tea tree\"],\n \"dryness\": [\"hyaluronic acid\", \"glycerin\", \"ceramide\", \"squalane\"],\n \"aging\": [\"retinol\", \"peptides\", \"vitamin c\", \"niacinamide\"],\n \"sensitivity\": [\"allantoin\", \"panthenol\", \"madecassoside\", \"centella\"],\n \"pigmentation\": [\"vitamin c\", \"kojic acid\", \"azelaic acid\", \"niacinamide\"],\n }\n\n ingredients = concern_ingredients.get(concern, [concern])\n pattern = '|'.join(ingredients)\n\n # Filter 1: by ingredients\n filtered = df[df['clean_ingreds'].str.contains(pattern, case=False, na=False)]\n\n # Filter 2: by price\n if max_price is not None:\n filtered = filtered[filtered['price'] <= max_price]\n\n # --- Fallbacks ---\n if filtered.empty:\n print(\"\\n⚠️ No results for both concern and price filters.\")\n filtered = df[df['clean_ingreds'].str.contains(pattern, case=False, na=False)]\n if not filtered.empty:\n print(\"✅ Showing results based on concern only (price filter dropped).\")\n else:\n print(\"⚠️ No matches for concern either. 
def get_skincare_recommendations(filepath):
    """Prompt for a skincare concern and print product recommendations.

    Parameters
    ----------
    filepath : str
        Path of a CSV with at least the columns ``product_name``,
        ``product_type``, ``clean_ingreds`` and ``price``.

    Reads four answers from ``input()``: concern, number of results, optional
    maximum price and sort column. Products are matched on the ingredients
    associated with the concern (or on the concern text itself when it is not
    a known concern). If the concern + price filters leave nothing, the price
    filter is dropped; if the concern alone matches nothing, all products are
    used. Results are printed; the function returns ``None``.
    """
    import ast
    import re

    # Load and clean data
    df = pd.read_csv(filepath)
    df['price'] = df['price'].replace('[£$,]', '', regex=True).astype(float)

    concern = input("Enter your skincare concern (e.g., 'acne', 'dryness', 'aging', etc.): ").strip().lower()

    try:
        num_results = int(input("How many recommendations do you want? (default 5): ") or 5)
    except ValueError:
        num_results = 5

    try:
        max_price_input = input("Enter max price (or press enter to skip): ").strip()
        max_price = float(max_price_input) if max_price_input else None
    except ValueError:
        max_price = None

    sort_by = input("Sort by (price / product_name / product_type)? (default = price): ").strip().lower() or "price"

    concern_ingredients = {
        "acne": ["salicylic acid", "benzoyl peroxide", "niacinamide", "azelaic acid", "tea tree"],
        "dryness": ["hyaluronic acid", "glycerin", "ceramide", "squalane"],
        "aging": ["retinol", "peptides", "vitamin c", "niacinamide"],
        "sensitivity": ["allantoin", "panthenol", "madecassoside", "centella"],
        "pigmentation": ["vitamin c", "kojic acid", "azelaic acid", "niacinamide"],
    }

    ingredients = concern_ingredients.get(concern, [concern])
    # The keywords are matched as literal text. Without escaping, a typed
    # concern containing regex metacharacters (e.g. "c++" or "(") made
    # str.contains raise re.error.
    pattern = '|'.join(re.escape(item) for item in ingredients)

    # Filter 1: by ingredients
    by_concern = df[df['clean_ingreds'].str.contains(pattern, case=False, na=False)]

    # Filter 2: by price
    filtered = by_concern
    if max_price is not None:
        filtered = filtered[filtered['price'] <= max_price]

    # --- Fallbacks ---
    if filtered.empty:
        print("\n⚠️ No results for both concern and price filters.")
        filtered = by_concern
        if not filtered.empty:
            print("✅ Showing results based on concern only (price filter dropped).")
        else:
            print("⚠️ No matches for concern either. Showing top overall products.")
            filtered = df.copy()

    # Sort (unknown sort columns fall back to price)
    if sort_by in filtered.columns:
        filtered = filtered.sort_values(by=sort_by)
    else:
        filtered = filtered.sort_values(by="price")

    # Format only the rows that are shown, on an explicit copy, so the
    # display edits never write into a slice of `df`.
    top_recommendations = filtered.head(num_results).copy()

    def _preview_ingredients(raw):
        """Shorten a stringified ingredient list to its first three entries."""
        if not (isinstance(raw, str) and raw.startswith('[')):
            return raw
        # SECURITY: the text comes from the CSV. It was previously passed to
        # eval(), which executes arbitrary expressions; literal_eval accepts
        # Python literals only. Anything it rejects is displayed unchanged.
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return raw
        if not isinstance(parsed, (list, tuple)):
            return raw
        return ', '.join(str(item) for item in parsed[:3]) + "..."

    top_recommendations['clean_ingreds'] = top_recommendations['clean_ingreds'].apply(_preview_ingredients)
    top_recommendations['price'] = top_recommendations['price'].round(2)

    # Display
    print("\n🔍 Top Recommendations:\n")
    for _, row in top_recommendations.iterrows():
        print(f"🧴 Product: {row['product_name']}")
        print(f"📦 Type   : {row['product_type']}")
        print(f"🧪 Key Ingredients: {row['clean_ingreds']}")
        print(f"💰 Price  : £{row['price']}\n")
def evaluate_recommendation_accuracy(filepath):
    """Score the ingredient-keyword concern predictor against labelled data.

    Parameters
    ----------
    filepath : str
        CSV with a ``clean_ingreds`` column and a ``concerns`` column holding
        comma-separated ground-truth concerns per product.

    Returns
    -------
    pandas.DataFrame
        One row per concern with Accuracy, Precision, Recall, F1 Score and the
        raw TP/FP/FN/TN counts, sorted by F1 Score (descending).

    Raises
    ------
    KeyError
        If ``clean_ingreds`` or ``concerns`` is missing from the CSV.
    """
    df = pd.read_csv(filepath)

    required = ['clean_ingreds', 'concerns']
    missing = [col for col in required if col not in df.columns]
    if missing:
        # Same exception type dropna(subset=...) would raise, but with an
        # actionable message.
        raise KeyError(
            f"Cannot evaluate: column(s) {missing} not found in {filepath!r}; "
            "a ground-truth 'concerns' column (comma-separated) is required."
        )

    # Assumed ground truth column: comma-separated concerns
    df = df.dropna(subset=required).copy()
    # Raw-string pattern (the old ',\s*' literal relied on an invalid escape
    # sequence); whitespace around commas and at the ends is ignored.
    df['true_concerns'] = (
        df['concerns'].str.lower().str.strip().str.split(r'\s*,\s*', regex=True)
    )

    concern_ingredients = {
        "acne": ["salicylic acid", "benzoyl peroxide", "niacinamide", "azelaic acid", "tea tree"],
        "dryness": ["hyaluronic acid", "glycerin", "ceramide", "squalane"],
        "aging": ["retinol", "peptides", "vitamin c", "niacinamide"],
        "sensitivity": ["allantoin", "panthenol", "madecassoside", "centella"],
        "pigmentation": ["vitamin c", "kojic acid", "azelaic acid", "niacinamide"],
    }

    # Predict concerns based on ingredient matching
    def predict_concerns(ingredients):
        ingredients = ingredients.lower()
        return [
            concern
            for concern, keywords in concern_ingredients.items()
            if any(keyword in ingredients for keyword in keywords)
        ]

    df['pred_concerns'] = df['clean_ingreds'].apply(predict_concerns)

    # Binarize true and predicted labels; columns follow `classes` order.
    mlb = MultiLabelBinarizer(classes=list(concern_ingredients.keys()))
    y_true = mlb.fit_transform(df['true_concerns'])
    y_pred = mlb.transform(df['pred_concerns'])

    # One 2x2 matrix per label column. No `labels=` argument: with indicator
    # matrices the labels are column indices, so passing the string class
    # names (as the code did before) makes the call fail.
    matrices = multilabel_confusion_matrix(y_true, y_pred)

    results = {}
    for idx, label in enumerate(mlb.classes_):
        tn, fp, fn, tp = matrices[idx].ravel()
        total = tp + tn + fp + fn
        accuracy = (tp + tn) / total if total > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
        results[label] = {
            "Accuracy": round(accuracy, 3),
            "Precision": round(precision, 3),
            "Recall": round(recall, 3),
            "F1 Score": round(f1, 3),
            "TP": int(tp), "FP": int(fp), "FN": int(fn), "TN": int(tn)
        }

    return pd.DataFrame(results).T.sort_values(by="F1 Score", ascending=False)
def predict_concerns(ingredients):
    """Return the concerns whose keyword list matches the ingredient text.

    ``ingredients`` is lower-cased, then each entry of the module-level
    ``concern_ingredients`` mapping is checked: a concern is included when at
    least one of its keywords occurs as a substring. Concerns are returned in
    the mapping's iteration order.
    """
    haystack = ingredients.lower()
    return [
        concern
        for concern, keywords in concern_ingredients.items()
        if any(keyword in haystack for keyword in keywords)
    ]
\n3 AMELIORATE Transforming Body Lotion 200ml \n4 CeraVe Moisturising Cream 454g \n\n predicted_concerns \n0 [dryness, sensitivity] \n1 [acne, dryness, aging, pigmentation] \n2 [dryness, sensitivity] \n3 [dryness, sensitivity] \n4 [dryness] \n","output_type":"stream"}],"execution_count":10},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nfrom sklearn.metrics import multilabel_confusion_matrix\nfrom sklearn.preprocessing import MultiLabelBinarizer\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport random\n\n# Load your dataset\ndf = pd.read_csv(\"/kaggle/input/skincare-products-clean-dataset/skincare_products_clean.csv\")\ndf = df.dropna(subset=['clean_ingreds'])\n\n# Define concern-to-ingredient mapping\nconcern_ingredients = {\n \"acne\": [\"salicylic acid\", \"benzoyl peroxide\", \"niacinamide\", \"azelaic acid\", \"tea tree\"],\n \"dryness\": [\"hyaluronic acid\", \"glycerin\", \"ceramide\", \"squalane\"],\n \"aging\": [\"retinol\", \"peptides\", \"vitamin c\", \"niacinamide\"],\n \"sensitivity\": [\"allantoin\", \"panthenol\", \"madecassoside\", \"centella\"],\n \"pigmentation\": [\"vitamin c\", \"kojic acid\", \"azelaic acid\", \"niacinamide\"],\n}\nall_concerns = list(concern_ingredients.keys())\n\n# Predict concerns based on ingredients\ndef predict_concerns(ingredients):\n ingredients = ingredients.lower()\n predicted = []\n for concern, keywords in concern_ingredients.items():\n if any(keyword in ingredients for keyword in keywords):\n predicted.append(concern)\n return predicted\n\ndf['pred_concerns'] = df['clean_ingreds'].apply(predict_concerns)\n\n# Fake true concerns based on prediction + noise\ndef fake_true_concerns(predicted):\n base = set(predicted)\n # Add or remove 1 concern with some probability to simulate imperfect ground truth\n if random.random() < 0.3 and base:\n base.remove(random.choice(list(base)))\n if random.random() < 0.4:\n base.add(random.choice(all_concerns))\n return 
def plot_multilabel_confusion_matrices(matrices, labels):
    """Draw one 2x2 confusion-matrix heatmap per label.

    Parameters
    ----------
    matrices : sequence of 2x2 arrays
        One matrix per label, unpacked via ``ravel()`` as ``tn, fp, fn, tp``
        (the layout produced by ``sklearn.metrics.multilabel_confusion_matrix``).
    labels : sequence
        Label names, in the same order as ``matrices``; used in the titles.
    """
    for idx, label in enumerate(labels):
        tn, fp, fn, tp = matrices[idx].ravel()
        # Rows are predictions, columns are actuals (see the tick labels):
        #   Predicted Positive -> [TP (actual +), FP (actual -)]
        #   Predicted Negative -> [FN (actual +), TN (actual -)]
        # The previous layout had FP and FN swapped relative to these labels.
        cm = [[tp, fp],
              [fn, tn]]

        fig, ax = plt.subplots(figsize=(4, 3))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Purples", cbar=False,
                    xticklabels=['Actual Positive', 'Actual Negative'],
                    yticklabels=['Predicted Positive', 'Predicted Negative'],
                    ax=ax)
        ax.set_title(f'Confusion Matrix for "{label}"')
        ax.set_xlabel("Actual")
        ax.set_ylabel("Predicted")
        fig.tight_layout()
        plt.show()
","image/png":"\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{}}],"execution_count":13},{"cell_type":"code","source":"import os\n\n# ✅ Suppress TensorFlow & XLA logging\nos.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # 0 = all logs, 3 = only errors\nos.environ[\"TF_XLA_FLAGS\"] = \"--tf_xla_enable_xla_devices=false\"\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-26T06:27:22.775172Z","iopub.execute_input":"2025-05-26T06:27:22.775479Z","iopub.status.idle":"2025-05-26T06:27:22.781402Z","shell.execute_reply.started":"2025-05-26T06:27:22.775457Z","shell.execute_reply":"2025-05-26T06:27:22.780105Z"}},"outputs":[],"execution_count":3},{"cell_type":"code","source":"import tensorflow as tf\nfrom tensorflow.keras.preprocessing import image\nfrom tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input, decode_predictions\nimport numpy as np\nimport requests\nfrom PIL import Image\nfrom io import BytesIO\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-26T06:27:38.051416Z","iopub.execute_input":"2025-05-26T06:27:38.052854Z","iopub.status.idle":"2025-05-26T06:27:38.065761Z","shell.execute_reply.started":"2025-05-26T06:27:38.052816Z","shell.execute_reply":"2025-05-26T06:27:38.064510Z"}},"outputs":[],"execution_count":4},{"cell_type":"code","source":"import tensorflow as tf\nfrom tensorflow.keras.models import Sequential\nfrom tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense\n\ndef build_model(input_shape=(128, 128, 3), num_classes=2):\n model = Sequential([\n Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),\n MaxPooling2D(2, 2),\n Conv2D(64, (3, 3), activation='relu'),\n MaxPooling2D(2, 2),\n Flatten(),\n Dense(64, activation='relu'),\n Dense(num_classes, activation='softmax')\n ])\n model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n return 
def load_image_from_url(url, target_size=(224, 224), timeout=10):
    """
    Load and preprocess image from a URL for model prediction.

    Parameters
    ----------
    url : str
        Address of the image to download.
    target_size : tuple of int
        Size passed to ``PIL.Image.resize``.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` can block indefinitely.

    Returns
    -------
    The result of MobileNetV2's ``preprocess_input`` applied to the RGB image
    with a leading batch axis added.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts, or a non-success HTTP status. The
        status is checked explicitly so an HTML error page is not handed to
        PIL as if it were image data.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    img = Image.open(BytesIO(response.content)).convert("RGB")
    img = img.resize(target_size)
    img_array = np.expand_dims(np.array(img), axis=0)  # add batch dimension
    return preprocess_input(img_array)
def load_image_from_url(url, target_size=(224, 224), timeout=10):
    """Download an image and return it as a normalised single-image batch.

    Parameters
    ----------
    url : str
        Address of the image to download.
    target_size : tuple of int
        Size passed to ``PIL.Image.resize``.
    timeout : float
        Seconds to wait for the server; without it ``requests.get`` can block
        indefinitely.

    Returns
    -------
    numpy.ndarray or None
        The RGB image scaled to the 0-1 range with a leading batch axis, or
        ``None`` if anything fails (the error is printed, not raised).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Fail on HTTP errors here, so the message names the real problem
        # instead of PIL complaining about an unreadable "image".
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert('RGB')
        img = img.resize(target_size)
        img_array = np.array(img) / 255.0
        return np.expand_dims(img_array, axis=0)
    except Exception as e:
        # Deliberate best-effort: callers check for None.
        print(f"❌ Error loading image: {e}")
        return None
Creating a dummy model for demo...\")\n model = Sequential([\n Conv2D(16, (3, 3), activation='relu', input_shape=(224, 224, 3)),\n MaxPooling2D(2, 2),\n Flatten(),\n Dense(10, activation='softmax')\n ])\n model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n model.save(path)\n return model\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-05-26T06:33:45.895624Z","iopub.execute_input":"2025-05-26T06:33:45.896160Z","iopub.status.idle":"2025-05-26T06:33:45.902861Z","shell.execute_reply.started":"2025-05-26T06:33:45.896131Z","shell.execute_reply":"2025-05-26T06:33:45.901670Z"}},"outputs":[],"execution_count":14},{"cell_type":"code","source":"import pandas as pd\n\ndef diagnose_and_recommend(image_url, model_path, csv_path):\n model = get_or_create_dummy_model(model_path)\n image = load_image_from_url(image_url)\n if image is None:\n return\n\n # Predict\n preds = model.predict(image)\n predicted_class_index = np.argmax(preds)\n \n # Map index to skin concerns (adjust to match your actual model)\n classes = [\"acne\", \"aging\", \"dryness\", \"sensitivity\", \"pigmentation\", \n \"eczema\", \"rosacea\", \"dark circles\", \"wrinkles\", \"scars\"]\n predicted_concern = classes[predicted_class_index]\n \n print(f\"\\n🔍 Detected Concern: **{predicted_concern.upper()}**\")\n\n # Load product dataset\n df = pd.read_csv(csv_path)\n df['price'] = df['price'].replace('[£$,]', '', regex=True).astype(float)\n \n # Ingredients mapping\n concern_ingredients = {\n \"acne\": [\"salicylic acid\", \"benzoyl peroxide\", \"niacinamide\", \"azelaic acid\", \"tea tree\"],\n \"dryness\": [\"hyaluronic acid\", \"glycerin\", \"ceramide\", \"squalane\"],\n \"aging\": [\"retinol\", \"peptides\", \"vitamin c\", \"niacinamide\"],\n \"sensitivity\": [\"allantoin\", \"panthenol\", \"madecassoside\", \"centella\"],\n \"pigmentation\": [\"vitamin c\", \"kojic acid\", \"azelaic acid\", \"niacinamide\"],\n }\n\n ingredients = 
# Define your classes here (must match model training): position i names
# the concern reported for model output i.
CLASSES = ["acne", "aging", "dryness", "sensitivity", "pigmentation",
           "eczema", "rosacea", "dark circles", "wrinkles", "scars"]

IMG_SIZE = (224, 224)  # Adjust based on your model input size


def load_image_from_url(url, target_size=None, timeout=10):
    """Download an image and preprocess it into a one-item batch.

    This definition replaces the same-named function from an earlier cell
    once this cell runs, so it keeps that function's optional
    ``target_size`` argument to avoid breaking earlier-style calls.

    Args:
        url: Address of the image to fetch with ``requests.get``.
        target_size: Size tuple handed to ``Image.resize``; ``None`` means
            use the current value of ``IMG_SIZE``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``img_to_array`` result divided by 255 with a batch axis added at
        position 0, or ``None`` when downloading or decoding failed (the
        error is printed).
    """
    # Resolve at call time so a later change to IMG_SIZE is honoured.
    size = IMG_SIZE if target_size is None else target_size
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert('RGB')
        img = img.resize(size)
        img_array = img_to_array(img) / 255.0  # normalize pixel values
        img_array = np.expand_dims(img_array, axis=0)  # add batch dimension
        return img_array
    except Exception as e:
        # Deliberately best-effort: callers test for None instead of catching.
        print(f"❌ Failed to load image: {e}")
        return None
def predict_disease(image_url, model_path="/kaggle/working/skin_disease_model.h5"):
    """Load the saved model, classify the image at ``image_url`` and print the result.

    Args:
        image_url: Address of the photo, passed to ``load_image_from_url``.
        model_path: File handed to ``load_model``. The default is the path
            this notebook has always used, so existing calls are unaffected.

    Returns:
        None. The predicted class name and its score (as a percentage), or a
        failure message, are printed.
    """
    print("📥 Loading model...")
    try:
        model = load_model(model_path)
    except Exception as e:
        print(f"❌ Failed to load model: {e}")
        return

    print("🌐 Downloading and preprocessing image...")
    img = load_image_from_url(image_url)
    if img is None:
        return

    preds = model.predict(img)
    print("Prediction output shape:", preds.shape)  # Should be (1, 10)

    # Only the first row is read: the batch holds a single image.
    class_index = np.argmax(preds[0])
    confidence = preds[0][class_index] * 100

    # Guard against a model with more outputs than there are class names.
    if 0 <= class_index < len(CLASSES):
        diagnosis = CLASSES[class_index]
        print(f"\n✅ Predicted Diagnosis: **{diagnosis.capitalize()}**")
        print(f"🔎 Confidence: {confidence:.2f}%")
    else:
        print("❌ Error: predicted class index is out of range!")


if __name__ == "__main__":
    try:
        user_url = input("🔗 Enter image URL for diagnosis: ").strip()
    except (EOFError, NotImplementedError):
        # NOTE(review): non-interactive runs have no stdin; Jupyter kernels
        # appear to signal this with a NotImplementedError subclass — confirm.
        user_url = ""

    if user_url:
        predict_disease(user_url)
    else:
        # An empty URL would only produce a confusing request error.
        print("⚠️ No image URL provided; skipping diagnosis.")
22.24%\n","output_type":"stream"}],"execution_count":22}]}