Spaces:
Runtime error
Runtime error
| import validators | |
| from selectorlib import Extractor | |
| import requests | |
| import json | |
| import time | |
| import csv | |
| from dateutil.parser import parse | |
| import sys, os | |
| import re | |
| from datetime import date, datetime | |
| import numpy as np | |
| import math | |
| import concurrent.futures | |
| import boto3 | |
| import botocore | |
| from io import StringIO | |
| import pandas as pd | |
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import base64 | |
| import uuid | |
| #import pyperclip | |
| #from IPython.core.display import HTML | |
| from bokeh.plotting import figure | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from all_funcs import * | |
def create_table(theurls):
    """Show a comparison table and review graph for the given product URLs.

    For each URL the ASIN is extracted with ``find_asin`` and the review CSV
    ``alldata/<asin>.csv`` is read from the ``productreviewsdata`` bucket.
    When the object cannot be fetched (``botocore`` ``ClientError``) an empty
    placeholder CSV with the expected header is uploaded instead and the
    product is reported as requested.

    The table and graph are rendered only if the last processed product had
    data and the number of processed products equals
    ``len(st.session_state["linksFinal"])``; otherwise a "being processed"
    notice is shown and the call sleeps for 10 seconds.

    NOTE(review): ``s3``, ``res``, ``find_asin``, ``create_graph``,
    ``getrate`` and ``relative_rates`` are not defined in this block;
    presumably they come from ``all_funcs`` -- confirm.

    Args:
        theurls: Iterable of product URLs to process.

    Returns:
        str: One line per handled product, each terminated by ``"\\n"``. The
        line is the ``url`` value stored in the CSV when data exists, or
        ``https://www.amazon.in/product-reviews/<asin>`` when the CSV is
        empty or had to be created.
    """
    review_url_prefix = "https://www.amazon.in/product-reviews/"
    bucket = 'productreviewsdata'
    columns = [
        'Product',
        'Our Rating',
        'Total Verified Purchases',
        'No. of Verified Purchases in last 100 days',
        'No. of Verified Purchases that have 5 stars in the last 100 days',
        'Amazon Rating',
        'URL',
    ]

    # Parallel per-product lists; index k in each refers to the same product.
    all_five_star = []
    all_time_diff = []
    all_rating = []
    all_verified = []
    all_helped = []
    urls_used = []
    product_names = []
    all_reviews = []
    all_amazon_ratings = []
    all_count_of_day = []
    string = ""
    fig = go.Figure()
    prime = False
    spin = st.empty()
    stat = st.empty()
    print(theurls)

    for i in theurls:
        try:
            asin = find_asin(i)
            print(asin)
            if len(asin) != 10:
                raise ValueError
        except Exception:
            # Covers both a failing find_asin and a result that is not a
            # 10-character string (including None, which has no len()).
            st.error("ASIN NUMBER NOT FOUND IN URL! PLEASE CHECK FORMAT OF URL")
            prime = False
            break

        file_name = asin + '.csv'
        print(file_name)
        try:
            df = s3.get_object(Bucket=bucket, Key="alldata/" + file_name)
            body = df["Body"].read().decode('utf-8')
            df_data = pd.read_csv(StringIO(body))
            try:
                title = list(set(df_data["product"]))[0]
                titles = list(set(df_data["title"]))
                print(titles)
                if titles[0] == "-":
                    st.error(title + " has 0 reviews. Please remove it from your list and try again!")
                    break
            except IndexError:
                # The CSV exists but has no rows yet: report the review URL
                # and stop processing the remaining URLs.
                string = string + review_url_prefix + asin + "\n"
                break

            stat.info("Getting " + title + "....")
            product_names.append(title)
            try:
                all_amazon_ratings.append(str(list(set(df_data["amazon_rating"]))[0]))
            except Exception:
                # Column missing or empty: show a placeholder in the table.
                all_amazon_ratings.append("-")
            product_url = list(set(df_data["url"]))[0]
            urls_used.append(product_url)
            string = string + product_url + "\n"

            if len(df_data) != 0:
                fig = create_graph(fig, df_data)
                (df_len, _delta_t, _rate, ind_time_diff, ind_rating,
                 ind_verified, ind_helped, count_of_day, count_of_five_star,
                 _ind_hun_days) = getrate(df_data)
                all_reviews.append(str(df_len))
                all_time_diff.append(ind_time_diff)
                all_rating.append(ind_rating)
                all_verified.append(ind_verified)
                all_helped.append(ind_helped)
                all_count_of_day.append(count_of_day)
                all_five_star.append(count_of_five_star)
                prime = True
        except botocore.exceptions.ClientError:
            # The object could not be fetched: upload an empty CSV with the
            # expected header so the product is registered in the bucket.
            st.info("Request sent for " + asin)
            create_df = pd.DataFrame({"title": [], "content": [], 'date': [], "author": [], "rating": [], "product": [], "url": [], "verified": [], "helped": [], "amazon_rating": []})
            csv_buffer = StringIO()
            create_df.to_csv(csv_buffer, index=False)
            res.Object(bucket, 'alldata/' + asin + '.csv').put(Body=csv_buffer.getvalue())
            string = string + review_url_prefix + asin + "\n"
            prime = False

    if prime and len(all_time_diff) == len(st.session_state["linksFinal"]):
        fig.update_layout(
            title="Graph of reviews",
            xaxis_title="Date",
            yaxis_title="No. of Reviews",
            legend_title="Products",
            font=dict(
                family="Courier New, monospace",
                color="black"))
        rates = relative_rates(all_time_diff, all_rating, all_verified, all_helped)
        # Collect the rows first and build the frame once: DataFrame.append
        # was removed in pandas 2.0.
        records = [
            {
                'Product': product_names[record][:70] + "...",
                'Our Rating': rates[record],
                'Total Verified Purchases': all_reviews[record],
                'No. of Verified Purchases in last 100 days': str(all_count_of_day[record]),
                'No. of Verified Purchases that have 5 stars in the last 100 days': str(all_five_star[record]),
                'Amazon Rating': all_amazon_ratings[record],
                'URL': urls_used[record],
            }
            for record in range(len(urls_used))
        ]
        dataf = pd.DataFrame(records, columns=columns)
        dataf = dataf.sort_values(by=['Our Rating'], ascending=False)
        dataf.set_index('Product', inplace=True)
        stat.empty()
        st.table(dataf)
        st.plotly_chart(fig)
    else:
        stat.empty()
        spin.info("Your request is being processed...")
        time.sleep(10)
    return string
def _non_empty_lines(text):
    """Return the newline-separated parts of *text*, dropping empty ones."""
    return [line for line in text.split("\n") if line != ""]


def save_data_in_session(string, prime_session, sessions_here):
    """Store the current link list under ``sessions/<iden>`` in the bucket.

    Nothing is saved unless the number of non-empty lines in ``string``
    equals ``len(st.session_state.linksFinal)``.

    When ``prime_session`` is truthy, ``string`` is compared (as a set of
    non-empty lines) against every entry of ``sessions_here``. If one of
    them holds the same set of lines nothing is written; otherwise
    ``st.session_state.dataInBucket`` is prepended, separated by ``",\\n"``,
    and the result is uploaded. When ``prime_session`` is falsy, ``string``
    is uploaded as-is.

    NOTE(review): the source this was recovered from had lost its
    indentation; the "save only if no stored session matches" behaviour
    assumes the original used a ``for``/``else`` -- confirm.
    NOTE(review): ``res`` is not defined in this block; presumably it comes
    from ``all_funcs`` -- confirm.

    Args:
        string: Newline-separated URLs describing the current session.
        prime_session: Truthy when previously stored sessions should be
            checked and extended.
        sessions_here: Iterable of stored session strings, each in the same
            newline-separated format as ``string``.

    Returns:
        None
    """
    s_check = _non_empty_lines(string)
    if prime_session:
        print("THIS")
        print(s_check)

    # Only a complete result (one line per requested link) is worth saving.
    if len(s_check) != len(st.session_state.linksFinal):
        return

    if prime_session:
        current = set(s_check)
        for ses in sessions_here:
            ses_check = _non_empty_lines(ses)
            print("ses_check")
            print(ses_check)
            if current == set(ses_check):
                # An identical session is already stored; do not duplicate.
                return
        print("HIIIIIIIIIIIIII")
        string = st.session_state.dataInBucket + ",\n" + string

    st.success("Session Saved")
    res.Object('productreviewsdata', 'sessions/' + st.session_state["iden"]).put(Body=string)