Spaces:
Configuration error
Configuration error
Upload Scheduler_Covid.py
Browse files- Scheduler/Scheduler_Covid.py +116 -3
Scheduler/Scheduler_Covid.py
CHANGED
|
@@ -1,3 +1,116 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
"""
|
| 4 |
+
Created on Thu Sep 23 09:27:21 2021
|
| 5 |
+
|
| 6 |
+
@author: benjaminull
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
import numpy as np
|
| 12 |
+
from datetime import timedelta
|
| 13 |
+
import requests
|
| 14 |
+
import io
|
| 15 |
+
import openpyxl
|
| 16 |
+
from pandas import ExcelWriter
|
| 17 |
+
import requests
|
| 18 |
+
from bs4 import BeautifulSoup
|
| 19 |
+
from bs4 import BeautifulSoup
|
| 20 |
+
import pandas as pd
|
| 21 |
+
from selenium import webdriver
|
| 22 |
+
import requests
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def GenerarExcel(ruta_guardado, Pestañas, Data):
    """Write one table per sheet into a single Excel workbook.

    Args:
        ruta_guardado: Destination path of the workbook.
        Pestañas: Sheet names, in output order.
        Data: Indexable collection of objects exposing ``to_excel`` (e.g.
            DataFrames); ``Data[i]`` is written to the sheet named
            ``Pestañas[i]``. Each table is also printed before it is written.

    Raises:
        IndexError: If ``Data`` has fewer items than ``Pestañas``.
    """
    # The writer creates every sheet on demand, so no workbook needs to be
    # pre-built with openpyxl: the pre-built file was saved to the same path
    # the writer overwrites, and it relied on deprecated openpyxl helpers
    # (get_sheet_by_name / remove_sheet).
    # The ``with`` block saves and closes the file even when a write fails,
    # and replaces the explicit ``writer.save()`` call, which newer pandas
    # releases no longer provide.
    with ExcelWriter(ruta_guardado) as writer:
        for i, pestaña in enumerate(Pestañas):
            tabla = Data[i]
            print(tabla)
            tabla.to_excel(writer, sheet_name=pestaña)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def run_data_covid():
    """Build country-level mobility tables from Google and Apple data and save them.

    Steps, in order:

    1. Open Apple's mobility page in Chrome through Selenium and read the
       download link from the second ``<a>`` element of the rendered HTML.
    2. Stream Google's Global Mobility Report CSV in chunks of 150000 rows
       (at most 65 chunks), average it per (country_region, date) and rename
       the ``*_percent_change_from_baseline`` columns to ``*_google``.
    3. Download the Apple CSV, keep the ``country/region`` rows, reshape it
       to one column per transportation type indexed by (region, date) and
       subtract 100.
    4. Join both sources, add a ``Mobility Index`` column (row mean of the
       eight listed mobility columns) and, for every resulting column, build
       a date x country table smoothed with a 7-row rolling mean (minimum 3
       observations) plus one averaged column per region group.
    5. Write the tables to ``Scheduler/Movilidad_desagrada.xlsx`` (one sheet
       per column) and to ``Scheduler/dict_movilidad.npy``.

    Returns:
        None. The results are only written to disk.

    Raises:
        requests.HTTPError: If the Apple CSV download returns an error status.
        KeyError: If an expected column or a listed country is missing from
            the downloaded data.
    """
    # --- 1. Find the Apple CSV link on the rendered page -------------------
    options = webdriver.ChromeOptions()
    options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe'
    path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'
    # NOTE(review): ``executable_path`` / ``chrome_options`` are legacy
    # Selenium keyword names -- confirm the installed version accepts them.
    browser = webdriver.Chrome(executable_path=path_to_chromedriver, chrome_options=options)
    try:
        # Go to the desired page.
        browser.get('https://covid19.apple.com/mobility')
        html = browser.page_source
    finally:
        # Always shut Chrome down; otherwise every run leaves a browser
        # process behind.
        browser.quit()

    soup = BeautifulSoup(html, "html.parser")
    # First double-quoted value in the markup of the second <a> tag
    # (presumably its href pointing at the CSV -- TODO confirm).
    link = str(soup.find_all("a")[1]).split('"')[1]

    # --- 2. Google mobility report -----------------------------------------
    reader = pd.read_csv(
        'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
        usecols=['country_region', 'date',
                 'retail_and_recreation_percent_change_from_baseline',
                 'grocery_and_pharmacy_percent_change_from_baseline',
                 'parks_percent_change_from_baseline',
                 'transit_stations_percent_change_from_baseline',
                 'workplaces_percent_change_from_baseline'],
        dtype={"workplaces_percent_change_from_baseline": "float32",
               "parks_percent_change_from_baseline": "float32",
               "retail_and_recreation_percent_change_from_baseline": "float32",
               'transit_stations_percent_change_from_baseline': "float32"},
        chunksize=150000)
    # Collect the chunks and concatenate once: concatenating inside the loop
    # re-copies everything read so far on every iteration.
    chunks = []
    for n_chunk, chunk in enumerate(reader):
        if n_chunk == 65:  # hard cap on how many chunks are read
            break
        chunks.append(chunk)
    data_agg = pd.concat(chunks, ignore_index=True)
    data_agg.info(memory_usage="deep")

    data_agg.set_index(['country_region', 'date'], inplace=True)
    # Rows sharing the same (country, date) pair are averaged together.
    data_agg = data_agg.groupby(level=[0, 1]).mean()
    data_agg.columns = data_agg.columns.str.replace('_percent_change_from_baseline', '_google')

    # --- 3. Apple mobility report ------------------------------------------
    respuesta = requests.get(link)
    # Fail here on an HTTP error instead of parsing an error page as CSV.
    respuesta.raise_for_status()
    # NOTE(review): ``error_bad_lines`` is a legacy pandas keyword -- confirm
    # the installed pandas still accepts it.
    data_app = pd.read_csv(io.StringIO(respuesta.content.decode('utf-8')),
                           error_bad_lines=False)
    data_app.info(memory_usage="deep")
    # Keep only the country-level data.
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    # The remaining columns (presumably one per date) are stacked into the
    # index, then each transportation type becomes a column.
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    # NOTE(review): the -100 presumably turns a 100-based index into a change
    # from baseline comparable with the Google columns -- confirm.
    data_app = data_app.unstack(level='transportation_type') - 100
    data_app.index.names = data_agg.index.names

    # --- 4. Combine sources and aggregate ----------------------------------
    data_agg = data_agg.join(data_app)
    print(data_app.columns)
    mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(1)

    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia',
                  'Mexico', 'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
    }

    data_dict = {}
    for col in data_agg.columns:
        # Rows = dates, columns = countries; 7-row rolling mean that needs at
        # least 3 observations.
        df = data_agg[col].unstack().T.rolling(7, 3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(1)
        data_dict[col] = df

    # --- 5. Persist the results --------------------------------------------
    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx", list(data_dict.keys()),
                 list(data_dict.values()))
    # The dict of DataFrames is stored as a pickled object array, so loading
    # it back requires np.load(..., allow_pickle=True).
    np.save('Scheduler/dict_movilidad.npy', data_dict)
|