# Spaces: Runtime error
# -*- coding: utf-8 -*-
"""SummarizingData.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Wo7aUHTjFTRVpiK4efjRHI2gsA6fRip5

Exploratory summary of the recent-graduates CSV: prints the frame's shape,
dtypes and summary statistics, replaces the 'UN' marker in the 'median',
'p25th' and 'p75th' columns with NaN, prints the row holding the maximum
'sharewomen' value, and prints the correlation matrix of the 'unemployed'
and 'low_wage_jobs' columns.
"""

import numpy as np
import pandas as pd

# google.colab exists only inside a Colab runtime. Guard the import so the
# script still runs elsewhere; the CSV below is read from a local path and
# does not depend on the Drive mount.
try:
    from google.colab import drive
except ImportError:
    drive = None

# Use pandas to read in the recent-grads data
recent_grads = pd.read_csv("/content/recent_grads.csv")

# Print the shape
print(recent_grads.shape)

# Mount Google Drive when running inside Colab
if drive is not None:
    drive.mount('/content/drive')

# Print .dtypes
print(recent_grads.dtypes)

# Output summary statistics
print(recent_grads.describe())

# Exclude data of type object
print(recent_grads.describe(exclude=["object"]))

# Names of the columns we're searching for missing values
columns = ['median', 'p25th', 'p75th']

# Take a look at the dtypes
print(recent_grads[columns].dtypes)

# Find how missing values are represented
print(recent_grads["median"].unique())

# Replace missing values with NaN.
# numpy is imported at the top of the file, so np is defined here; the
# original ran this loop once before the import (NameError) and once after.
for column in columns:
    recent_grads.loc[recent_grads[column] == 'UN', column] = np.nan

# Select sharewomen column
sw_col = recent_grads['sharewomen']

# Output first five rows
print(sw_col.head())

# Use max to output maximum values
max_sw = recent_grads['sharewomen'].max()

# Print column max
print(max_sw)

# Output the row containing the maximum percentage of women
print(recent_grads[(recent_grads['sharewomen'] == max_sw)])

# Convert to numpy array
recent_grads_np = np.array(recent_grads[['unemployed', 'low_wage_jobs']])

# Print the type of recent_grads_np
print(type(recent_grads_np))

# Correlation matrix between the two columns (column 0 vs column 1)
print(np.corrcoef(recent_grads_np[:, 0], recent_grads_np[:, 1]))