Spaces:
Runtime error
Runtime error
File size: 1,903 Bytes
27900ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# -*- coding: utf-8 -*-
"""SummarizingData.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Wo7aUHTjFTRVpiK4efjRHI2gsA6fRip5
"""
# Import pandas
import pandas as pd
# Load the recent-graduates CSV from the Colab working directory
recent_grads = pd.read_csv("/content/recent_grads.csv")
# Print the shape
print(recent_grads.shape)
# google.colab is only importable inside a Colab runtime; importing it
# unconditionally aborts the script everywhere else. The CSV above is read
# before the mount, so the mount is treated as optional here.
try:
    from google.colab import drive
except ImportError:
    pass
else:
    drive.mount('/content/drive')
# Print .dtypes
print(recent_grads.dtypes)
# Output summary statistics
print(recent_grads.describe())
# Exclude data of type object
print(recent_grads.describe(exclude=["object"]))
# numpy provides the NaN value used below, so it has to be imported before
# the replacement loop runs (previously it was imported after first use).
import numpy as np
import pandas as pd

# Names of the columns we're searching for missing values
columns = ['median', 'p25th', 'p75th']
# Take a look at the dtypes
print(recent_grads[columns].dtypes)
# Find how missing values are represented
print(recent_grads["median"].unique())
# Replace the 'UN' missing-value marker with NaN in each listed column.
# One pass is enough: a second identical pass would find nothing to replace.
for column in columns:
    recent_grads.loc[recent_grads[column] == 'UN', column] = np.nan
# Import numpy
import numpy as np

# Pull out the sharewomen column and preview its first five rows
sw_col = recent_grads['sharewomen']
print(sw_col.head())
# Largest value found in the sharewomen column
max_sw = sw_col.max()
print(max_sw)
# Show every row whose sharewomen value equals that maximum
print(recent_grads.loc[sw_col == max_sw])
# Import numpy
import numpy as np

# Convert the two columns of interest to a 2-D numpy array
# (column 0 = unemployed, column 1 = low_wage_jobs)
recent_grads_np = np.array(recent_grads[['unemployed', 'low_wage_jobs']])
# Print the type of recent_grads_np
print(type(recent_grads_np))
# Print the correlation-coefficient matrix of the two columns
print(np.corrcoef(recent_grads_np[:, 0], recent_grads_np[:, 1]))