import scipy as sp
import pandas as pd
import matplotlib as mpl
import seaborn as sns
# Traffic counts for three intersections, 24 observations each
# (presumably one per hour of the day -- confirm against the data source).
traffic = {
    'Int. 1': [11, 7, 14, 11, 43, 38, 61, 75, 38, 28, 12, 18,
               18, 17, 19, 32, 42, 57, 44, 114, 35, 11, 13, 10],
    'Int. 2': [11, 13, 17, 13, 51, 46, 132, 135, 88, 36, 12, 27,
               19, 15, 36, 47, 65, 66, 55, 145, 58, 12, 9, 9],
    'Int. 3': [9, 11, 20, 9, 69, 76, 186, 180, 115, 55, 14, 30,
               29, 18, 48, 10, 92, 151, 90, 257, 68, 15, 15, 7],
}
df_traffic = pd.DataFrame(traffic, columns=['Int. 1', 'Int. 2', 'Int. 3'])

# NOTE: the bare expressions below only display a result in an interactive
# session (notebook / REPL); in a plain script they are evaluated and discarded.
df_traffic

# --- Central tendency ---
df_traffic.mean()
df_traffic.median()
df_traffic.mode()
df_traffic['Int. 1'].mean()
df_traffic['Int. 1'].median()
df_traffic['Int. 1'].mode()

# --- Shape: skewness and kurtosis ---
df_traffic.skew()
df_traffic['Int. 1'].skew()
print(format(df_traffic['Int. 3'].skew(), ".4f"))
df_traffic.kurt()
df_traffic['Int. 1'].kurt()
print(format(df_traffic['Int. 3'].kurt(), ".4f"))

# --- Quantiles and extremes ---
df_traffic.quantile([0.25, 0.5, 0.75])
df_traffic['Int. 1'].quantile()
df_traffic.min()
df_traffic.max()

# --- Range (max - min) per intersection ---
# Named `traffic_range` rather than `range` so the builtin `range` is not
# shadowed for the rest of the module.
traffic_range = df_traffic.max() - df_traffic.min()
traffic_range
range_calc = {
    'min': df_traffic.min(),
    'max': df_traffic.max(),
    'range': traffic_range,
}
df_range_calc = pd.DataFrame(range_calc)
df_range_calc

# --- Dispersion: variance, standard deviation, mean ---
df_traffic.var()
df_traffic.std()
df_traffic.mean()
my_calc = {
    'var': df_traffic.var(),
    'std': df_traffic.std(),  # was df_traffic.max(), which mislabelled the maximum as std
    'mean': df_traffic.mean(),
}
df_my_calc = pd.DataFrame(my_calc)
df_my_calc
# A DataFrame does not accept a float format spec via str.format; render the
# table with every float shown to four decimal places instead.
print(df_my_calc.to_string(float_format="{:.4f}".format))
# Box-and-whisker plot with one box per intersection column.
df_traffic.boxplot()
# Grade data (listed in ascending order) and its five-number summary.
grades = [
    48, 57, 58, 65, 68, 69, 71, 73, 73,
    74, 75, 77, 78, 78, 78, 79, 80, 85,
    87, 88, 89, 89, 89, 95, 96, 97, 99,
]
df_grades = pd.DataFrame(grades)

# Bare expressions only display a value in an interactive session.
df_grades
df_grades.min()
df_grades.max()
df_grades.median()

# Quartiles as one-row DataFrames whose index is the requested quantile.
Q1 = df_grades.quantile([0.25])
Q1
Q3 = df_grades.quantile([0.75])
Q3

# Collect the summary as a single-row table. `.iloc[0]` takes the only row of
# each quartile frame, dropping the quantile label (0.25 / 0.75) from the
# index so every entry is aligned on the data column alone.
grade_calc = {
    'min': df_grades.min(),
    'max': df_grades.max(),
    'Q1': Q1.iloc[0],
    'Q3': Q3.iloc[0],
    'median': df_grades.median(),
}
df2 = pd.DataFrame(data=grade_calc)
df2
# Boxplot of the grade summary table.
# NOTE(review): df2 holds a single row of summary statistics, so each box is
# drawn from one value; plotting df_grades may have been intended -- confirm.
df2.boxplot()
# Seaborn boxplot of the traffic counts, one box per intersection column.
sns.boxplot(data=df_traffic)
# Five-number summary (min, Q1, median, Q3, max) of the first column.
Data = df_traffic['Int. 1']
Quartiles = Data.quantile([.25, .5, .75])  # indexed by the quantile level
print('min: %.3f' % Data.min())
print('Q1: %.3f' % Quartiles.loc[.25])
# The median is the 0.5 quantile; the previous code printed Data.iloc[1],
# which is just the second raw observation.
print('median: %.3f' % Quartiles.loc[.5])
# The 0.75 quantile is Q3 (it was mislabelled "Q1").
print('Q3: %.3f' % Quartiles.loc[.75])
print('max: %.3f' % Data.max())

# Built-in summaries for comparison; as bare expressions these only display
# in an interactive session.
df_traffic.describe()
df_grades.describe()