Descriptive Satistics

In [3]:
import scipy as sp
import pandas as pd
import matplotlib as mpl
import seaborn as sns
In [4]:
traffic = {'Int. 1': [11,7,14,11,43,38,61,75,38,28,12,18,18,17,19,32,42,57,44,114,35,11,13,10],
'Int. 2': [11,13,17,13,51,46,132,135,88,36,12,27,19,15,36,47,65,66,55,145,58,12,9,9],
'Int. 3': [9,11,20,9,69,76,186,180,115,55,14,30,29,18,48,10,92,151,90,257,68,15,15,7]
}
In [17]:
df_traffic = pd.DataFrame(traffic, columns = ['Int. 1', 'Int. 2', 'Int. 3'])
In [18]:
df_traffic
Out[18]:
Int. 1 Int. 2 Int. 3
0 11 11 9
1 7 13 11
2 14 17 20
3 11 13 9
4 43 51 69
5 38 46 76
6 61 132 186
7 75 135 180
8 38 88 115
9 28 36 55
10 12 12 14
11 18 27 30
12 18 19 29
13 17 15 18
14 19 36 48
15 32 47 10
16 42 65 92
17 57 66 151
18 44 55 90
19 114 145 257
20 35 58 68
21 11 12 15
22 13 9 15
23 10 9 7
In [19]:
df_traffic.mean()
Out[19]:
Int. 1    32.000000
Int. 2    46.541667
Int. 3    65.583333
dtype: float64
In [20]:
df_traffic.median()
Out[20]:
Int. 1    23.5
Int. 2    36.0
Int. 3    39.0
dtype: float64
In [21]:
df_traffic.mode()
Out[21]:
Int. 1 Int. 2 Int. 3
0 11.0 9 9.0
1 NaN 12 15.0
2 NaN 13 NaN
3 NaN 36 NaN
In [22]:
df_traffic['Int. 1'].mean()
Out[22]:
32.0
In [23]:
df_traffic['Int. 1'].median()
Out[23]:
23.5
In [24]:
df_traffic['Int. 1'].mode()
Out[24]:
0    11
dtype: int64
In [25]:
df_traffic.skew()
Out[25]:
Int. 1    1.712357
Int. 2    1.310520
Int. 3    1.422015
dtype: float64
In [26]:
df_traffic['Int. 1'].skew()
Out[26]:
1.7123565316265639
In [27]:
print("{0:.4f}".format(df_traffic['Int. 3'].skew()))
1.4220
In [28]:
df_traffic.kurt()
Out[28]:
Int. 1    3.589131
Int. 2    0.891532
Int. 3    1.489980
dtype: float64
In [29]:
df_traffic['Int. 1'].kurt()
Out[29]:
3.5891314136372086
In [31]:
print("{:.4f}".format(df_traffic['Int. 3'].kurt()))
1.4900
In [32]:
df_traffic.quantile([0.25, 0.5, 0.75])
Out[32]:
Int. 1 Int. 2 Int. 3
0.25 12.75 13.00 14.75
0.50 23.50 36.00 39.00
0.75 42.25 59.75 90.50
In [33]:
df_traffic['Int. 1'].quantile()
Out[33]:
23.5
In [34]:
df_traffic.min()
Out[34]:
Int. 1    7
Int. 2    9
Int. 3    7
dtype: int64
In [35]:
df_traffic.max()
Out[35]:
Int. 1    114
Int. 2    145
Int. 3    257
dtype: int64
In [37]:
range = (df_traffic.max()-df_traffic.min())
In [38]:
range
Out[38]:
Int. 1    107
Int. 2    136
Int. 3    250
dtype: int64
In [39]:
range_calc = {'min': df_traffic.min(), 'max': df_traffic.max(), 'range': range}
In [40]:
df_range_calc = pd.DataFrame(range_calc)
df_range_calc
Out[40]:
min max range
Int. 1 7 114 107
Int. 2 9 145 136
Int. 3 7 257 250
In [42]:
df_traffic.var()
Out[42]:
Int. 1     643.652174
Int. 2    1714.432971
Int. 3    4627.818841
dtype: float64
In [43]:
df_traffic.std()
Out[43]:
Int. 1    25.370301
Int. 2    41.405712
Int. 3    68.028074
dtype: float64
In [44]:
df_traffic.mean()
Out[44]:
Int. 1    32.000000
Int. 2    46.541667
Int. 3    65.583333
dtype: float64
In [45]:
my_calc = {'var': df_traffic.var(), 'std': df_traffic.max(), 'mean': df_traffic.mean()}
In [46]:
df_my_calc = pd.DataFrame(my_calc)
df_my_calc
Out[46]:
var std mean
Int. 1 643.652174 114 32.000000
Int. 2 1714.432971 145 46.541667
Int. 3 4627.818841 257 65.583333
In [47]:
print("{0:.4f}".format(df_my_calc) # fix me!
  File "<ipython-input-47-517412dd2e80>", line 1
    print("{0:.4f}".format(df_my_calc)
                                      ^
SyntaxError: unexpected EOF while parsing
In [49]:
pd.DataFrame.boxplot(df_traffic)
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x1d1655224c8>
In [50]:
grades = [48,57,58,65,68,69,71,73,73,74,75,77,78,78,78,79,80,85,87,88,89,89,89,95,96,97,99]
In [101]:
df_grades = pd.DataFrame(grades)
df_grades
Out[101]:
0
0 48
1 57
2 58
3 65
4 68
5 69
6 71
7 73
8 73
9 74
10 75
11 77
12 78
13 78
14 78
15 79
16 80
17 85
18 87
19 88
20 89
21 89
22 89
23 95
24 96
25 97
26 99
In [74]:
df_grades.min()
Out[74]:
0    48
dtype: int64
In [75]:
df_grades.max()
Out[75]:
0    99
dtype: int64
In [76]:
df_grades.median()
Out[76]:
0    78.0
dtype: float64
In [77]:
Q1 = df_grades.quantile([0.25])
Q1
Out[77]:
0
0.25 72.0
In [78]:
Q3 = df_grades.quantile([0.75])
Q3
Out[78]:
0
0.75 88.5
In [91]:
grade_calc = {'min': df_grades.min(),
              'max': df_grades.max(),
              'Q1': df_grades.quantile([0.25]).iloc[0], #iloc used to remove the 0.25 which is not part of my data (see out 77)
              'Q3': df_grades.quantile([0.75]).iloc[0],
              'median': df_grades.median()
             }
In [94]:
df2=pd.DataFrame(grade_calc)
In [99]:
df2
Out[99]:
min max Q1 Q3 median
0 48 99 72.0 88.5 78.0
In [102]:
pd.DataFrame.boxplot(df2)
Out[102]:
<matplotlib.axes._subplots.AxesSubplot at 0x1d165b34e48>
In [103]:
sns.boxplot(data=df_traffic)
Out[103]:
<matplotlib.axes._subplots.AxesSubplot at 0x1d165b4a388>
In [104]:
# 5 number summary on 1st column

Data = df_traffic['Int. 1']
Quartiles = Data.quantile([.25, .5, .75])

print('min: %.3f' %Data.min())
print('Q1: %.3f' %Quartiles.iloc[0])
print('median: %.3f' %Data.iloc[1])
print('Q1: %.3f' %Quartiles.iloc[2])
print('max: %.3f' %Data.max())
min: 7.000
Q1: 12.750
median: 7.000
Q1: 42.250
max: 114.000
In [105]:
df_traffic.describe()
Out[105]:
Int. 1 Int. 2 Int. 3
count 24.000000 24.000000 24.000000
mean 32.000000 46.541667 65.583333
std 25.370301 41.405712 68.028074
min 7.000000 9.000000 7.000000
25% 12.750000 13.000000 14.750000
50% 23.500000 36.000000 39.000000
75% 42.250000 59.750000 90.500000
max 114.000000 145.000000 257.000000
In [106]:
df_grades.describe()
Out[106]:
0
count 27.000000
mean 78.333333
std 12.749057
min 48.000000
25% 72.000000
50% 78.000000
75% 88.500000
max 99.000000
In [ ]: