Assignment -03
import pandas as pd
import numpy as np
In [2]:
from sklearn import datasets
iris = datasets.load_iris()
df = [Link](data= np.c_[iris['data'], iris['target']],
columns= iris['feature_names'] + ['target'])
df
Out[2]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
0 5.1 3.5 1.4 0.2 0.0
1 4.9 3.0 1.4 0.2 0.0
2 4.7 3.2 1.3 0.2 0.0
3 4.6 3.1 1.5 0.2 0.0
4 5.0 3.6 1.4 0.2 0.0
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 2.0
146 6.3 2.5 5.0 1.9 2.0
147 6.5 3.0 5.2 2.0 2.0
148 6.2 3.4 5.4 2.3 2.0
149 5.9 3.0 5.1 1.8 2.0
150 rows × 5 columns
In [3]:
#mean
print(df['petal length (cm)'].mean())
print(df['sepal length (cm)'].mean())
print(df['sepal width (cm)'].mean())
print(df['sepal width (cm)'].mean())
3.7580000000000005
5.843333333333334
3.0573333333333337
3.0573333333333337
In [4]:
#mode
print(df['petal length (cm)'].mode())
print(df['sepal length (cm)'].mode())
print(df['sepal width (cm)'].mode())
print(df['sepal width (cm)'].mode())
0 1.4
1 1.5
Name: petal length (cm), dtype: float64
0 5.0
Name: sepal length (cm), dtype: float64
0 3.0
Name: sepal width (cm), dtype: float64
0 3.0
Name: sepal width (cm), dtype: float64
In [5]:
#median
print(df['petal length (cm)'].median())
print(df['sepal length (cm)'].median())
print(df['sepal width (cm)'].median())
print(df['sepal width (cm)'].median())
4.35
5.8
3.0
3.0
In [6]:
#standard deviation
print(df['petal length (cm)'].std())
print(df['sepal length (cm)'].std())
print(df['sepal width (cm)'].std())
print(df['sepal width (cm)'].std())
1.7652982332594662
0.828066127977863
0.4358662849366982
0.4358662849366982
In [7]:
#minimun
print(df['petal length (cm)'].min())
print(df['sepal length (cm)'].min())
print(df['sepal width (cm)'].min())
print(df['sepal width (cm)'].min())
1.0
4.3
2.0
2.0
In [8]:
#maximum
print(df['petal length (cm)'].max())
print(df['sepal length (cm)'].max())
print(df['sepal width (cm)'].max())
print(df['sepal width (cm)'].max())
6.9
7.9
4.4
4.4
In [9]:
import [Link] as plt
[Link](figsize=(12, 8))
[Link](2, 2, 1)
[Link](df['petal length (cm)'], bins=10)
[Link]('Petal Length Distribution')
[Link]('Petal Length (cm)')
[Link]('Frequency')
[Link](2, 2, 2)
[Link](df['sepal length (cm)'], bins=10)
[Link]('Sepal Length Distribution')
[Link]('Sepal Length (cm)')
[Link]('Frequency')
[Link](2, 2, 3)
[Link](df['sepal width (cm)'], bins=10)
[Link]('Sepal Width Distribution')
[Link]('Sepal Width (cm)')
[Link]('Frequency')
[Link](2, 2, 4)
[Link](df['petal width (cm)'], bins=10)
[Link]('Petal Width Distribution')
[Link]('Petal Width (cm)')
[Link]('Frequency')
plt.tight_layout()
[Link]()
In [10]:
[Link](figsize=(12, 6))
[Link](1, 2, 1)
[Link](column=['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'])
[Link]('Box Plots Before Outlier Removal')
Out[10]:
Text(0.5, 1.0, 'Box Plots Before Outlier Removal')
In [11]:
def remove_outliers_iqr(df, column):
Q1 = df[column].quantile(0.25)
Q3 = df[column].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
df_filtered = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
return df_filtered
for column in ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']:
df = remove_outliers_iqr(df, column)
In [34]:
[Link](1, 2, 2)
[Link](column=['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)'])
[Link]('Box Plots After Outlier Removal')
plt.tight_layout()
[Link]()
In [ ]: