EDA All Functions
EDA All Functions
import libraries
In [2]: import pandas as pd
Out[7]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zone O
Group
Out[10]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zone
Group
Madhya
11248 1001209 Oshin P00201342 F 36-45 40 0 Central
Pradesh
df.info() prints a concise summary of the DataFrame.
In [8]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11251 entries, 0 to 11250
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 User_ID 11251 non-null int64
1 Cust_name 11251 non-null object
2 Product_ID 11251 non-null object
3 Gender 11251 non-null object
4 Age Group 11251 non-null object
5 Age 11251 non-null int64
6 Marital_Status 11251 non-null int64
7 State 11251 non-null object
8 Zone 11251 non-null object
9 Occupation 11251 non-null object
10 Product_Category 11251 non-null object
11 Orders 11251 non-null int64
12 Amount 11239 non-null float64
13 Status 0 non-null float64
14 unnamed1 0 non-null float64
dtypes: float64(3), int64(4), object(8)
memory usage: 1.3+ MB
(11251, 15)
Out[11]:
User_ID int64
Out[14]:
Cust_name object
Product_ID object
Gender object
Age Group object
Age int64
Marital_Status int64
State object
Zone object
Occupation object
Product_Category object
Orders int64
Amount float64
Status float64
unnamed1 float64
dtype: object
Out[15]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zone Occupatio
Group
0 False False False False False False False False False Fal
1 False False False False False False False False False Fal
2 False False False False False False False False False Fal
3 False False False False False False False False False Fal
4 False False False False False False False False False Fal
... ... ... ... ... ... ... ... ... ...
11246 False False False False False False False False False Fal
11247 False False False False False False False False False Fal
11248 False False False False False False False False False Fal
11249 False False False False False False False False False Fal
11250 False False False False False False False False False Fal
User_ID 0
Out[16]:
Cust_name 0
Product_ID 0
Gender 0
Age Group 0
Age 0
Marital_Status 0
State 0
Zone 0
Occupation 0
Product_Category 0
Orders 0
Amount 12
Status 11251
unnamed1 11251
dtype: int64
Out[17]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zone Occupation Pro
Group
df.drop('column name',axis=1,inplace=True)
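Removes the named column from the DataFrame (axis=1 selects columns, not rows). It is used here to discard a column that contains only missing values.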
In [23]: df.drop('unnamed1',axis=1,inplace=True)
Out[27]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zo
Group
Madhya
11248 1001209 Oshin P00201342 F 36-45 40 0 Cent
Pradesh
df['column name'].fillna(df['column name'].mean(),inplace=True)
In [33]: df['Amount'].fillna(df['Amount'].mean(),inplace=True)
User_ID 0
Out[34]:
Cust_name 0
Product_ID 0
Gender 0
Age Group 0
Age 0
Marital_Status 0
State 0
Zone 0
Occupation 0
Product_Category 0
Orders 0
Amount 0
Status 11251
dtype: int64
Product_ID Cust_name
P00000142 Adrian 19.0
Akshat 27.0
Armstrong 34.0
Arun 33.0
Atkinson 46.0
...
P0099442 Amol 26.0
Astrea 35.0
Grant 32.0
Siddharth 36.0
P0099742 Shatayu 13.0
Name: Age, Length: 10948, dtype: float64
Product_ID Cust_name
Out[54]:
P00000142 Adrian 19.0
Akshat 27.0
Armstrong 34.0
Arun 33.0
Atkinson 46.0
...
P0099442 Amol 26.0
Astrea 35.0
Grant 32.0
Siddharth 36.0
P0099742 Shatayu 13.0
Name: Age, Length: 10948, dtype: float64
df.sort_values(by='Amount')
Out[59]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zo
Group
Madhya
11248 1001209 Oshin P00201342 F 36-45 40 0 Cent
Pradesh
Out[56]: Age
User_ID Cust_name Product_ID Gender Age Marital_Status State Zone
Group
Madhya
10774 1001926 Barton P00157542 M 0-17 12 1 Central
Pradesh
... ... ... ... ... ... ... ... ... ...
Madhya
2951 1002204 Dilbeck P00246642 M 55+ 92 0 Central
Pradesh
Uttar
1106 1001176 Alice P00128942 M 55+ 92 0 Central
Pradesh
Uttar
612 1002526 Shreya P00271142 M 55+ 92 1 Central
Pradesh