import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load only the first eight columns (App through Price) of the Play Store export.
df = pd.read_csv(
    './googleplaystore.csv',
    usecols=list(range(8)),
)
# Preview the first rows to sanity-check the parsed columns.
df.head()
|   | App | Category | Rating | Reviews | Size | Installs | Type | Price |
|---|-----|----------|--------|---------|------|----------|------|-------|
| 0 | Photo Editor & Candy Camera & Grid & ScrapBook | ART_AND_DESIGN | 4.1 | 159 | 19M | 10,000+ | Free | 0 |
| 1 | Coloring book moana | ART_AND_DESIGN | 3.9 | 967 | 14M | 500,000+ | Free | 0 |
| 2 | U Launcher Lite – FREE Live Cool Themes, Hide ... | ART_AND_DESIGN | 4.7 | 87510 | 8.7M | 5,000,000+ | Free | 0 |
| 3 | Sketch - Draw & Paint | ART_AND_DESIGN | 4.5 | 215644 | 25M | 50,000,000+ | Free | 0 |
| 4 | Pixel Draw - Number Art Coloring Book | ART_AND_DESIGN | 4.3 | 967 | 2.8M | 100,000+ | Free | 0 |
# Summarise the frame: per-column non-null counts and dtypes, plus memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 8 columns):
App 10841 non-null object
Category 10841 non-null object
Rating 9367 non-null float64
Reviews 10841 non-null object
Size 10841 non-null object
Installs 10841 non-null object
Type 10840 non-null object
Price 10841 non-null object
dtypes: float64(1), object(7)
memory usage: 677.7+ KB
# Dimensions of the frame as (rows, columns).
df.shape
(10841, 8)
# Non-null entries per column; columns below the row count contain missing values.
df.count()
App 10841
Category 10841
Rating 9367
Reviews 10841
Size 10841
Installs 10841
Type 10840
Price 10841
dtype: int64
# Number of rows that exactly repeat an earlier row (first occurrences are not counted).
int(df.duplicated().sum())
485
# Number of distinct app names; a missing name, if any, counts as one value.
df['App'].nunique(dropna=False)
9660
# Frequency of every Category value, keeping missing values in the tally.
df['Category'].value_counts(dropna=False)
# Drop, in place, every row whose Category is the literal string '1.9'.
# NOTE(review): '1.9' does not look like a category name - presumably a
# malformed record whose fields are shifted; confirm against the raw CSV.
df.drop(index=df.index[df['Category'] == '1.9'], inplace=True)
# Frequency of every Rating value, keeping missing values in the tally.
df['Rating'].value_counts(dropna=False)