import pandas as pd
# Load the user table.
users = pd.read_excel('users.xlsx')
# DataFrame.drop removes rows or columns:
#   labels       - names of the rows or columns to remove
#   axis=0       - remove rows
#   axis=1       - remove columns
#   inplace=True - modify the frame itself instead of returning a new one
# With inplace=True the call returns None, so its result is deliberately not
# bound to a name; `users` itself loses the two columns.
users.drop(labels=['age', 'sex'], axis=1, inplace=True)
print(users)
# Remove empty columns.
# A column whose every value is missing carries no information for analysis.
# DataFrame.count() gives the number of non-null values per column, and
# DataFrame.drop() removes the columns whose count is zero.
import pandas as pd

# Load the order-detail table.
detail = pd.read_excel('meal_order_detail.xlsx')

# Boolean Series indexed by column name: True where the column holds no
# non-null value at all.
col = detail.count() == 0
# Pick the labels of the all-empty columns through the mask itself (not by
# integer position, which is deprecated on a label-indexed Series) and drop
# them in a single call. An equivalent, slower approach is to loop over
# detail.columns and drop each column whose detail[name].count() is 0.
empty_columns = col[col].index
detail.drop(labels=empty_columns, axis=1, inplace=True)
print("detail 的形状:", detail.shape)
# Cleaning steps in this series: removing empty columns, de-duplication,
# similarity computation. This section covers de-duplication.
# Data is removed with DataFrame.drop(); duplicates are removed with
# DataFrame.drop_duplicates().
import pandas as pd

# Load the order-detail table.
detail = pd.read_excel('meal_order_detail.xlsx')
# De-duplicating only a column selection would look like:
#   detail[['counts', 'amounts']].drop_duplicates()
#
# De-duplicate whole rows, comparing only the 'counts' and 'amounts' columns.
# inplace=True would make drop_duplicates return None, so the default
# (copy-returning) form is used and `detail` itself is left unmodified.
data1 = detail.drop_duplicates(subset=['counts', 'amounts'])
print(data1.shape)