import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from wordcloud import WordCloud
from PIL import Image
# Global matplotlib settings for the figures drawn by this script.
plt.rcParams.update({
    "font.sans-serif": ["SimHei"],  # set the font
    "axes.unicode_minus": False,  # fixes the garbled "-" minus sign in figures
})
"""数据清洗"""
class FX:
def data_clean(self):
df = pd.read_csv('python.csv')
# print(df.columns)
"""去除空值"""
df = df.dropna(subset=['positionName', 'salary'], how='any')
# print(df.isnull().sum())
""" 去除重复 """
df = df.drop_duplicates(subset=['companyFullName', 'positionName', 'salary'])
# print(df.shape)
"""处理薪资问题"""
df['min-salary'] = df['salary'].apply(lambda i: int(i.split('-')[0].strip('k')))
df['max-salary'] = df['salary'].apply(lambda i: int(i.split('-')[1].strip('k')))
df['avg-salary'] = (df['min-salary'] + df['max-salary']) / 2
# print(df['avg-salary'
Python 数据分析操作：柱状图和词云分析
于 2024-07-10 17:31:36 首次发布