1、读取本地数据库
import pymysql
import numpy as npy
import pandas as pda
import matplotlib.pylab as pyl
# Query to run: the first 10000 rows of the salaries table.
sql = "select * from salaries limit 0,10000"

# Connect to the local MySQL "employees" database.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or an untracked config file instead.
conn = pymysql.connect(host="localhost",user="root",passwd="360421",db="employees")
try:
    data = pda.read_sql(sql, conn)
finally:
    # Release the connection even if the query raises.
    conn.close()

# describe is a method and must be called; printing the bare attribute only
# shows the bound-method repr instead of the summary statistics.
print(data.describe())
# Compare the total row count with the per-column "count" printed above:
# a column whose count is lower than this contains missing values.
print(len(data))

# Transpose so that each column of the query result becomes one row of the
# value array, then pick columns by position.
data2 = data.T
salary = data2.values[1]     # second column of the result set
from_date = data2.values[2]  # third column of the result set

# Scatter plot to eyeball outliers.
pyl.plot(from_date, salary, 'o')
pyl.show()
2、读取结果未发现异常值
3、在数据中发现异常数据
准备一个 csv 文件，并在其中预先设置好缺失值和异常值
# Load the prepared CSV file.
# A raw string is used so the Windows backslashes are taken literally; in a
# normal literal "\A", "\数" and "\s" are invalid escape sequences, which
# Python warns about and plans to reject. The resulting path is unchanged.
data = pda.read_csv(r"E:\AWorkStation01\数据挖掘\square.csv")

# Transpose so each CSV column becomes one row of the value array.
data1 = data.T
row = data1.values[0]  # first column of the file
col = data1.values[1]  # second column of the file

# Scatter plot of the raw data to make the outliers visible.
pyl.plot(row, col, 'o')
pyl.show()
结果
清洗异常数据
# Work on an explicit copy of the underlying array: `data.values` may be a
# view of the frame or (with pandas Copy-on-Write) a read-only array, so
# writing into it directly is unreliable.
data1 = data.values.copy()

# Row i (0-based) is expected to hold (i + 1) ** 2 in its second column.
# Overwriting the whole column with the expected values repairs every row
# that deviates (including missing values) and leaves correct rows as they
# were. int64 is requested explicitly so the squares do not overflow on
# platforms whose default integer is 32-bit.
expected = npy.arange(1, len(data1) + 1, dtype=npy.int64) ** 2
data1[:, 1] = expected

# Transpose to get one array per column and plot the cleaned data.
data2 = data1.T
x = data2[0]
y = data2[1]
pyl.plot(x, y, 'o')
pyl.show()