文章目录
1.分类
1.1 决策树
基础数据
数值,武器,子弹,血量,身边队友,行为
0,手枪,少,少,没,逃跑
1,机枪,中,中,有,战斗
2,,多,多,,躲藏
3,98K,多,中,有,战斗
4,mp,中,少,有,躲藏
5,m4,少,多,有,躲藏
6,AK47,少,多,有,战斗
7,巴雷特,中,多,没,战斗
8,AWM,少,中,有,躲藏
9,MP4,中,多,没,逃跑
10,MG,多,多,没,战斗
11,机枪,少,中,没,逃跑
12,巴雷特,少,少,没,逃跑
13,AK47,多,多,有,战斗
14,AWM,多,多,没,战斗
15,AK47,多,中,有,战斗
16,手枪,多,少,没,躲藏
decision_tree_t
'''
Decision tree on a small "fight / flee / hide" data set.

(Originally a slide example, flagged for removal) -- the original author
noted this version still has problems; revisit if it is needed again.
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
'''
Columns: index, weapon type, ammo, HP, teammate nearby, action, e.g.
0  pistol       low   low   no    flee
1  machine gun  high  high  yes   fight
'''
# names = ['zidan', 'wuqi', 'xueliang', 'do_what', 'duiyou']
# df = pd.read_csv('fightrun.csv', encoding='utf-8', names=names)  # explicit column names
# df = pd.read_csv(r'..\csv\fightrun.csv')
df = pd.read_csv('fightrun2.csv')
# df = df.ix[1:, ['武器', '子弹', '血量', '身边队友', '行为']]
# NOTE(review): 'zidan' (ammo) lands on the weapon column and 'wuqi'
# (weapon) on the ammo column -- the names are swapped relative to the
# CSV header, but they are used consistently below, so the model itself
# is unaffected.
df.columns = ['#', 'zidan', 'wuqi', 'hp', 'duiyou', 'do']
df = df.drop('#', axis=1)  # the running index carries no information
# Manual encoding (alternative kept for reference):
# df[(df=='手枪')|(df=='少')|(df=='没')|(df=='逃跑')] = 0
# df[(df=='机枪')|(df=='中')|(df=='有')|(df=='战斗')] = 1
# df[(df=='多')|(df=='躲藏')] = 2
# df = df.astype(int)
# Encode every categorical column.  A single shared LabelEncoder is
# refit per column, so afterwards classle.classes_ only describes the
# last column ('do').
classle = LabelEncoder()
df['zidan'] = classle.fit_transform(df['zidan'].values)
df['wuqi'] = classle.fit_transform(df['wuqi'].values)
df['hp'] = classle.fit_transform(df['hp'].values)
df['duiyou'] = classle.fit_transform(df['duiyou'].values)
df['do'] = classle.fit_transform(df['do'].values)
X = df[['zidan', 'wuqi', 'hp', 'duiyou']]
y = df['do']
# print(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, test_size=0.5)
model = DTC().fit(X_train, y_train)
# Prediction
# Predict a single feature vector (demo only; overwritten just below).
y_pred = model.predict([(1, 1, 0, 1)])
# Predict n feature vectors
y_pred = model.predict(X_test)  # model predictions
# Evaluate the predictions
print(metrics.classification_report(y_test, y_pred))
print(metrics.confusion_matrix(y_test, y_pred))
# Sample output of classification_report:
#             precision recall f1-score support
#           0      0.73   0.89     0.80       9
#           1      0.95   0.87     0.91      23
# avg / total      0.89   0.88     0.88      32
# Sample output of confusion_matrix:
# [[ 8  1]
#  [ 3 20]]
# where for class 0:
#   precision = 8 / (8 + 3)
#   recall    = 8 / (8 + 1)
#   f1-score  = 2 * (precision * recall) / (precision + recall)
#   support is the per-class sample count (used as the weight)
#   avg / total is the weighted mean, e.g. recall = (0.89*9+0.87*23)/(9+23)
probas_ = model.predict_proba(X_test)
# NOTE(review): the label column has three classes, so this is a
# one-vs-rest ROC curve that treats class 1 as the positive class.
fpr, tpr, thresholds = metrics.roc_curve(y_test, probas_[:, 1], pos_label=1)
#
## AUC value.  BUGFIX: the 'reorder' keyword was deprecated and then
## removed from metrics.auc (scikit-learn 0.22); roc_curve already
## returns fpr in sorted order, so it is simply dropped.
auc = metrics.auc(fpr, tpr)
print('auc is ', auc)
#
## Plot the ROC curve
plt.plot(fpr, tpr, linewidth=2, label='ROC', color='green')  # ROC curve
plt.xlabel('False Positive Rate')  # axis label
plt.ylabel('True Positive Rate')  # axis label
plt.ylim(0, 1.05)  # axis range
plt.xlim(0, 1.05)  # axis range
plt.show()
decision_tree_t2
'''
Decision tree with manually curated data.
Three outcome actions: fight (战斗), flee (逃跑), hide (躲藏).
The data contains missing values, which are dropped before training.
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier as DTC  # decision tree classifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
'''
Columns: index, weapon type, ammo, HP, teammate nearby, action, e.g.
0  pistol       low   low   no    flee
1  machine gun  mid   mid   yes   fight
2               high  high        hide   <- row with missing fields
'''
# (1) Load the data
# df = pd.read_csv(r'..\csv\fightrun2.csv')
df = pd.read_csv('fightrun2.csv')
# BUGFIX: .ix was removed in pandas 1.0; .loc performs the same
# label-based selection here.  The selected columns should be either all
# strings or all numbers.
df = df.loc[1:, ['子弹', '武器', '血量', '身边队友', '行为']]
df = df.dropna()  # drop rows with missing values
print(df)
# sample output:
#    子弹  武器  血量 身边队友  行为
# 1   中  机枪   中    有  战斗
# 3   多  98K   中    有  战斗
# 4   中  mp    少    有  躲藏
# 5   少  m4    多    有  躲藏
# (2) Encode with LabelEncoder.  A single shared encoder is refit per
# column, so afterwards classle.classes_ describes only '行为'.
classle = LabelEncoder()
df['武器'] = classle.fit_transform(df['武器'].values)
df['子弹'] = classle.fit_transform(df['子弹'].values)
df['血量'] = classle.fit_transform(df['血量'].values)
df['身边队友'] = classle.fit_transform(df['身边队友'].values)
df['行为'] = classle.fit_transform(df['行为'].values)
print("****************")
print(classle.classes_)  # ['战斗' '躲藏' '逃跑'] -> 0 1 2 (state of the last fitted column)
# (3) Decision tree model (DecisionTreeClassifier as DTC)
X = df[['武器', '子弹', '血量', '身边队友']]
print(X)
# sample output:
#    武器  子弹  血量  身边队友
# 1   9    0    0      0
# 3   0    1    0      0
# 4   6    0    2      0
# 5   5    2    1      0
y = df['行为']  # .astype(int)
print(y)
# sample output: encoded action per row, e.g.
# 1  0
# 3  0
# 4  1
# 9  2
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6, test_size=0.4)
model = DTC()
model.fit(X_train, y_train)
# (4) Predict the held-out samples
y_pred = model.predict(X_test)
# Evaluation report
print(metrics.classification_report(y_test, y_pred))
# sample output:
#              precision recall f1-score support
#            0      1.00   0.25     0.40       4
#            1      0.25   1.00     0.40       1
#            2      0.00   0.00     0.00       1
#    micro avg      0.33   0.33     0.33       6
#    macro avg      0.42   0.42     0.27       6
# weighted avg      0.71   0.33     0.33       6
print("*******************************")
# Confusion matrix
print(metrics.confusion_matrix(y_test, y_pred))
# [[1 2 1]
#  [0 1 0]
#  [0 1 0]]
# Single-sample predictions; inverse_transform maps the numeric label
# back to the original action string.
n = model.predict([[0, 0, 0, 1]])
print(n)  # e.g. 0
print(classle.inverse_transform(n))  # e.g. ['战斗']
n = model.predict([[1, 0, 0, 0]])
print(n)  # map the label back to its original value
print(classle.inverse_transform(n))  # e.g. ['战斗']
1.2 贝叶斯
1.2.1 案例1
数据源头1
数值,武器,子弹,血量,身边队友,行为
0,手枪,少,少,没,逃跑
1,机枪,中,中,有,战斗
2,,多,多,,躲藏
3,98K,多,中,有,战斗
4,mp,中,少,有,躲藏
5,m4,少,多,有,躲藏
6,AK47,少,多,有,战斗
7,巴雷特,中,多,没,战斗
8,AWM,少,中,有,躲藏
9,MP4,中,多,没,逃跑
10,MG,多,多,没,战斗
11,机枪,少,中,没,逃跑
12,巴雷特,少,少,没,逃跑
13,AK47,多,多,有,战斗
14,AWM,多,多,没,战斗
15,AK47,多,中,有,战斗
16,手枪,多,少,没,躲藏
bayes1
'''
Naive Bayes version of decision_tree_t2.
The classifier assigns each sample to the class with the highest
posterior probability.
'''
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.model_selection import train_test_split
'''
Columns: index, weapon type, ammo, HP, teammate nearby, action, e.g.
0  pistol       low   low   no    flee
1  machine gun  mid   mid   yes   fight
2               high  high        hide   <- row with missing fields
'''
# df = pd.read_csv(r'..\csv\fightrun2.csv')
df = pd.read_csv('fightrun2.csv')
# BUGFIX: .ix was removed in pandas 1.0; .loc performs the same
# label-based selection of the needed columns.
df = df.loc[:, ['子弹', '武器', '血量', '身边队友', '行为']]
df = df.dropna()
# Manual encoding (alternative kept for reference):
# df[(df=='手枪')|(df=='少')|(df=='没')|(df=='逃跑')] = 0
# df[(df=='机枪')|(df=='中')|(df=='有')|(df=='战斗')] = 1
# df[(df=='多')|(df=='躲藏')] = 2
# df = df.astype(int)
# Encode with LabelEncoder.  A single shared encoder is refit per
# column; its final state belongs to the last column, '行为'.
classle = LabelEncoder()
df['武器'] = classle.fit_transform(df['武器'].values)
df['子弹'] = classle.fit_transform(df['子弹'].values)
df['血量'] = classle.fit_transform(df['血量'].values)
df['身边队友'] = classle.fit_transform(df['身边队友'].values)
df['行为'] = classle.fit_transform(df['行为'].values)
X = df[['武器', '子弹', '血量', '身边队友']]
y = df['行为']
# print(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6, test_size=0.4)
model = GaussianNB()
m = model.fit(X_train, y_train)
# Predict the held-out samples
y_pred = model.predict(X_test)
# Evaluation report
print(metrics.classification_report(y_test, y_pred))
# sample output:
#              precision recall f1-score support
#            0      1.00   0.50     0.67       6
#            2      0.25   1.00     0.40       1
#    micro avg      0.57   0.57     0.57       7
#    macro avg      0.62   0.75     0.53       7
# weighted avg      0.89   0.57     0.63       7
# Confusion matrix
cm = metrics.confusion_matrix(y_test, y_pred)
## Optionally row-normalise the matrix to [0, 1]:
# cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm)
# e.g. [[3 3]
#       [0 1]]
# Plot the confusion matrix.
# BUGFIX: the tick labels were hard-coded as ['逃跑','战斗','躲藏'], but
# LabelEncoder sorts its classes (['战斗','躲藏','逃跑']), so the axes
# were labelled in the wrong order.  Use the encoder's own class order.
classes = classle.classes_
length = range(len(classes))  # edge length of the matrix
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese glyphs correctly
plt.imshow(cm, cmap=plt.cm.Blues)
thresh = cm.max() / 2.
for i, j in product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j],
             horizontalalignment="center",
             # dark text on light cells, light text on dark cells
             color="white" if cm[i, j] > thresh else "black")
plt.xticks(length, classes)
plt.yticks(length, classes)
plt.colorbar()
plt.ylabel('真值')
plt.xlabel('预测值')
plt.savefig("bayes.png")
plt.show()
1.2.2 案例2
数据源头
喜欢吃萝卜,喜欢吃鱼,喜欢捉耗子,喜欢啃骨头,短尾巴,长耳朵,分类
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
否,是,是,否,是,是,猫
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,否,是,是,猫
否,是,是,否,否,否,猫
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
是,否,否,否,否,否,兔子
是,否,否,否,是,否,兔子
是,是,是,是,是,否,狗
是,是,是,是,否,是,狗
否,是,是,是,否,否,狗
否,是,否,是,否,否,狗
否,否,否,是,是,否,狗
是,否,是,否,是,否,狗
否,是,否,是,否,是,狗
否,是,否,是,是,是,狗
是,否,否,是,否,否,狗
是,否,否,是,是,否,狗
否,是,否,是,否,是,狗
否,否,否,是,是,是,狗
否,否,是,是,否,是,狗
否,否,是,是,是,否,狗
否,否,否,否,否,否,狗
否,否,否,否,是,否,狗
否,否,否,否,是,是,狗
否,否,否,否,否,是,狗
否,否,否,是,否,否,狗
否,否,是,是,否,否,狗
是,否,否,否,是,是,兔子
是,否,否,否,否,是,兔子
否,是,是,否,否,是,猫
否,是,是,是,是,是,猫
是,是,是,是,是,是,兔子
bayes2
'''
Cat / dog / rabbit classification with naive Bayes (slide example).
'''
from itertools import product
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
'''
Conceptual encoding: yes (是) -> 1, no (否) -> 0; dog 1, cat 2, rabbit 3.
'''
# df = pd.read_csv(r'..\csv\catdograbbit.csv')
df = pd.read_csv('category.csv')
# 1. Feature encoding -- pick ONE of three approaches.
# (1) Fully manual mapping:
# df[df == '是'] = 1
# df[df == '否'] = 0
# df[df == '狗'] = 1
# df[df == '猫'] = 2
# df[df == '兔子'] = 3
# df = df.astype(np.int8)
# print(df)
# Linear regression predicts continuous values; classification here is
# discrete: given these attributes, which animal is it?
# 喜欢吃萝卜 喜欢吃鱼 喜欢捉耗子 喜欢啃骨头 短尾巴 长耳朵 分类
# 0 0 1 1 0 1 1 2
# 1 1 0 0 0 0 1 3
# 2 0 1 1 0 0 1 2
# (2) LabelEncoder per column:
# classle = LabelEncoder()
# df['喜欢吃萝卜'] = classle.fit_transform(df['喜欢吃萝卜'].values)
# ... one line per column, or equivalently:
# for column_name in df.columns:
#     df[column_name] = classle.fit_transform(df[column_name].values)
# print(classle.classes_)
# print(df.head())
# X = df[['喜欢吃萝卜','喜欢吃鱼','喜欢捉耗子','喜欢啃骨头','短尾巴','长耳朵']]
# y = classle.fit_transform(df['分类'].values)
# (3) Dummy variables:
# X = pd.get_dummies(df.drop('分类', axis=1))
# y = classle.fit_transform(df['分类'].values)
# Chosen approach: manual mapping for the yes/no feature columns ...
df[df == '否'] = 0
df[df == '是'] = 1
# ... and a LabelEncoder for the target column only.
# (An earlier dead X/y assignment and an unused first LabelEncoder
# instance were removed -- both were overwritten before use.)
classle = LabelEncoder()
df['分类'] = classle.fit_transform(df['分类'])
print(df)
# 喜欢吃萝卜 喜欢吃鱼 喜欢捉耗子 喜欢啃骨头 短尾巴 长耳朵 分类  (分类: 0,1,2 = 兔子, 狗, 猫)
# 0 0 1 1 0 1 1 1
# 1 1 0 0 0 0 1 2
# 2 0 1 1 0 0 1 1
# print(df.head())
# print(classle.classes_)  # ['兔子' '狗' '猫']
# 2. Feature matrix and target
X = df.drop('分类', axis=1)
y = df['分类']
# 3. Train/test split -- held-out data guards against overfitting
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, test_size=0.3)
print(X.shape)        # e.g. (442, 6)
print(X_train.shape)  # e.g. (309, 6)
print(X_test.shape)   # e.g. (133, 6)
# 4. Fit (naive Bayes)
model = GaussianNB()
# model = MultinomialNB()
model.fit(X_train, y_train)
# 5. Evaluate ------------------------- start -------------------------
print("5分数********************")
# Score on the test split (random split, so results vary between runs)
print(model.score(X_test, y_test))  # e.g. 0.8120300751879699
# Classification report
y_pred = model.predict(X_test)
report = metrics.classification_report(y_test, y_pred)
print("report****************")
print(report)  # test-vs-prediction report
# sample output:
#              precision recall f1-score support
#            0      1.00   0.68     0.81      79
#            1      0.91   1.00     0.95      21
#            2      0.59   1.00     0.74      33
#    micro avg      0.81   0.81     0.81     133
#    macro avg      0.83   0.89     0.84     133
# weighted avg      0.88   0.81     0.82     133
# Classes 0,1,2 = rabbit, dog, cat; precision here ranks
# rabbit > dog > cat (fraction of correct predictions per class).
# 6. Confusion matrix
cm = metrics.confusion_matrix(y_test, y_pred)
print("cm********************")
print(cm)
# e.g. [[54  2 23]
#       [ 0 21  0]
#       [ 0  0 33]]
# Plot the confusion matrix
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese glyphs correctly
plt.imshow(cm, cmap=plt.cm.Blues)  # draw the matrix
half = cm.max() / 2
# classes = ['狗','猫','兔子']  # hard-coded names (wrong order -- don't)
classes = classle.classes_
# print(classes)
length = range(len(classes))  # edge length of the matrix
plt.xticks(length, classes, rotation=0)
plt.yticks(length, classes)
plt.colorbar()
plt.ylabel('真值', rotation=0)
plt.xlabel('预测值')
# Annotate every cell with its count
for i, j in product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j],
             # centre the number in the cell
             horizontalalignment="center",
             # dark text on light cells, light text on dark cells
             color="white" if cm[i, j] > half else "black",
             # size=15  (font size)
             )
plt.savefig('bayes2_rcParams.png')
plt.show()
# 7. Evaluate -------------------------- end --------------------------
print("7**********************")
# Predict single samples: once trained, a new object with these features
# can be assigned to a class.
# Features: 喜欢吃萝卜 喜欢吃鱼 喜欢捉耗子 喜欢啃骨头 短尾巴 长耳朵 (分类: 0,1,2 = 兔子, 狗, 猫)
n = model.predict([[0, 0, 0, 1, 1, 1]])
print(n)  # e.g. 1
print(classle.inverse_transform(n))  # e.g. ['狗']
n = model.predict([[1, 0, 0, 0, 0, 1]])
print(n)  # map the label back to its original value
print(classle.inverse_transform(n))  # e.g. ['兔子']
# How to read a confusion matrix, e.g.:
#                 predicted
#              cat  dog  rabbit
#   true cat     5    3       0
#   true dog     2    3       1
#   true rabbit  0    2      11
# The diagonal holds the correctly classified counts.
# Truth: 8 cats, 6 dogs, 13 rabbits.
#   Of the 8 cats, 3 were predicted as dogs.
#   Of the 6 dogs, 2 were predicted as cats and 1 as a rabbit.
# Predictions: 7 cats, 8 dogs, 12 rabbits.
#   Of the 7 predicted cats, 5 are real cats, 2 are dogs.
#   Of the 12 predicted rabbits, 11 are real, 1 is a dog.
1.3 集成学习
'''
With so many classifiers available, which one should we pick??
Compare individual classifiers against an ensemble (voting) classifier.
'''
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB, MultinomialNB  # naive Bayes (Gaussian / multinomial)
from sklearn.neighbors import KNeighborsClassifier  # k-nearest neighbours
from sklearn.tree import DecisionTreeClassifier  # decision tree
from sklearn.linear_model import LogisticRegression  # logistic regression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier  # voting ensemble
'''
Conceptual encoding: yes (是) -> 1, no (否) -> 0; dog 1, cat 2, rabbit 3.
'''
#names = ['luobo','yu','haozi','gutou','dwb','ced','fenlei']
## read the csv with custom column names
#df = pd.read_csv('D:\csv\catdograbbit.csv', names = names)
# df = pd.read_csv(r'..\csv\catdograbbit.csv')
df = pd.read_csv('category.csv')
# Encode each yes/no feature column with a LabelEncoder (refit per column).
classle = LabelEncoder()
feature_columns = ['喜欢吃萝卜', '喜欢吃鱼', '喜欢捉耗子', '喜欢啃骨头', '短尾巴', '长耳朵']
for column in feature_columns:
    df[column] = classle.fit_transform(df[column].values)
# Training features and encoded target
X = df[feature_columns]
y = classle.fit_transform(df['分类'].values)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, test_size=0.3)
# Candidate classifiers paired with their display labels.
candidates = [
    ('GaussianNB', GaussianNB()),
    ('MultinomialNB', MultinomialNB()),
    ('KNeighborsClassifier', KNeighborsClassifier()),
    ('DecisionTreeClassifier', DecisionTreeClassifier()),
    ('LogisticRegression', LogisticRegression()),
]
# The ensemble: each estimator casts a vote.
eclf = VotingClassifier(estimators=list(candidates))
candidates.append(('VotingClassifier', eclf))
# Cross-validate every model, including the ensemble itself.
for label, model in candidates:
    scores = cross_val_score(model, X_train, y_train)
    #print(scores)
    print("Accuracy: {0:.2f} (+/- {1:.2f}) -{2}".format(scores.mean(), scores.std(), label))
'''
-----------------------------------------------------------------------------------------------------
'''
# Sample output ("0.83" is scores.mean(), "0.04" is scores.std(), then the label):
# Accuracy: 0.83 (+/- 0.04) -GaussianNB              # naive Bayes (Gaussian)
# Accuracy: 0.94 (+/- 0.01) -MultinomialNB
# Accuracy: 0.98 (+/- 0.03) -KNeighborsClassifier    # k-nearest neighbours
# Accuracy: 1.00 (+/- 0.00) -DecisionTreeClassifier  # decision tree
# Accuracy: 0.92 (+/- 0.03) -LogisticRegression      # logistic regression
# Accuracy: 1.00 (+/- 0.00) -VotingClassifier        # voting ensemble
# Higher scores.mean() is better; smaller scores.std() means more stable.
# Here the decision tree and the voting classifier look best.
2.回归
数据源
"Id","TV","Radio","Newspaper","Sales"
"1",230.1,37.8,69.2,22.1
"2",44.5,39.3,45.1,10.4
"3",17.2,45.9,69.3,9.3
"4",151.5,41.3,58.5,18.5
"5",180.8,10.8,58.4,12.9
"6",8.7,48.9,75,7.2
"7",57.5,32.8,23.5,11.8
"8",120.2,19.6,11.6,13.2
"9",8.6,2.1,1,4.8
"10",199.8,2.6,21.2,10.6
"11",66.1,5.8,24.2,8.6
"12",214.7,24,4,17.4
"13",23.8,35.1,65.9,9.2
"14",97.5,7.6,7.2,9.7
"15",204.1,32.9,46,19
"16",195.4,47.7,52.9,22.4
"17",67.8,36.6,114,12.5
"18",281.4,39.6,55.8,24.4
"19",69.2,20.5,18.3,11.3
"20",147.3,23.9,19.1,14.6
"21",218.4,27.7,53.4,18
"22",237.4,5.1,23.5,12.5
"23",13.2,15.9,49.6,5.6
"24",228.3,16.9,26.2,15.5
"25",62.3,12.6,18.3,9.7
"26",262.9,3.5,19.5,12
"27",142.9,29.3,12.6,15
"28",240.1,16.7,22.9,15.9
"29",248.8,27.1,22.9,18.9
"30",70.6,16,40.8,10.5
"31",292.9,28.3,43.2,21.4
"32",112.9,17.4,38.6,11.9
"33",97.2,1.5,30,9.6
"34",265.6,20,0.3,17.4
"35",95.7,1.4,7.4,9.5
"36",290.7,4.1,8.5,12.8
"37",266.9,43.8,5,25.4
"38",74.7,49.4,45.7,14.7
"39",43.1,26.7,35.1,10.1
"40",228,37.7,32,21.5
"41",202.5,22.3,31.6,16.6
"42",177,33.4,38.7,17.1
"43",293.6,27.7,1.8,20.7
"44",206.9,8.4,26.4,12.9
"45",25.1,25.7,43.3,8.5
"46",175.1,22.5,31.5,14.9
"47",89.7,9.9,35.7,10.6
"48",239.9,41.5,18.5,23.2
"49",227.2,15.8,49.9,14.8
"50",66.9,11.7,36.8,9.7
"51",199.8,3.1,34.6,11.4
"52",100.4,9.6,3.6,10.7
"53",216.4,41.7,39.6,22.6
"54",182.6,46.2,58.7,21.2
"55",262.7,28.8,15.9,20.2
"56",198.9,49.4,60,23.7
"57",7.3,28.1,41.4,5.5
"58",136.2,19.2,16.6,13.2
"59",210.8,49.6,37.7,23.8
"60",210.7,29.5,9.3,18.4
"61",53.5,2,21.4,8.1
"62",261.3,42.7,54.7,24.2
"63",239.3,15.5,27.3,15.7
"64",102.7,29.6,8.4,14
"65",131.1,42.8,28.9,18
"66",69,9.3,0.9,9.3
"67",31.5,24.6,2.2,9.5
"68",139.3,14.5,10.2,13.4
"69",237.4,27.5,11,18.9
"70",216.8,43.9,27.2,22.3
"71",199.1,30.6,38.7,18.3
"72",109.8,14.3,31.7,12.4
"73",26.8,33,19.3,8.8
"74",129.4,5.7,31.3,11
"75",213.4,24.6,13.1,17
"76",16.9,43.7,89.4,8.7
"77",27.5,1.6,20.7,6.9
"78",120.5,28.5,14.2,14.2
"79",5.4,29.9,9.4,5.3
"80",116,7.7,23.1,11
"81",76.4,26.7,22.3,11.8
"82",239.8,4.1,36.9,12.3
"83",75.3,20.3,32.5,11.3
"84",68.4,44.5,35.6,13.6
"85",213.5,43,33.8,21.7
"86",193.2,18.4,65.7,15.2
"87",76.3,27.5,16,12
"88",110.7,40.6,63.2,16
"89",88.3,25.5,73.4,12.9
"90",109.8,47.8,51.4,16.7
"91",134.3,4.9,9.3,11.2
"92",28.6,1.5,33,7.3
"93",217.7,33.5,59,19.4
"94",250.9,36.5,72.3,22.2
"95",107.4,14,10.9,11.5
"96",163.3,31.6,52.9,16.9
"97",197.6,3.5,5.9,11.7
"98",184.9,21,22,15.5
"99",289.7,42.3,51.2,25.4
"100",135.2,41.7,45.9,17.2
"101",222.4,4.3,49.8,11.7
"102",296.4,36.3,100.9,23.8
"103",280.2,10.1,21.4,14.8
"104",187.9,17.2,17.9,14.7
"105",238.2,34.3,5.3,20.7
"106",137.9,46.4,59,19.2
"107",25,11,29.7,7.2
"108",90.4,0.3,23.2,8.7
"109",13.1,0.4,25.6,5.3
"110",255.4,26.9,5.5,19.8
"111",225.8,8.2,56.5,13.4
"112",241.7,38,23.2,21.8
"113",175.7,15.4,2.4,14.1
"114",209.6,20.6,10.7,15.9
"115",78.2,46.8,34.5,14.6
"116",75.1,35,52.7,12.6
"117",139.2,14.3,25.6,12.2
"118",76.4,0.8,14.8,9.4
"119",125.7,36.9,79.2,15.9
"120",19.4,16,22.3,6.6
"121",141.3,26.8,46.2,15.5
"122",18.8,21.7,50.4,7
"123",224,2.4,15.6,11.6
"124",123.1,34.6,12.4,15.2
"125",229.5,32.3,74.2,19.7
"126",87.2,11.8,25.9,10.6
"127",7.8,38.9,50.6,6.6
"128",80.2,0,9.2,8.8
"129",220.3,49,3.2,24.7
"130",59.6,12,43.1,9.7
"131",0.7,39.6,8.7,1.6
"132",265.2,2.9,43,12.7
"133",8.4,27.2,2.1,5.7
"134",219.8,33.5,45.1,19.6
"135",36.9,38.6,65.6,10.8
"136",48.3,47,8.5,11.6
"137",25.6,39,9.3,9.5
"138",273.7,28.9,59.7,20.8
"139",43,25.9,20.5,9.6
"140",184.9,43.9,1.7,20.7
"141",73.4,17,12.9,10.9
"142",193.7,35.4,75.6,19.2
"143",220.5,33.2,37.9,20.1
"144",104.6,5.7,34.4,10.4
"145",96.2,14.8,38.9,11.4
"146",140.3,1.9,9,10.3
"147",240.1,7.3,8.7,13.2
"148",243.2,49,44.3,25.4
"149",38,40.3,11.9,10.9
"150",44.7,25.8,20.6,10.1
"151",280.7,13.9,37,16.1
"152",121,8.4,48.7,11.6
"153",197.6,23.3,14.2,16.6
"154",171.3,39.7,37.7,19
"155",187.8,21.1,9.5,15.6
"156",4.1,11.6,5.7,3.2
"157",93.9,43.5,50.5,15.3
"158",149.8,1.3,24.3,10.1
"159",11.7,36.9,45.2,7.3
"160",131.7,18.4,34.6,12.9
"161",172.5,18.1,30.7,14.4
"162",85.7,35.8,49.3,13.3
"163",188.4,18.1,25.6,14.9
"164",163.5,36.8,7.4,18
"165",117.2,14.7,5.4,11.9
"166",234.5,3.4,84.8,11.9
"167",17.9,37.6,21.6,8
"168",206.8,5.2,19.4,12.2
"169",215.4,23.6,57.6,17.1
"170",284.3,10.6,6.4,15
"171",50,11.6,18.4,8.4
"172",164.5,20.9,47.4,14.5
"173",19.6,20.1,17,7.6
"174",168.4,7.1,12.8,11.7
"175",222.4,3.4,13.1,11.5
"176",276.9,48.9,41.8,27
"177",248.4,30.2,20.3,20.2
"178",170.2,7.8,35.2,11.7
"179",276.7,2.3,23.7,11.8
"180",165.6,10,17.6,12.6
"181",156.6,2.6,8.3,10.5
"182",218.5,5.4,27.4,12.2
"183",56.2,5.7,29.7,8.7
"184",287.6,43,71.8,26.2
"185",253.8,21.3,30,17.6
"186",205,45.1,19.6,22.6
"187",139.5,2.1,26.6,10.3
"188",191.1,28.7,18.2,17.3
"189",286,13.9,3.7,15.9
"190",18.7,12.1,23.4,6.7
"191",39.5,41.1,5.8,10.8
"192",75.5,10.8,6,9.9
"193",17.2,4.1,31.6,5.9
"194",166.8,42,3.6,19.6
"195",149.7,35.6,6,17.3
"196",38.2,3.7,13.8,7.6
"197",94.2,4.9,8.1,9.7
"198",177,9.3,6.4,12.8
"199",283.6,42,66.2,25.5
"200",232.1,8.6,8.7,13.4
2.1 一元线性回归
'''
(1) Simple (one-variable) linear regression.  Mind the difference
    between the correlation coefficient, the regression coefficients and
    the coefficient of determination.
    Usual workflow: compute the correlation coefficient r first, test it
    for significance, and only fit the regression equation if r is
    significant.
(3) On correlation vs determination in regression (Python examples):
    http://www.cnblogs.com/python-frog/p/8988030.html
The results below confirm that, for simple linear regression, the
correlation of predictions with the truth matches the raw correlation.
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
# BUGFIX: sklearn.externals.joblib was removed in scikit-learn 0.23;
# import the standalone joblib package (ships alongside scikit-learn).
import joblib  # model persistence
from sklearn import metrics
height_weight = {
    'height': (171, 175, 159, 155, 152, 158, 154, 164, 168, 166, 159, 164),
    'weight': (57, 64, 41, 38, 35, 44, 41, 51, 57, 49, 47, 46)
}
height = (171, 175, 159, 155, 152, 158, 154, 164, 168, 166, 159, 164)
weight = (57, 64, 41, 38, 35, 44, 41, 51, 57, 49, 47, 46)
# (1) dict of column-name (string) -> tuple of values
dct = {'height': height, 'weight': weight}
# Scatter plot: (axis 0, axis 1) = (x, y).
# Machine learning finds the pattern for us.
# plt.scatter(height, weight)
# plt.show()
# (2) Build a DataFrame
# df = pd.DataFrame(fangjia)
df = pd.DataFrame(height_weight)
#    height  weight
# 0     171      57
# 1     175      64
# ..    ...     ...
# (3) x holds the features; the -1 in reshape(-1, 1) lets numpy infer
#     the row count
x = df["height"].values.reshape(-1, 1)
# The raw column cannot be used directly: x must be 2-D, [[171],[175],...]
# x = df["height"].values.reshape(-1, 1)
# (4) y holds the target values
y = df["weight"]
#    weight
# 0      57
# 1      64
# ..    ...
# Persistence demo (default extension .pkl; 'lr.pkl' is a relative path
# in the current folder):
# model = LinearRegression()
# model.fit(x, y)
# joblib.dump(model, 'lr.pkl')
# (5) Linear regression is only trustworthy when the correlation between
#     the two variables is high.
print(df.height.corr(df.weight))  # 0.9593031405705869
# Training features (must be a 2-D array)
X_train = df.height.values.reshape(-1, 1)
# Training target
y_train = df.weight
# (6)_1 Fit the model on the training set
model = LinearRegression()
model.fit(X_train, y_train)
# Coefficient of determination check: correlation between the observed
# weight and the weight predicted from height.
y_pre = model.predict(X_train)
print("判定系数是***************")
print(df['weight'].corr(pd.Series(y_pre)))  # 0.9593031405705865
# (6)_2 Persist the trained model (binary .pkl, machine-readable)
joblib.dump(model, 'lr.pkl')
# Reload the persisted model
model = joblib.load('lr.pkl')
# (7) Predict -- the input must be a 2-D array/collection, not a scalar
# print(model.predict(170))  # would raise: x needs to be 2-D
print(model.predict([[170]]))  # [56.67589406]
x_pred = [[171], [175]]
y_pred = model.predict(x_pred)
print(y_pred)  # [57.83495436 62.47119557]
# (8) Slope and intercept of the fitted line
a, b = model.coef_, model.intercept_
print("直线的斜率: {},截距: {}".format(a, b))  # slope: [1.1590603], intercept: -140.36435732455482
# (9)_1 Plot: scatter of the raw data ...
plt.scatter(df['height'], df['weight'], color='blue')
# ... plus the fitted line
plt.plot(X_train, model.predict(X_train), color='red', linewidth=3)
# plt.plot(df['height'], a*df['height']+b, color='red', linewidth=3)
plt.xlabel("height")
plt.ylabel('weight')
# (9)_2 Save and show the figure
plt.savefig('linear_model.png')
plt.show()
# The section below is optional reading ......
#### Regression statistics
def get_lr_stats(x, y, model):
    '''Print diagnostics (R^2, F test, t test) for a fitted simple
    linear regression.

    Parameters:
        x     -- predictor values, shape (n, 1)
        y     -- observed response values, length n
        model -- fitted object exposing predict(), coef_ and intercept_

    Returns None; the report is printed.

    BUGFIX: the original indexed intercept_[0], coef_[0][0], R_square[0]
    and t[0][0], which raises TypeError when the model was fitted with a
    1-D target (coef_ is then 1-D and intercept_ a scalar) -- exactly
    how the model above is fitted.  np.ravel() now handles both layouts.
    '''
    from scipy import stats  # local import: only the tests need scipy
    # Normalise coefficient shapes so scalar and array forms both work.
    intercept = float(np.ravel(model.intercept_)[0])
    slope = float(np.ravel(model.coef_)[0])
    message0 = '一元线性回归方程为: ' + '\ty' + '=' + str(intercept) + ' + ' + str(slope) + '*x'
    n = len(x)
    y = np.ravel(np.asarray(y, dtype=float))
    y_prd = np.ravel(model.predict(x))
    Regression = float(np.sum((y_prd - np.mean(y)) ** 2))  # regression sum of squares (SSR)
    Residual = float(np.sum((y - y_prd) ** 2))             # residual sum of squares (SSE)
    R_square = Regression / (Regression + Residual)        # coefficient of determination R^2
    F = (Regression / 1) / (Residual / (n - 2))            # F statistic, df = (1, n-2)
    pf = float(stats.f.sf(F, 1, n - 2))
    message1 = ('相关系数(R^2): ' + str(R_square) + ';' + '\n' +
                '回归分析(SSR): ' + str(Regression) + ';' + '\t残差(SSE): ' + str(Residual) + ';' + '\n' +
                ' F : ' + str(F) + ';' + '\t' + 'pf : ' + str(pf))
    ## t test for the slope
    L_xx = n * np.var(np.ravel(x))
    sigma = np.sqrt(Residual / n)
    t = float(slope * np.sqrt(L_xx) / sigma)
    pt = float(stats.t.sf(t, n - 2))
    message2 = ' t : ' + str(t) + ';' + '\t' + 'pt : ' + str(pt)
    return print(message0 + '\n' + message1 + '\n' + message2)
# get_lr_stats prints its report itself and returns None; calling it
# directly avoids the stray "None" line the old print(...) wrapper emitted.
get_lr_stats(df.height.values.reshape(-1, 1), df.weight, model)
2.2 多元线性回归
'''
Multiple linear regression.
Reference: http://blog.csdn.net/lulei1217/article/details/49386295
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
# Advertising.csv comes from http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv
# "","TV","Radio","Newspaper","Sales"
# df = pd.read_csv(r'..\csv\Advertising.csv')
# (1) Load the samples
df = pd.read_csv('Advertising.csv', encoding='utf-8')
#print(df)
#
#print(df.Sales.corr(df.TV))
#print(df.Sales.corr(df.Radio))
#print(df.Sales.corr(df.Newspaper))
# (2) Select the feature columns
X = df[['TV', 'Radio', 'Newspaper']]
#print(x)
# Target: how TV / Radio / Newspaper spending influences Sales
y = df['Sales']
# (3) Cross-validation split: train on one part, validate on a disjoint
#     part -- the two parts must not overlap.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.6, test_size=0.4)
# train_test_split returns four arrays which the tuple assignment above
# unpacks in order: X_train, X_test, y_train, y_test.
print("3************")
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)  # print(y_test.shape)
# (4) Fit the model
print("4************")
model = LinearRegression()
model.fit(X_train, y_train)
# BUGFIX: the original assigned intercept_ to a and coef_ to b, so the
# printed "斜率/截距" (slope/intercept) labels were swapped.
a, b = model.coef_, model.intercept_
print("直线的斜率: {},截距: {}".format(a, b))
# Fitted equation is roughly:
# y = 2.668 + 0.0464*TV + 0.192*Radio - 0.00349*Newspaper
# (5) Predict on the test split
print("5************")
y_pred = model.predict(X_test)
print(y_pred)  # model predictions
print(y_test)  # ground-truth test values
# Ad-hoc check with three hand-picked rows:
# y_pred = model.predict([[230.1,37.8,69.2],[44.5,39.3,45.1],[17.2,45.9,69.3]])
#print(metrics.accuracy_score(y_test, y_pred))
# (the line above raises "continuous is not supported": accuracy_score
# is a classification metric and Sales is continuous)
#
# (5)_2 Compare predictions with the truth
# Wrap both series in a dict ...
dct = {
    'y_test': y_test,
    'y_pred': y_pred
}
# ... and build a comparison frame.  NOTE: this rebinds df, shadowing
# the raw advertising data frame.
df = pd.DataFrame(dct)
#
print(df.y_test.corr(df.y_pred))
# 0.9531363896456168
print(df)
# sample output:
#      y_test     y_pred
# 128    24.7  22.072041
# 145    10.3   9.483712
# 11     17.4  17.118617
# ...     ...        ...
print(df.describe())  # per-column summary statistics of the frame above
# sample output:
#           y_test     y_pred
# count  80.000000  80.000000
# mean   14.371250  14.419086
# std     5.070745   4.874209
# min     5.300000   3.705385
# max    25.400000  23.269870
# (6) Violin plot of the comparison frame
sns.violinplot(data=df)
plt.ylim(0, 35)  # y-axis range
plt.ylabel('sales')
plt.savefig('linear_model_violinplot.png')
plt.show()
3.推荐
基于内容推荐
3.1 案例1
'''
根据chapter8 anjuke_zufang.py 抓取的数据
房源推荐系统,
根据用户浏览的房源,推荐相似区域,租金,房型等的房源 (基于内容的推荐)
一个函数接收用户经常访问的房源
def accept()
另一个函数推荐和前面一个函数接收的房源 类似的房源
def recommend()
初步设想思路是
1 先用聚类分好类,产生data
2 然后用knn(K最近邻)算法,去学习上述分好类的data,产生一个model
3 再用上面的model去预测历史浏览记录的房源,
看属于哪个类别的房源最多
4 recommend函数就推荐那个最多类别的房源,推5条左右
简单起见,这里只考虑,租金,装修,面积
#1. 根据租金,装修,面积 对不同的房源进行分类
#2.得到模型
#3.根据不同用户的现有数据,(租金,装修,面积) 推荐类似的房源.
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from random import choice
# Raw listings scraped from Anjuke; the file is GBK-encoded.
df = pd.read_csv(r'..\csv\anjuke.csv', encoding='gbk')
#print(df.tail(10))
#print(df.columns)
#X = df.loc[:, ['租金', '租赁方式', '装修', '面积', '年代']]
# Feature matrix: rent (租金), decoration (装修), floor area (面积).
X = df.loc[:, ['租金', '装修', '面积']]
# --- feature clean-up helpers ---
def get_mianji(mianji):
    # Strip the '平米' (square metres) suffix, leaving the numeric text.
    return mianji.replace('平米', '')
#def get_niandai(niandai):
#    if niandai == "暂无":
#        rtn = 2000
#    else:
#        rtn = niandai.replace('年', '')
#    return int(rtn) - 1980
# Decoration level is mapped by hand so the ordering runs from bare (1) up to
# luxury (5); LabelEncoder would not guarantee that ordinal ordering.
zx = X['装修'].copy()
zx[zx=='毛坯'] = 1
zx[zx=='简单装修'] = 2
zx[zx=='中等装修'] = 3
zx[zx=='精装修'] = 4
zx[zx=='豪华装修'] = 5
X['装修'] = zx
X['面积'] = X['面积'].apply(get_mianji)
#X['年代'] = X['年代'].apply(get_niandai)
#X['租赁方式'] = LabelEncoder().fit_transform(X['租赁方式'].values)
#X = X.dropna()
#print(X.head(50), X.shape)
# Standardise the features, then cluster the listings into 15 groups.
ss = StandardScaler()
X2 = ss.fit_transform(X)
kmeans = KMeans(n_clusters=15, n_init=50)
kmeans.fit(X2)
# Cluster diagnostics (counts, centres), kept for reference:
#r1 = pd.Series(kmeans.labels_).value_counts()
## 找出聚类中心
##r2 = pd.DataFrame(kmeans.cluster_centers_)
## 聚类中心真实值
#r2 = pd.DataFrame(ss.inverse_transform(kmeans.cluster_centers_))
##横向连接(0是纵向),得到聚类中心对应的类别下的数目
#r = pd.concat([r2, r1], axis = 1)
##重命名表头
#r.columns = list(X.columns) + ['类别数目']
#print(r)
# Attach the cluster label (分类) to every listing.
y_pred = kmeans.predict(X2)
ydata = pd.DataFrame(y_pred, columns=['分类'])
# labelled feature rows — used to train the KNN classifier
data = pd.concat([X, ydata], axis=1)
# full listing rows + label — used when picking recommendations
data_recommend = pd.concat([df, ydata], axis=1)
#print(data.head(30), data.shape[0])
X = data.drop('分类', axis=1)
y = data['分类']
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X, y)
def _random_choice(lst, n):
'''从列表lst中随机选择n个不重复的元素'''
if n > len(lst):
return lst
choiced_elements = []
lst = lst.copy()
while n > 0:
element = choice(lst)
choiced_elements.append(element)
lst.remove(element) # 避免重复选择
n -= 1
return choiced_elements
#rc = _random_choice([1,2,3,4,5,6,7], 2)
#print(rc)
# Pretend this is a user's recent browsing history:
# each row is (rent, decoration level 1..5, floor area).
viewed = [
    [9900, 1, 150],
    [9000, 4, 150],
    [9000, 4, 150],
    [9000, 3, 150],
    [9200, 4, 160],
    [9400, 4, 180],
    [9000, 4, 150],
    [9000, 4, 160],
    [92000, 4, 1120],  # NOTE(review): looks like an outlier/typo (92000 rent, 1120 sqm) — confirm
    [9000, 4, 190],
    [9500, 2, 127],
]
def history_view():
    '''Randomly pick 20 rows from the scraped listings, pretending they are a
    user's recent browsing history, and return them as a cleaned feature
    frame: rent, decoration mapped to ordinal 1..5, area with the '平米'
    suffix stripped.
    '''
    choosed_idx = _random_choice(list(df.index), 20)
    # DataFrame.ix was removed in pandas 1.0; these are index labels -> .loc.
    choosed_rows = df.loc[choosed_idx, :]
    #print(choosed_rows)
    X = choosed_rows.loc[:, ['租金', '装修', '面积']]
    print('-----------------------------查看历史----------------------------------------------')
    print(X)
    # Map decoration level to ordinal 1 (bare) .. 5 (luxury), as done for
    # the training features at module level.
    zx = X['装修'].copy()
    zx[zx=='毛坯'] = 1
    zx[zx=='简单装修'] = 2
    zx[zx=='中等装修'] = 3
    zx[zx=='精装修'] = 4
    zx[zx=='豪华装修'] = 5
    X['装修'] = zx
    X['面积'] = X['面积'].apply(get_mianji)
    return X
#history_view()
def recommend(viewed):
    '''Classify the browsed listings with the trained KNN model, find the
    cluster the user viewed most often, and print/return 5 random listings
    from that cluster.
    '''
    viewed_types = knn.predict(viewed)
    #print(viewed_types)
    # Count how often each cluster appears in the browsing history.
    value_counts = pd.Series(viewed_types).value_counts()
    #print(value_counts)
    most_view = value_counts.index[0] # the most-browsed cluster
    #print(most_view)
    recommended = data_recommend[data_recommend['分类'] == most_view]
    n = 5
    # Randomly pick n listings of that cluster (all of them if fewer than n).
    choiced_idx = _random_choice(list(recommended.index), n)
    #print(choiced_idx)
    # DataFrame.ix was removed in pandas 1.0; choiced_idx holds index
    # labels, so use label-based .loc.
    recommended = recommended.loc[choiced_idx, :]
    print('-----------------------------推荐的----------------------------------------------')
    print(recommended)
    return recommended
#recommend(viewed)
#recommend(history_view())
3.2 案例2
'''
Content-based housing recommender (variant 2) built on the data scraped
by chapter8 anjuke_zufang.py.

Recommends listings similar (district, rent, layout, ...) to what the
user has been browsing:
  - one function accepts the listings the user visits often: def accept()
  - another recommends listings similar to those:            def recommend()

Plan:
  1. cluster the listings first to produce labelled data
  2. look up the clusters of the recently browsed listings and find
     which cluster appears most often
  3. recommend() then returns ~5 listings from that cluster

For simplicity only rent, decoration level and floor area are used.
'''
import copy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from random import choice
# The 17 Shanghai districts appearing in the data set.
area = ['浦东新区', '闵行区', '松江区',
        '徐汇区', '普陀区', '长宁区',
        '青浦区', '静安区', '上海周边',
        '杨浦区', '虹口区', '宝山区',
        '嘉定区', '黄浦区', '奉贤区',
        '崇明区', '金山区']
# (lat, lng) of each district centre, looked up via
# http://www.gpsspg.com/maps.htm (Baidu-map GPS lookup); same order as `area`.
gps = (
    (31.2274065041,121.5505840120), (31.1189141643,121.3886803785), (31.0383289332,121.2330541677),
    (31.1946458680,121.4433055580), (31.2553119532,121.4035442489), (31.2265243725,121.4304185175),
    (31.1555447438,121.1308224101), (31.2296614952,121.4624372609), (31.2363429624,121.4803295328),
    (31.2656839054,121.5326577316), (31.2703244262,121.5118910226), (31.4109435502,121.4959742660),
    (31.3805628349,121.2727914784), (31.2374294453,121.4912966392), (30.9239497878,121.4806129429),
    (31.6288408354,121.4038337007), (30.7479665497,121.3489176446)
)
# district name -> (lat, lng)
area_gps = dict(zip(area, gps))
#print(area_gps)
# Raw listings scraped from Anjuke; the file is GBK-encoded.
df = pd.read_csv(r'..\csv\anjuke.csv', encoding='gbk')
#print(list(df['区域1'].unique()))
#print(df['区域1'].value_counts())
def history_viewed(viewed_index):
    '''Return the df rows at the given index labels, pretending they are
    the newest entries of a user's browsing history.
    '''
    # DataFrame.ix was removed in pandas 1.0; these are index labels -> .loc.
    # Pass a list: a bare tuple would be read by .loc as multi-axis keys.
    return df.loc[list(viewed_index), :]
# Alternative mock histories, kept for experimenting:
#viewed_index = 3, 4, 5, 6, 22, 40, 43 # 40-50 sqm, rent 2-3k
#viewed_index = 12, 18, 39, 46 # medium decoration
#viewed_index = 1, 8, 17, 20, 27, 30, 37 # 100+ sqm
viewed_index = 0, 1, 6, 13, 15, 17,18,19 # Pudong New Area listings
viewed = history_viewed(viewed_index)
#print('-----------------------------浏览过的----------------------------------------------')
#print(viewed)
#print(df.tail(10))
#print(df.columns)
#X = df.loc[:, ['租金', '租赁方式', '装修', '面积', '年代']]
# Features: rent (租金), decoration (装修), area (面积), district (区域1).
X = df.loc[:, ['租金', '装修', '面积', '区域1']]
# 特征整理
def get_mianji(mianji):
    '''Drop every '平米' (square metres) marker, keeping the numeric text.'''
    return ''.join(mianji.split('平米'))
def get_area_gps(area):
    '''Look up the (lat, lng) tuple for a district name in area_gps.

    Raises KeyError for a district not present in the mapping.
    '''
    coords = area_gps[area]
    return coords
# Decoration level is mapped by hand so the ordering runs from bare (1) up to
# luxury (5); LabelEncoder would not guarantee that ordinal ordering.
zx = X['装修'].copy()
zx[zx=='毛坯'] = 1
zx[zx=='简单装修'] = 2
zx[zx=='中等装修'] = 3
zx[zx=='精装修'] = 4
zx[zx=='豪华装修'] = 5
X['装修'] = zx
X['面积'] = X['面积'].apply(get_mianji)
# Replace the district name by its centre coordinates, split them into two
# numeric columns, then drop the original column.
X['区域1'] = X['区域1'].apply(get_area_gps)
X['area_x'] = X['区域1'].apply(lambda x:x[0])
X['area_y'] = X['区域1'].apply(lambda x:x[1])
X = X.drop('区域1', axis=1)
#print(X.head())
X = X.astype(float)
# Drop extreme listings (>=300 sqm or rent >=20000) before clustering.
X_filtered = X[(X['面积']<300) & (X['租金']<20000)]
df_filtered = df[df.index.isin(X_filtered.index)]
#print(df_filtered.shape[0])
#print(X_filtered.shape[0])
# Standardise the filtered features, then cluster into 15 groups.
ss = StandardScaler()
X2 = ss.fit_transform(X_filtered)
model = KMeans(n_clusters=15, n_init=50)
# Elbow-method sweep for choosing n_clusters, kept for reference:
#ine = [[],[]] # 画图用的坐标点
#for n in range(2, 50):
#    inertia = KMeans(n_clusters=n, n_init=20).fit(X2).inertia_
#    ine[0].append(n)
#    ine[1].append(inertia)
## 肘方法
#plt.plot(ine[0], ine[1])
#plt.show()
#model = DBSCAN(eps = 0.1, min_samples=10)
#model.fit(X2)
y_pred = model.fit_predict(X2)
# NOTE(review): y_pred is positional over the FILTERED rows; indexing it here
# by df_filtered.index is what keeps the labels aligned with the original
# row labels. Positional lookups into y_pred with original labels are wrong.
y_data = pd.DataFrame(y_pred, columns=['分类'], index=df_filtered.index)
#print(y_data.index)
#print(y_data.shape[0])
# full listing rows + cluster label — used when picking recommendations
data_recommend = pd.concat([df_filtered, y_data], axis=1)
#print(data_recommend.shape[0])
print(y_pred == model.labels_)
print(data_recommend.shape)
def _random_choice(lst, n=5):
'''从列表lst中随机选择n个不重复的元素'''
if n > len(lst):
return lst
choiced_elements = []
lst = lst.copy()
while n > 0:
element = choice(lst)
choiced_elements.append(element)
lst.remove(element) # 避免重复选择
n -= 1
return choiced_elements
#rc = _random_choice([1,2,3,4,5,6,7], 2)
#print(rc)
def recommend():
    '''Find the cluster most represented in the user's browsing history and
    print/return 5 random listings from that cluster.
    '''
    # y_pred is positional over the FILTERED rows, so indexing it with the
    # original row labels (y_pred[np.array(viewed_index)]) was wrong once any
    # earlier row had been filtered out. Look the labels up by index label
    # via data_recommend instead (it was built with df_filtered.index).
    viewed_types = data_recommend.loc[data_recommend.index.isin(viewed_index), '分类']
    value_counts = pd.Series(viewed_types).value_counts()
    most_view = value_counts.index[0] # the most-browsed cluster
    #print(most_view)
    recommended = data_recommend[data_recommend['分类'] == most_view]
    n = 5
    # Randomly pick n listings of that cluster (all of them if fewer than n).
    choiced_idx = _random_choice(list(recommended.index), n)
    #print(choiced_idx)
    # DataFrame.ix was removed in pandas 1.0; choiced_idx holds index
    # labels, so use label-based .loc.
    recommended = recommended.loc[choiced_idx, :]
    print('-----------------------------推荐的----------------------------------------------')
    print(recommended)
    return recommended
def recommend2():
    '''Variant of recommend(): read the browsed listings' cluster labels
    straight from data_recommend, then return 5 random listings from the
    most frequent cluster.
    '''
    viewed2 = data_recommend[data_recommend.index.isin(viewed_index)]
    print(viewed2['分类'])
    print(viewed2['分类'].value_counts())
    # model.labels_ is positional over the FILTERED rows, so indexing it with
    # the original row labels (model.labels_[np.array(viewed_index)]) was
    # wrong; viewed2['分类'] already holds the correctly aligned labels.
    viewed_types = viewed2['分类'].values
    print(viewed_types)
    view_counts = pd.Series(viewed_types).value_counts()
    most_view = view_counts.index[0]
    print(most_view)
    sametype = data_recommend[data_recommend['分类']==most_view]
    #print(sametype.head())
    choosed_index = _random_choice(list(sametype.index))
    # DataFrame.ix was removed in pandas 1.0; choosed_index holds index
    # labels, so use label-based .loc.
    return data_recommend.loc[choosed_index,:]
recommend2()
4.聚类
样例数据
ID,R,F,M
1,27,6,232.61
2,3,5,1507.11
3,4,16,817.62
4,3,11,232.81
5,14,7,1913.05
6,19,8,220.07
7,5,2,615.83
8,26,2,1059.66
9,21,9,304.82
10,2,21,1227.96
11,15,2,521.02
12,26,3,438.22
13,17,11,1744.55
14,30,16,1957.44
15,5,7,1713.79
16,4,21,1768.11
17,93,2,1016.34
18,16,3,950.36
19,4,1,754.93
20,27,1,294.23
21,5,1,195.3
22,17,3,1845.34
4.1 DBSCAN
# 使用DBSCAN算法聚类消费行为特征数据
#
# 选两个特征, 以便画图
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from mpl_toolkits.mplot3d import Axes3D # 画3D图
# Two-feature (2-D) variant, kept for reference:
# df = pd.read_csv('consumption_data.csv', encoding='utf-8')
# print(df)
# df = df.ix[:, ['F', 'M']] # 去掉ID
# print(df)
# # 去掉夸张离群点
# df = df[ (df.F<50) | (df.M<8400)]
# # 数据规范化
# df = (df - df.mean())/df.std()
df = pd.read_csv('consumption_data.csv', encoding='utf-8')
df = df.drop('ID', axis=1) # drop the ID column
# 1. Remove extreme outliers.
# NOTE(review): with `|` a row is kept when ANY condition holds, so only rows
# failing all three tests are dropped; `&` looks like the intended outlier
# filter — confirm before changing.
df = df[(df.R < 80) | (df.F < 50) | (df.M < 8400)]
# 2. Scale the features to a common range (pick one scaler).
# Here sklearn's MinMaxScaler is used (each column mapped into 0..1);
# the old comment mentioning StandardScaler did not match the code.
ss = MinMaxScaler()
scaled_df = ss.fit_transform(df) # scaling puts the R, F, M axes on comparable ranges
df2 = pd.DataFrame(ss.fit_transform(df), columns=list('RFM'))
# 3. Cluster with DBSCAN.
# Parameters:
#   eps: maximum distance between two points of the same cluster
#   min_samples: minimum number of points per cluster
# model = DBSCAN(eps=0.5, min_samples=10)
# with eps=0.5 on 0..1-scaled data almost everything may fall into one cluster
model = DBSCAN(eps=0.1)
model.fit(df2) # fit on the scaled data; eps lives on the 0..1 scale (0.05 also works)
# print(model.predict([[1,25, 2535]]))
# Scatter plot of the clustering.
y_pred = model.fit_predict(df2) # NOTE(review): refits; the fit() above is redundant
# 2-D version, kept for reference:
# plt.scatter(df2.R,df2.F) # 未分类散点图
# plt.scatter(df2.R, df2.F, c=y_pred) # 分类
# plt.show()
sd = plt.figure().add_subplot(111, projection='3d')
sd.set_xlabel('R')
sd.set_ylabel('F')
sd.set_zlabel('M')
sd.scatter(df2.R, df2.F, df2.M, c=y_pred)
plt.show()
4.2 Kmeans
# 使用K-Means算法聚类消费行为特征数据
# ppt上示例
import matplotlib.pyplot as plt # 画2d图
from mpl_toolkits.mplot3d import Axes3D # 画3D图
import numpy as np # 数组数据处理包
import pandas as pd # 数据处理包
from sklearn.cluster import KMeans, DBSCAN # KMeans 聚类方法
from sklearn.preprocessing import MinMaxScaler, StandardScaler # 数据标准化2中方法
import seaborn as sns # 画2d图
sns.set_style("whitegrid")
df = pd.read_csv('consumption_data.csv', encoding='utf-8')
print(df)
# df = df.ix[:, ['R', 'F', 'M']] # 去掉ID
df = df.drop('ID', axis=1) # drop the ID column (same effect as the line above)
print(df)
# Scatter/box plots used to eyeball outliers, kept for reference:
# plt.scatter(df.F, df.M)
# plt.show()
# inspect the data for outliers — box plots make them obvious:
# sns.boxplot(data=df.R)
# plt.ylim(0, df.R.max())
# plt.ylim(0, 130)
# plt.show()
# -> one outlier with R > 80
# sns.boxplot(data=df.F)
# plt.ylim(0, df.F.max())
# plt.ylim(0, 100)
# plt.show()
# -> with this small sample no F outlier was visible
# sns.boxplot(data=df.M)
# plt.ylim(0, df.M.max())
# plt.ylim(0, 20000)
# plt.show()
# 1. Remove extreme outliers.
# NOTE(review): with `|` only rows failing all three tests are dropped;
# `&` looks like the intended outlier filter — confirm before changing.
df = df[(df.R < 80) | (df.F < 50) | (df.M < 8400)]
# 2. Scale the features to a common range (pick one scaler).
# (a) z-score standardisation: mean 0, std 1
# df2 = (df - df.mean())/df.std() # manual equivalent
# via sklearn's StandardScaler:
ss = StandardScaler()
scaled_df = ss.fit_transform(df) # scaling puts the R, F, M axes on comparable ranges
df2 = pd.DataFrame(ss.fit_transform(df), columns=list('RFM'))
# (b) min-max scaling into 0..1, kept as an alternative:
# df2 = (df - df.min())/(df.max() - df.min())
# via sklearn's MinMaxScaler (all axes become 0..1):
# scaled_df = MinMaxScaler().fit_transform(df)
# df2 = pd.DataFrame(scaled_df, columns=list('RFM'))
# Inspect the scaled data as a 3-D scatter plot, kept for reference:
# sd = plt.figure().add_subplot(111, projection='3d')
# sd.set_xlabel('R')
# sd.set_ylabel('F')
# sd.set_zlabel('M')
# sd.scatter(df2.R, df2.F, df2.M)
# plt.show()
# 3. Cluster with K-Means.
# n_clusters: number of clusters; n_init: random centroid restarts
# model = KMeans(n_clusters=9, n_init=5)
# max_iter bounds the iterations per run; how many clusters is best?
# model = KMeans(n_clusters=5, max_iter=500)
model = KMeans(n_clusters=5)
# (1) Fit on the scaled data.
model.fit(df2)
# (2) scaled_df and df2 hold the same values; df2 merely has named columns.
y_pred = model.predict(df2)
print(y_pred) # [3 1 2 3 1 3 0 0 3 2 0 0 1 2 1 2 4 0 0 0 0 1] — cluster per row
print(df2.shape, y_pred.shape)
# scaled_df.shape (22, 3): 22 samples with the 3 features R, F, M
# y_pred.shape (22,)
# print(model.predict([[0.1, 0.1, 0.1]]))
# (3) Sum of distances of samples to their cluster centre (drops as k grows).
print(model.inertia_) # 10.834820247097994
# (3)_1 Elbow method: the bend of the inertia curve suggests a good k.
# x axis: number of clusters; y axis: inertia at that cluster count.
ine = [[], []]
# pick a sensible range; with ~940 rows range(2, 31) would do
for n in range(2, 10):
    # inertia = KMeans(n_clusters=n, n_init=5).fit(df2).inertia_
    inertia = KMeans(n_clusters=n).fit(df2).inertia_
    ine[0].append(n)
    ine[1].append(inertia)
print(ine)
plt.plot(ine[0], ine[1])
plt.show()
# print(model.labels_) # cluster label of every sample
# print(model.cluster_centers_) # cluster centres
# 2. Derived statistics of the clustering.
# (1) size of each cluster
r1 = pd.Series(model.labels_).value_counts()
print(r1)
# cluster  count
# 0 9
# 1 5
# 4 4
# 2 3
# 3 1
# (2) centre of each cluster (in scaled coordinates)
r2 = pd.DataFrame(model.cluster_centers_)
print(r2)
# 0 1 2
# 0 0.162393 0.066667 0.208362
# 1 0.101099 0.280000 0.879310
# 2 0.109890 0.916667 0.826194
# 3 1.000000 0.050000 0.465933
# 4 0.107143 0.500000 0.112664
# # (3) centres mapped back to the original units, kept for reference:
# ss = StandardScaler()
# r2 = pd.DataFrame(ss.inverse_transform(model.cluster_centers_))
# print(r2)
# #(4) put r1 and r2 side by side
# axis=1 joins horizontally (0 would stack vertically): centre + cluster size
r = pd.concat([r2, r1], axis=1)
print(r) # the Series column is still named 0; it is renamed below
# 0 1 2 0
# 0 0.162393 0.066667 0.208362 9
# 1 0.101099 0.280000 0.879310 5
# 2 0.109890 0.916667 0.826194 3
# 3 0.107143 0.500000 0.112664 4
# 4 1.000000 0.050000 0.465933 1
# rename the header
r.columns = list(df.columns) + ['类别数目']
print(r)
# R F M 类别数目
# 0 0.162393 0.066667 0.208362 9
# 1 0.101099 0.280000 0.879310 5
# 2 0.109890 0.916667 0.826194 3
# 3 0.107143 0.500000 0.112664 4
# 4 1.000000 0.050000 0.465933 1
# 3. 3-D scatter plot, one colour per cluster.
# model.fit_predict(df2) # == model.fit(df2).predict(df2)
# y_pred = model.predict(df2) # model is already fitted; predict suffices
sd = plt.figure().add_subplot(111, projection='3d')
sd.set_xlabel('R')
sd.set_ylabel('F')
sd.set_zlabel('M')
sd.scatter(df.R, df.F, df.M, c=y_pred)
# plotting the RAW df keeps the axes readable; y_pred equals model.labels_
# sd.scatter(df.R, df.F, df.M, c=model.labels_)
plt.show()
'''
# sns.barplot(x="sex", y="survived", hue="class", data=r2)
# plt.show()
# ----------------分类中心的柱状图----------------------
# r3 = r.drop('类别数目', axis=1)
##plt.bar(range(3), r3.ix[0], label='0')
##plt.legend()
# sns.barplot(data=r3)
# plt.show()
# --------------------------------------
## 带图例的3D散点图, 上面画的就是这样的,已经OK了
# y_pred = model.predict(df2) # 因为这个model已经fit过,只要predict
# sd = plt.figure().add_subplot(111, projection = '3d')
# sd.set_xlabel('R')
# sd.set_ylabel('F')
# sd.set_zlabel('M')
## 分组构建不同类型的集合
# type0 = ([], [], []) # 一系列(x, y, z)坐标这里为R, F, M
# type1 = ([], [], [])
# type2 = ([], [], [])
# type3 = ([], [], [])
# type4 = ([], [], [])
# types = tuple(set(y_pred))
# length = len(y_pred)
# for i in range(length):
# if y_pred[i] == types[0]:
# type0[0].append(df.R[i])
# type0[1].append(df.F[i])
# type0[2].append(df.M[i])
# elif y_pred[i] == types[1]:
# type1[0].append(df.R[i])
# type1[1].append(df.F[i])
# type1[2].append(df.M[i])
# elif y_pred[i] == types[2]:
# type2[0].append(df.R[i])
# type2[1].append(df.F[i])
# type2[2].append(df.M[i])
# elif y_pred[i] == types[3]:
# type3[0].append(df.R[i])
# type3[1].append(df.F[i])
# type3[2].append(df.M[i])
# elif y_pred[i] == types[4]:
# type4[0].append(df.R[i])
# type4[1].append(df.F[i])
# type4[2].append(df.M[i])
#
# t0 = sd.scatter(type0[0], type0[1],type0[2], marker='x', color='b')
# t1 = sd.scatter(type1[0], type1[1],type1[2], marker='x', color='c')
# t2 = sd.scatter(type2[0], type2[1],type2[2], marker='o', color='r')
# t3 = sd.scatter(type3[0], type3[1],type3[2], marker='o', color='y')
# t4 = sd.scatter(type4[0], type4[1],type4[2], marker='o', color='g')
# plt.legend((t0, t1, t2, t3, t4),
# ('type0', 'type1', 'type2', 'type3', 'type4'),
# scatterpoints=1,
# loc=2, # 显示位置
# ncol=2, # 几列
# fontsize=8)
# plt.show()
'''
5.降维
数据源
#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
5,Charmeleon,Fire,,405,58,64,58,80,65,80,1,False
6,Charizard,Fire,Flying,534,78,84,78,109,85,100,1,False
6,CharizardMega Charizard X,Fire,Dragon,634,78,130,111,130,85,100,1,False
6,CharizardMega Charizard Y,Fire,Flying,634,78,104,78,159,115,100,1,False
7,Squirtle,Water,,314,44,48,65,50,64,43,1,False
8,Wartortle,Water,,405,59,63,80,65,80,58,1,False
9,Blastoise,Water,,530,79,83,100,85,105,78,1,False
9,BlastoiseMega Blastoise,Water,,630,79,103,120,135,115,78,1,False
10,Caterpie,Bug,,195,45,30,35,20,20,45,1,False
11,Metapod,Bug,,205,50,20,55,25,25,30,1,False
12,Butterfree,Bug,Flying,395,60,45,50,90,80,70,1,False
13,Weedle,Bug,Poison,195,40,35,30,20,20,50,1,False
14,Kakuna,Bug,Poison,205,45,25,50,25,25,35,1,False
15,Beedrill,Bug,Poison,395,65,90,40,45,80,75,1,False
15,BeedrillMega Beedrill,Bug,Poison,495,65,150,40,15,80,145,1,False
16,Pidgey,Normal,Flying,251,40,45,40,35,35,56,1,False
17,Pidgeotto,Normal,Flying,349,63,60,55,50,50,71,1,False
18,Pidgeot,Normal,Flying,479,83,80,75,70,70,101,1,False
18,PidgeotMega Pidgeot,Normal,Flying,579,83,80,80,135,80,121,1,False
19,Rattata,Normal,,253,30,56,35,25,35,72,1,False
20,Raticate,Normal,,413,55,81,60,50,70,97,1,False
21,Spearow,Normal,Flying,262,40,60,30,31,31,70,1,False
22,Fearow,Normal,Flying,442,65,90,65,61,61,100,1,False
23,Ekans,Poison,,288,35,60,44,40,54,55,1,False
24,Arbok,Poison,,438,60,85,69,65,79,80,1,False
25,Pikachu,Electric,,320,35,55,40,50,50,90,1,False
26,Raichu,Electric,,485,60,90,55,90,80,110,1,False
27,Sandshrew,Ground,,300,50,75,85,20,30,40,1,False
28,Sandslash,Ground,,450,75,100,110,45,55,65,1,False
29,Nidoran♀,Poison,,275,55,47,52,40,40,41,1,False
30,Nidorina,Poison,,365,70,62,67,55,55,56,1,False
31,Nidoqueen,Poison,Ground,505,90,92,87,75,85,76,1,False
32,Nidoran♂,Poison,,273,46,57,40,40,40,50,1,False
33,Nidorino,Poison,,365,61,72,57,55,55,65,1,False
34,Nidoking,Poison,Ground,505,81,102,77,85,75,85,1,False
35,Clefairy,Fairy,,323,70,45,48,60,65,35,1,False
36,Clefable,Fairy,,483,95,70,73,95,90,60,1,False
37,Vulpix,Fire,,299,38,41,40,50,65,65,1,False
38,Ninetales,Fire,,505,73,76,75,81,100,100,1,False
39,Jigglypuff,Normal,Fairy,270,115,45,20,45,25,20,1,False
40,Wigglytuff,Normal,Fairy,435,140,70,45,85,50,45,1,False
41,Zubat,Poison,Flying,245,40,45,35,30,40,55,1,False
42,Golbat,Poison,Flying,455,75,80,70,65,75,90,1,False
43,Oddish,Grass,Poison,320,45,50,55,75,65,30,1,False
44,Gloom,Grass,Poison,395,60,65,70,85,75,40,1,False
45,Vileplume,Grass,Poison,490,75,80,85,110,90,50,1,False
46,Paras,Bug,Grass,285,35,70,55,45,55,25,1,False
47,Parasect,Bug,Grass,405,60,95,80,60,80,30,1,False
48,Venonat,Bug,Poison,305,60,55,50,40,55,45,1,False
49,Venomoth,Bug,Poison,450,70,65,60,90,75,90,1,False
50,Diglett,Ground,,265,10,55,25,35,45,95,1,False
51,Dugtrio,Ground,,405,35,80,50,50,70,120,1,False
52,Meowth,Normal,,290,40,45,35,40,40,90,1,False
53,Persian,Normal,,440,65,70,60,65,65,115,1,False
54,Psyduck,Water,,320,50,52,48,65,50,55,1,False
55,Golduck,Water,,500,80,82,78,95,80,85,1,False
56,Mankey,Fighting,,305,40,80,35,35,45,70,1,False
57,Primeape,Fighting,,455,65,105,60,60,70,95,1,False
58,Growlithe,Fire,,350,55,70,45,70,50,60,1,False
59,Arcanine,Fire,,555,90,110,80,100,80,95,1,False
60,Poliwag,Water,,300,40,50,40,40,40,90,1,False
61,Poliwhirl,Water,,385,65,65,65,50,50,90,1,False
62,Poliwrath,Water,Fighting,510,90,95,95,70,90,70,1,False
63,Abra,Psychic,,310,25,20,15,105,55,90,1,False
64,Kadabra,Psychic,,400,40,35,30,120,70,105,1,False
65,Alakazam,Psychic,,500,55,50,45,135,95,120,1,False
65,AlakazamMega Alakazam,Psychic,,590,55,50,65,175,95,150,1,False
66,Machop,Fighting,,305,70,80,50,35,35,35,1,False
67,Machoke,Fighting,,405,80,100,70,50,60,45,1,False
68,Machamp,Fighting,,505,90,130,80,65,85,55,1,False
69,Bellsprout,Grass,Poison,300,50,75,35,70,30,40,1,False
70,Weepinbell,Grass,Poison,390,65,90,50,85,45,55,1,False
71,Victreebel,Grass,Poison,490,80,105,65,100,70,70,1,False
72,Tentacool,Water,Poison,335,40,40,35,50,100,70,1,False
73,Tentacruel,Water,Poison,515,80,70,65,80,120,100,1,False
74,Geodude,Rock,Ground,300,40,80,100,30,30,20,1,False
75,Graveler,Rock,Ground,390,55,95,115,45,45,35,1,False
76,Golem,Rock,Ground,495,80,120,130,55,65,45,1,False
77,Ponyta,Fire,,410,50,85,55,65,65,90,1,False
78,Rapidash,Fire,,500,65,100,70,80,80,105,1,False
79,Slowpoke,Water,Psychic,315,90,65,65,40,40,15,1,False
80,Slowbro,Water,Psychic,490,95,75,110,100,80,30,1,False
80,SlowbroMega Slowbro,Water,Psychic,590,95,75,180,130,80,30,1,False
81,Magnemite,Electric,Steel,325,25,35,70,95,55,45,1,False
82,Magneton,Electric,Steel,465,50,60,95,120,70,70,1,False
83,Farfetch'd,Normal,Flying,352,52,65,55,58,62,60,1,False
84,Doduo,Normal,Flying,310,35,85,45,35,35,75,1,False
85,Dodrio,Normal,Flying,460,60,110,70,60,60,100,1,False
86,Seel,Water,,325,65,45,55,45,70,45,1,False
87,Dewgong,Water,Ice,475,90,70,80,70,95,70,1,False
88,Grimer,Poison,,325,80,80,50,40,50,25,1,False
89,Muk,Poison,,500,105,105,75,65,100,50,1,False
90,Shellder,Water,,305,30,65,100,45,25,40,1,False
91,Cloyster,Water,Ice,525,50,95,180,85,45,70,1,False
92,Gastly,Ghost,Poison,310,30,35,30,100,35,80,1,False
93,Haunter,Ghost,Poison,405,45,50,45,115,55,95,1,False
94,Gengar,Ghost,Poison,500,60,65,60,130,75,110,1,False
94,GengarMega Gengar,Ghost,Poison,600,60,65,80,170,95,130,1,False
95,Onix,Rock,Ground,385,35,45,160,30,45,70,1,False
96,Drowzee,Psychic,,328,60,48,45,43,90,42,1,False
97,Hypno,Psychic,,483,85,73,70,73,115,67,1,False
98,Krabby,Water,,325,30,105,90,25,25,50,1,False
99,Kingler,Water,,475,55,130,115,50,50,75,1,False
100,Voltorb,Electric,,330,40,30,50,55,55,100,1,False
101,Electrode,Electric,,480,60,50,70,80,80,140,1,False
102,Exeggcute,Grass,Psychic,325,60,40,80,60,45,40,1,False
103,Exeggutor,Grass,Psychic,520,95,95,85,125,65,55,1,False
104,Cubone,Ground,,320,50,50,95,40,50,35,1,False
105,Marowak,Ground,,425,60,80,110,50,80,45,1,False
106,Hitmonlee,Fighting,,455,50,120,53,35,110,87,1,False
107,Hitmonchan,Fighting,,455,50,105,79,35,110,76,1,False
108,Lickitung,Normal,,385,90,55,75,60,75,30,1,False
109,Koffing,Poison,,340,40,65,95,60,45,35,1,False
110,Weezing,Poison,,490,65,90,120,85,70,60,1,False
111,Rhyhorn,Ground,Rock,345,80,85,95,30,30,25,1,False
112,Rhydon,Ground,Rock,485,105,130,120,45,45,40,1,False
113,Chansey,Normal,,450,250,5,5,35,105,50,1,False
114,Tangela,Grass,,435,65,55,115,100,40,60,1,False
115,Kangaskhan,Normal,,490,105,95,80,40,80,90,1,False
115,KangaskhanMega Kangaskhan,Normal,,590,105,125,100,60,100,100,1,False
116,Horsea,Water,,295,30,40,70,70,25,60,1,False
117,Seadra,Water,,440,55,65,95,95,45,85,1,False
118,Goldeen,Water,,320,45,67,60,35,50,63,1,False
119,Seaking,Water,,450,80,92,65,65,80,68,1,False
120,Staryu,Water,,340,30,45,55,70,55,85,1,False
121,Starmie,Water,Psychic,520,60,75,85,100,85,115,1,False
122,Mr. Mime,Psychic,Fairy,460,40,45,65,100,120,90,1,False
123,Scyther,Bug,Flying,500,70,110,80,55,80,105,1,False
124,Jynx,Ice,Psychic,455,65,50,35,115,95,95,1,False
125,Electabuzz,Electric,,490,65,83,57,95,85,105,1,False
126,Magmar,Fire,,495,65,95,57,100,85,93,1,False
127,Pinsir,Bug,,500,65,125,100,55,70,85,1,False
127,PinsirMega Pinsir,Bug,Flying,600,65,155,120,65,90,105,1,False
128,Tauros,Normal,,490,75,100,95,40,70,110,1,False
129,Magikarp,Water,,200,20,10,55,15,20,80,1,False
130,Gyarados,Water,Flying,540,95,125,79,60,100,81,1,False
130,GyaradosMega Gyarados,Water,Dark,640,95,155,109,70,130,81,1,False
131,Lapras,Water,Ice,535,130,85,80,85,95,60,1,False
132,Ditto,Normal,,288,48,48,48,48,48,48,1,False
133,Eevee,Normal,,325,55,55,50,45,65,55,1,False
134,Vaporeon,Water,,525,130,65,60,110,95,65,1,False
135,Jolteon,Electric,,525,65,65,60,110,95,130,1,False
136,Flareon,Fire,,525,65,130,60,95,110,65,1,False
137,Porygon,Normal,,395,65,60,70,85,75,40,1,False
138,Omanyte,Rock,Water,355,35,40,100,90,55,35,1,False
139,Omastar,Rock,Water,495,70,60,125,115,70,55,1,False
140,Kabuto,Rock,Water,355,30,80,90,55,45,55,1,False
141,Kabutops,Rock,Water,495,60,115,105,65,70,80,1,False
142,Aerodactyl,Rock,Flying,515,80,105,65,60,75,130,1,False
142,AerodactylMega Aerodactyl,Rock,Flying,615,80,135,85,70,95,150,1,False
143,Snorlax,Normal,,540,160,110,65,65,110,30,1,False
144,Articuno,Ice,Flying,580,90,85,100,95,125,85,1,True
145,Zapdos,Electric,Flying,580,90,90,85,125,90,100,1,True
146,Moltres,Fire,Flying,580,90,100,90,125,85,90,1,True
147,Dratini,Dragon,,300,41,64,45,50,50,50,1,False
148,Dragonair,Dragon,,420,61,84,65,70,70,70,1,False
149,Dragonite,Dragon,Flying,600,91,134,95,100,100,80,1,False
150,Mewtwo,Psychic,,680,106,110,90,154,90,130,1,True
150,MewtwoMega Mewtwo X,Psychic,Fighting,780,106,190,100,154,100,130,1,True
150,MewtwoMega Mewtwo Y,Psychic,,780,106,150,70,194,120,140,1,True
151,Mew,Psychic,,600,100,100,100,100,100,100,1,False
152,Chikorita,Grass,,318,45,49,65,49,65,45,2,False
153,Bayleef,Grass,,405,60,62,80,63,80,60,2,False
154,Meganium,Grass,,525,80,82,100,83,100,80,2,False
155,Cyndaquil,Fire,,309,39,52,43,60,50,65,2,False
156,Quilava,Fire,,405,58,64,58,80,65,80,2,False
157,Typhlosion,Fire,,534,78,84,78,109,85,100,2,False
158,Totodile,Water,,314,50,65,64,44,48,43,2,False
159,Croconaw,Water,,405,65,80,80,59,63,58,2,False
160,Feraligatr,Water,,530,85,105,100,79,83,78,2,False
161,Sentret,Normal,,215,35,46,34,35,45,20,2,False
162,Furret,Normal,,415,85,76,64,45,55,90,2,False
163,Hoothoot,Normal,Flying,262,60,30,30,36,56,50,2,False
164,Noctowl,Normal,Flying,442,100,50,50,76,96,70,2,False
165,Ledyba,Bug,Flying,265,40,20,30,40,80,55,2,False
166,Ledian,Bug,Flying,390,55,35,50,55,110,85,2,False
167,Spinarak,Bug,Poison,250,40,60,40,40,40,30,2,False
168,Ariados,Bug,Poison,390,70,90,70,60,60,40,2,False
169,Crobat,Poison,Flying,535,85,90,80,70,80,130,2,False
170,Chinchou,Water,Electric,330,75,38,38,56,56,67,2,False
171,Lanturn,Water,Electric,460,125,58,58,76,76,67,2,False
172,Pichu,Electric,,205,20,40,15,35,35,60,2,False
173,Cleffa,Fairy,,218,50,25,28,45,55,15,2,False
174,Igglybuff,Normal,Fairy,210,90,30,15,40,20,15,2,False
175,Togepi,Fairy,,245,35,20,65,40,65,20,2,False
176,Togetic,Fairy,Flying,405,55,40,85,80,105,40,2,False
177,Natu,Psychic,Flying,320,40,50,45,70,45,70,2,False
178,Xatu,Psychic,Flying,470,65,75,70,95,70,95,2,False
179,Mareep,Electric,,280,55,40,40,65,45,35,2,False
180,Flaaffy,Electric,,365,70,55,55,80,60,45,2,False
181,Ampharos,Electric,,510,90,75,85,115,90,55,2,False
181,AmpharosMega Ampharos,Electric,Dragon,610,90,95,105,165,110,45,2,False
182,Bellossom,Grass,,490,75,80,95,90,100,50,2,False
183,Marill,Water,Fairy,250,70,20,50,20,50,40,2,False
184,Azumarill,Water,Fairy,420,100,50,80,60,80,50,2,False
185,Sudowoodo,Rock,,410,70,100,115,30,65,30,2,False
186,Politoed,Water,,500,90,75,75,90,100,70,2,False
187,Hoppip,Grass,Flying,250,35,35,40,35,55,50,2,False
188,Skiploom,Grass,Flying,340,55,45,50,45,65,80,2,False
189,Jumpluff,Grass,Flying,460,75,55,70,55,95,110,2,False
190,Aipom,Normal,,360,55,70,55,40,55,85,2,False
191,Sunkern,Grass,,180,30,30,30,30,30,30,2,False
192,Sunflora,Grass,,425,75,75,55,105,85,30,2,False
193,Yanma,Bug,Flying,390,65,65,45,75,45,95,2,False
194,Wooper,Water,Ground,210,55,45,45,25,25,15,2,False
195,Quagsire,Water,Ground,430,95,85,85,65,65,35,2,False
196,Espeon,Psychic,,525,65,65,60,130,95,110,2,False
197,Umbreon,Dark,,525,95,65,110,60,130,65,2,False
198,Murkrow,Dark,Flying,405,60,85,42,85,42,91,2,False
199,Slowking,Water,Psychic,490,95,75,80,100,110,30,2,False
200,Misdreavus,Ghost,,435,60,60,60,85,85,85,2,False
201,Unown,Psychic,,336,48,72,48,72,48,48,2,False
202,Wobbuffet,Psychic,,405,190,33,58,33,58,33,2,False
203,Girafarig,Normal,Psychic,455,70,80,65,90,65,85,2,False
204,Pineco,Bug,,290,50,65,90,35,35,15,2,False
205,Forretress,Bug,Steel,465,75,90,140,60,60,40,2,False
206,Dunsparce,Normal,,415,100,70,70,65,65,45,2,False
207,Gligar,Ground,Flying,430,65,75,105,35,65,85,2,False
208,Steelix,Steel,Ground,510,75,85,200,55,65,30,2,False
208,SteelixMega Steelix,Steel,Ground,610,75,125,230,55,95,30,2,False
209,Snubbull,Fairy,,300,60,80,50,40,40,30,2,False
210,Granbull,Fairy,,450,90,120,75,60,60,45,2,False
211,Qwilfish,Water,Poison,430,65,95,75,55,55,85,2,False
212,Scizor,Bug,Steel,500,70,130,100,55,80,65,2,False
212,ScizorMega Scizor,Bug,Steel,600,70,150,140,65,100,75,2,False
213,Shuckle,Bug,Rock,505,20,10,230,10,230,5,2,False
214,Heracross,Bug,Fighting,500,80,125,75,40,95,85,2,False
214,HeracrossMega Heracross,Bug,Fighting,600,80,185,115,40,105,75,2,False
215,Sneasel,Dark,Ice,430,55,95,55,35,75,115,2,False
216,Teddiursa,Normal,,330,60,80,50,50,50,40,2,False
217,Ursaring,Normal,,500,90,130,75,75,75,55,2,False
218,Slugma,Fire,,250,40,40,40,70,40,20,2,False
219,Magcargo,Fire,Rock,410,50,50,120,80,80,30,2,False
220,Swinub,Ice,Ground,250,50,50,40,30,30,50,2,False
221,Piloswine,Ice,Ground,450,100,100,80,60,60,50,2,False
222,Corsola,Water,Rock,380,55,55,85,65,85,35,2,False
223,Remoraid,Water,,300,35,65,35,65,35,65,2,False
224,Octillery,Water,,480,75,105,75,105,75,45,2,False
225,Delibird,Ice,Flying,330,45,55,45,65,45,75,2,False
226,Mantine,Water,Flying,465,65,40,70,80,140,70,2,False
227,Skarmory,Steel,Flying,465,65,80,140,40,70,70,2,False
228,Houndour,Dark,Fire,330,45,60,30,80,50,65,2,False
229,Houndoom,Dark,Fire,500,75,90,50,110,80,95,2,False
229,HoundoomMega Houndoom,Dark,Fire,600,75,90,90,140,90,115,2,False
230,Kingdra,Water,Dragon,540,75,95,95,95,95,85,2,False
231,Phanpy,Ground,,330,90,60,60,40,40,40,2,False
232,Donphan,Ground,,500,90,120,120,60,60,50,2,False
233,Porygon2,Normal,,515,85,80,90,105,95,60,2,False
234,Stantler,Normal,,465,73,95,62,85,65,85,2,False
235,Smeargle,Normal,,250,55,20,35,20,45,75,2,False
236,Tyrogue,Fighting,,210,35,35,35,35,35,35,2,False
237,Hitmontop,Fighting,,455,50,95,95,35,110,70,2,False
238,Smoochum,Ice,Psychic,305,45,30,15,85,65,65,2,False
239,Elekid,Electric,,360,45,63,37,65,55,95,2,False
240,Magby,Fire,,365,45,75,37,70,55,83,2,False
241,Miltank,Normal,,490,95,80,105,40,70,100,2,False
242,Blissey,Normal,,540,255,10,10,75,135,55,2,False
243,Raikou,Electric,,580,90,85,75,115,100,115,2,True
244,Entei,Fire,,580,115,115,85,90,75,100,2,True
245,Suicune,Water,,580,100,75,115,90,115,85,2,True
246,Larvitar,Rock,Ground,300,50,64,50,45,50,41,2,False
247,Pupitar,Rock,Ground,410,70,84,70,65,70,51,2,False
248,Tyranitar,Rock,Dark,600,100,134,110,95,100,61,2,False
248,TyranitarMega Tyranitar,Rock,Dark,700,100,164,150,95,120,71,2,False
249,Lugia,Psychic,Flying,680,106,90,130,90,154,110,2,True
250,Ho-oh,Fire,Flying,680,106,130,90,110,154,90,2,True
251,Celebi,Psychic,Grass,600,100,100,100,100,100,100,2,False
252,Treecko,Grass,,310,40,45,35,65,55,70,3,False
253,Grovyle,Grass,,405,50,65,45,85,65,95,3,False
254,Sceptile,Grass,,530,70,85,65,105,85,120,3,False
254,SceptileMega Sceptile,Grass,Dragon,630,70,110,75,145,85,145,3,False
255,Torchic,Fire,,310,45,60,40,70,50,45,3,False
256,Combusken,Fire,Fighting,405,60,85,60,85,60,55,3,False
257,Blaziken,Fire,Fighting,530,80,120,70,110,70,80,3,False
257,BlazikenMega Blaziken,Fire,Fighting,630,80,160,80,130,80,100,3,False
258,Mudkip,Water,,310,50,70,50,50,50,40,3,False
259,Marshtomp,Water,Ground,405,70,85,70,60,70,50,3,False
260,Swampert,Water,Ground,535,100,110,90,85,90,60,3,False
260,SwampertMega Swampert,Water,Ground,635,100,150,110,95,110,70,3,False
261,Poochyena,Dark,,220,35,55,35,30,30,35,3,False
262,Mightyena,Dark,,420,70,90,70,60,60,70,3,False
263,Zigzagoon,Normal,,240,38,30,41,30,41,60,3,False
264,Linoone,Normal,,420,78,70,61,50,61,100,3,False
265,Wurmple,Bug,,195,45,45,35,20,30,20,3,False
266,Silcoon,Bug,,205,50,35,55,25,25,15,3,False
267,Beautifly,Bug,Flying,395,60,70,50,100,50,65,3,False
268,Cascoon,Bug,,205,50,35,55,25,25,15,3,False
269,Dustox,Bug,Poison,385,60,50,70,50,90,65,3,False
270,Lotad,Water,Grass,220,40,30,30,40,50,30,3,False
271,Lombre,Water,Grass,340,60,50,50,60,70,50,3,False
272,Ludicolo,Water,Grass,480,80,70,70,90,100,70,3,False
273,Seedot,Grass,,220,40,40,50,30,30,30,3,False
274,Nuzleaf,Grass,Dark,340,70,70,40,60,40,60,3,False
275,Shiftry,Grass,Dark,480,90,100,60,90,60,80,3,False
276,Taillow,Normal,Flying,270,40,55,30,30,30,85,3,False
277,Swellow,Normal,Flying,430,60,85,60,50,50,125,3,False
278,Wingull,Water,Flying,270,40,30,30,55,30,85,3,False
279,Pelipper,Water,Flying,430,60,50,100,85,70,65,3,False
280,Ralts,Psychic,Fairy,198,28,25,25,45,35,40,3,False
281,Kirlia,Psychic,Fairy,278,38,35,35,65,55,50,3,False
282,Gardevoir,Psychic,Fairy,518,68,65,65,125,115,80,3,False
282,GardevoirMega Gardevoir,Psychic,Fairy,618,68,85,65,165,135,100,3,False
283,Surskit,Bug,Water,269,40,30,32,50,52,65,3,False
284,Masquerain,Bug,Flying,414,70,60,62,80,82,60,3,False
285,Shroomish,Grass,,295,60,40,60,40,60,35,3,False
286,Breloom,Grass,Fighting,460,60,130,80,60,60,70,3,False
287,Slakoth,Normal,,280,60,60,60,35,35,30,3,False
288,Vigoroth,Normal,,440,80,80,80,55,55,90,3,False
289,Slaking,Normal,,670,150,160,100,95,65,100,3,False
290,Nincada,Bug,Ground,266,31,45,90,30,30,40,3,False
291,Ninjask,Bug,Flying,456,61,90,45,50,50,160,3,False
292,Shedinja,Bug,Ghost,236,1,90,45,30,30,40,3,False
293,Whismur,Normal,,240,64,51,23,51,23,28,3,False
294,Loudred,Normal,,360,84,71,43,71,43,48,3,False
295,Exploud,Normal,,490,104,91,63,91,73,68,3,False
296,Makuhita,Fighting,,237,72,60,30,20,30,25,3,False
297,Hariyama,Fighting,,474,144,120,60,40,60,50,3,False
298,Azurill,Normal,Fairy,190,50,20,40,20,40,20,3,False
299,Nosepass,Rock,,375,30,45,135,45,90,30,3,False
300,Skitty,Normal,,260,50,45,45,35,35,50,3,False
301,Delcatty,Normal,,380,70,65,65,55,55,70,3,False
302,Sableye,Dark,Ghost,380,50,75,75,65,65,50,3,False
302,SableyeMega Sableye,Dark,Ghost,480,50,85,125,85,115,20,3,False
303,Mawile,Steel,Fairy,380,50,85,85,55,55,50,3,False
303,MawileMega Mawile,Steel,Fairy,480,50,105,125,55,95,50,3,False
304,Aron,Steel,Rock,330,50,70,100,40,40,30,3,False
305,Lairon,Steel,Rock,430,60,90,140,50,50,40,3,False
306,Aggron,Steel,Rock,530,70,110,180,60,60,50,3,False
306,AggronMega Aggron,Steel,,630,70,140,230,60,80,50,3,False
307,Meditite,Fighting,Psychic,280,30,40,55,40,55,60,3,False
308,Medicham,Fighting,Psychic,410,60,60,75,60,75,80,3,False
308,MedichamMega Medicham,Fighting,Psychic,510,60,100,85,80,85,100,3,False
309,Electrike,Electric,,295,40,45,40,65,40,65,3,False
310,Manectric,Electric,,475,70,75,60,105,60,105,3,False
310,ManectricMega Manectric,Electric,,575,70,75,80,135,80,135,3,False
311,Plusle,Electric,,405,60,50,40,85,75,95,3,False
312,Minun,Electric,,405,60,40,50,75,85,95,3,False
313,Volbeat,Bug,,400,65,73,55,47,75,85,3,False
314,Illumise,Bug,,400,65,47,55,73,75,85,3,False
315,Roselia,Grass,Poison,400,50,60,45,100,80,65,3,False
316,Gulpin,Poison,,302,70,43,53,43,53,40,3,False
317,Swalot,Poison,,467,100,73,83,73,83,55,3,False
318,Carvanha,Water,Dark,305,45,90,20,65,20,65,3,False
319,Sharpedo,Water,Dark,460,70,120,40,95,40,95,3,False
319,SharpedoMega Sharpedo,Water,Dark,560,70,140,70,110,65,105,3,False
320,Wailmer,Water,,400,130,70,35,70,35,60,3,False
321,Wailord,Water,,500,170,90,45,90,45,60,3,False
322,Numel,Fire,Ground,305,60,60,40,65,45,35,3,False
323,Camerupt,Fire,Ground,460,70,100,70,105,75,40,3,False
323,CameruptMega Camerupt,Fire,Ground,560,70,120,100,145,105,20,3,False
324,Torkoal,Fire,,470,70,85,140,85,70,20,3,False
325,Spoink,Psychic,,330,60,25,35,70,80,60,3,False
326,Grumpig,Psychic,,470,80,45,65,90,110,80,3,False
327,Spinda,Normal,,360,60,60,60,60,60,60,3,False
328,Trapinch,Ground,,290,45,100,45,45,45,10,3,False
329,Vibrava,Ground,Dragon,340,50,70,50,50,50,70,3,False
330,Flygon,Ground,Dragon,520,80,100,80,80,80,100,3,False
331,Cacnea,Grass,,335,50,85,40,85,40,35,3,False
332,Cacturne,Grass,Dark,475,70,115,60,115,60,55,3,False
333,Swablu,Normal,Flying,310,45,40,60,40,75,50,3,False
334,Altaria,Dragon,Flying,490,75,70,90,70,105,80,3,False
334,AltariaMega Altaria,Dragon,Fairy,590,75,110,110,110,105,80,3,False
335,Zangoose,Normal,,458,73,115,60,60,60,90,3,False
336,Seviper,Poison,,458,73,100,60,100,60,65,3,False
337,Lunatone,Rock,Psychic,440,70,55,65,95,85,70,3,False
338,Solrock,Rock,Psychic,440,70,95,85,55,65,70,3,False
339,Barboach,Water,Ground,288,50,48,43,46,41,60,3,False
340,Whiscash,Water,Ground,468,110,78,73,76,71,60,3,False
341,Corphish,Water,,308,43,80,65,50,35,35,3,False
342,Crawdaunt,Water,Dark,468,63,120,85,90,55,55,3,False
343,Baltoy,Ground,Psychic,300,40,40,55,40,70,55,3,False
344,Claydol,Ground,Psychic,500,60,70,105,70,120,75,3,False
345,Lileep,Rock,Grass,355,66,41,77,61,87,23,3,False
346,Cradily,Rock,Grass,495,86,81,97,81,107,43,3,False
347,Anorith,Rock,Bug,355,45,95,50,40,50,75,3,False
348,Armaldo,Rock,Bug,495,75,125,100,70,80,45,3,False
349,Feebas,Water,,200,20,15,20,10,55,80,3,False
350,Milotic,Water,,540,95,60,79,100,125,81,3,False
351,Castform,Normal,,420,70,70,70,70,70,70,3,False
352,Kecleon,Normal,,440,60,90,70,60,120,40,3,False
353,Shuppet,Ghost,,295,44,75,35,63,33,45,3,False
354,Banette,Ghost,,455,64,115,65,83,63,65,3,False
354,BanetteMega Banette,Ghost,,555,64,165,75,93,83,75,3,False
355,Duskull,Ghost,,295,20,40,90,30,90,25,3,False
356,Dusclops,Ghost,,455,40,70,130,60,130,25,3,False
357,Tropius,Grass,Flying,460,99,68,83,72,87,51,3,False
358,Chimecho,Psychic,,425,65,50,70,95,80,65,3,False
359,Absol,Dark,,465,65,130,60,75,60,75,3,False
359,AbsolMega Absol,Dark,,565,65,150,60,115,60,115,3,False
360,Wynaut,Psychic,,260,95,23,48,23,48,23,3,False
361,Snorunt,Ice,,300,50,50,50,50,50,50,3,False
362,Glalie,Ice,,480,80,80,80,80,80,80,3,False
362,GlalieMega Glalie,Ice,,580,80,120,80,120,80,100,3,False
363,Spheal,Ice,Water,290,70,40,50,55,50,25,3,False
364,Sealeo,Ice,Water,410,90,60,70,75,70,45,3,False
365,Walrein,Ice,Water,530,110,80,90,95,90,65,3,False
366,Clamperl,Water,,345,35,64,85,74,55,32,3,False
367,Huntail,Water,,485,55,104,105,94,75,52,3,False
368,Gorebyss,Water,,485,55,84,105,114,75,52,3,False
369,Relicanth,Water,Rock,485,100,90,130,45,65,55,3,False
370,Luvdisc,Water,,330,43,30,55,40,65,97,3,False
371,Bagon,Dragon,,300,45,75,60,40,30,50,3,False
372,Shelgon,Dragon,,420,65,95,100,60,50,50,3,False
373,Salamence,Dragon,Flying,600,95,135,80,110,80,100,3,False
373,SalamenceMega Salamence,Dragon,Flying,700,95,145,130,120,90,120,3,False
374,Beldum,Steel,Psychic,300,40,55,80,35,60,30,3,False
375,Metang,Steel,Psychic,420,60,75,100,55,80,50,3,False
376,Metagross,Steel,Psychic,600,80,135,130,95,90,70,3,False
376,MetagrossMega Metagross,Steel,Psychic,700,80,145,150,105,110,110,3,False
377,Regirock,Rock,,580,80,100,200,50,100,50,3,True
378,Regice,Ice,,580,80,50,100,100,200,50,3,True
379,Registeel,Steel,,580,80,75,150,75,150,50,3,True
380,Latias,Dragon,Psychic,600,80,80,90,110,130,110,3,True
380,LatiasMega Latias,Dragon,Psychic,700,80,100,120,140,150,110,3,True
381,Latios,Dragon,Psychic,600,80,90,80,130,110,110,3,True
381,LatiosMega Latios,Dragon,Psychic,700,80,130,100,160,120,110,3,True
382,Kyogre,Water,,670,100,100,90,150,140,90,3,True
382,KyogrePrimal Kyogre,Water,,770,100,150,90,180,160,90,3,True
383,Groudon,Ground,,670,100,150,140,100,90,90,3,True
383,GroudonPrimal Groudon,Ground,Fire,770,100,180,160,150,90,90,3,True
384,Rayquaza,Dragon,Flying,680,105,150,90,150,90,95,3,True
384,RayquazaMega Rayquaza,Dragon,Flying,780,105,180,100,180,100,115,3,True
385,Jirachi,Steel,Psychic,600,100,100,100,100,100,100,3,True
386,DeoxysNormal Forme,Psychic,,600,50,150,50,150,50,150,3,True
386,DeoxysAttack Forme,Psychic,,600,50,180,20,180,20,150,3,True
386,DeoxysDefense Forme,Psychic,,600,50,70,160,70,160,90,3,True
386,DeoxysSpeed Forme,Psychic,,600,50,95,90,95,90,180,3,True
387,Turtwig,Grass,,318,55,68,64,45,55,31,4,False
388,Grotle,Grass,,405,75,89,85,55,65,36,4,False
389,Torterra,Grass,Ground,525,95,109,105,75,85,56,4,False
390,Chimchar,Fire,,309,44,58,44,58,44,61,4,False
391,Monferno,Fire,Fighting,405,64,78,52,78,52,81,4,False
392,Infernape,Fire,Fighting,534,76,104,71,104,71,108,4,False
393,Piplup,Water,,314,53,51,53,61,56,40,4,False
394,Prinplup,Water,,405,64,66,68,81,76,50,4,False
395,Empoleon,Water,Steel,530,84,86,88,111,101,60,4,False
396,Starly,Normal,Flying,245,40,55,30,30,30,60,4,False
397,Staravia,Normal,Flying,340,55,75,50,40,40,80,4,False
398,Staraptor,Normal,Flying,485,85,120,70,50,60,100,4,False
399,Bidoof,Normal,,250,59,45,40,35,40,31,4,False
400,Bibarel,Normal,Water,410,79,85,60,55,60,71,4,False
401,Kricketot,Bug,,194,37,25,41,25,41,25,4,False
402,Kricketune,Bug,,384,77,85,51,55,51,65,4,False
403,Shinx,Electric,,263,45,65,34,40,34,45,4,False
404,Luxio,Electric,,363,60,85,49,60,49,60,4,False
405,Luxray,Electric,,523,80,120,79,95,79,70,4,False
406,Budew,Grass,Poison,280,40,30,35,50,70,55,4,False
407,Roserade,Grass,Poison,515,60,70,65,125,105,90,4,False
408,Cranidos,Rock,,350,67,125,40,30,30,58,4,False
409,Rampardos,Rock,,495,97,165,60,65,50,58,4,False
410,Shieldon,Rock,Steel,350,30,42,118,42,88,30,4,False
411,Bastiodon,Rock,Steel,495,60,52,168,47,138,30,4,False
412,Burmy,Bug,,224,40,29,45,29,45,36,4,False
413,WormadamPlant Cloak,Bug,Grass,424,60,59,85,79,105,36,4,False
413,WormadamSandy Cloak,Bug,Ground,424,60,79,105,59,85,36,4,False
413,WormadamTrash Cloak,Bug,Steel,424,60,69,95,69,95,36,4,False
414,Mothim,Bug,Flying,424,70,94,50,94,50,66,4,False
415,Combee,Bug,Flying,244,30,30,42,30,42,70,4,False
416,Vespiquen,Bug,Flying,474,70,80,102,80,102,40,4,False
417,Pachirisu,Electric,,405,60,45,70,45,90,95,4,False
418,Buizel,Water,,330,55,65,35,60,30,85,4,False
419,Floatzel,Water,,495,85,105,55,85,50,115,4,False
420,Cherubi,Grass,,275,45,35,45,62,53,35,4,False
421,Cherrim,Grass,,450,70,60,70,87,78,85,4,False
422,Shellos,Water,,325,76,48,48,57,62,34,4,False
423,Gastrodon,Water,Ground,475,111,83,68,92,82,39,4,False
424,Ambipom,Normal,,482,75,100,66,60,66,115,4,False
425,Drifloon,Ghost,Flying,348,90,50,34,60,44,70,4,False
426,Drifblim,Ghost,Flying,498,150,80,44,90,54,80,4,False
427,Buneary,Normal,,350,55,66,44,44,56,85,4,False
428,Lopunny,Normal,,480,65,76,84,54,96,105,4,False
428,LopunnyMega Lopunny,Normal,Fighting,580,65,136,94,54,96,135,4,False
429,Mismagius,Ghost,,495,60,60,60,105,105,105,4,False
430,Honchkrow,Dark,Flying,505,100,125,52,105,52,71,4,False
431,Glameow,Normal,,310,49,55,42,42,37,85,4,False
432,Purugly,Normal,,452,71,82,64,64,59,112,4,False
433,Chingling,Psychic,,285,45,30,50,65,50,45,4,False
434,Stunky,Poison,Dark,329,63,63,47,41,41,74,4,False
435,Skuntank,Poison,Dark,479,103,93,67,71,61,84,4,False
436,Bronzor,Steel,Psychic,300,57,24,86,24,86,23,4,False
437,Bronzong,Steel,Psychic,500,67,89,116,79,116,33,4,False
438,Bonsly,Rock,,290,50,80,95,10,45,10,4,False
439,Mime Jr.,Psychic,Fairy,310,20,25,45,70,90,60,4,False
440,Happiny,Normal,,220,100,5,5,15,65,30,4,False
441,Chatot,Normal,Flying,411,76,65,45,92,42,91,4,False
442,Spiritomb,Ghost,Dark,485,50,92,108,92,108,35,4,False
443,Gible,Dragon,Ground,300,58,70,45,40,45,42,4,False
444,Gabite,Dragon,Ground,410,68,90,65,50,55,82,4,False
445,Garchomp,Dragon,Ground,600,108,130,95,80,85,102,4,False
445,GarchompMega Garchomp,Dragon,Ground,700,108,170,115,120,95,92,4,False
446,Munchlax,Normal,,390,135,85,40,40,85,5,4,False
447,Riolu,Fighting,,285,40,70,40,35,40,60,4,False
448,Lucario,Fighting,Steel,525,70,110,70,115,70,90,4,False
448,LucarioMega Lucario,Fighting,Steel,625,70,145,88,140,70,112,4,False
449,Hippopotas,Ground,,330,68,72,78,38,42,32,4,False
450,Hippowdon,Ground,,525,108,112,118,68,72,47,4,False
451,Skorupi,Poison,Bug,330,40,50,90,30,55,65,4,False
452,Drapion,Poison,Dark,500,70,90,110,60,75,95,4,False
453,Croagunk,Poison,Fighting,300,48,61,40,61,40,50,4,False
454,Toxicroak,Poison,Fighting,490,83,106,65,86,65,85,4,False
455,Carnivine,Grass,,454,74,100,72,90,72,46,4,False
456,Finneon,Water,,330,49,49,56,49,61,66,4,False
457,Lumineon,Water,,460,69,69,76,69,86,91,4,False
458,Mantyke,Water,Flying,345,45,20,50,60,120,50,4,False
459,Snover,Grass,Ice,334,60,62,50,62,60,40,4,False
460,Abomasnow,Grass,Ice,494,90,92,75,92,85,60,4,False
460,AbomasnowMega Abomasnow,Grass,Ice,594,90,132,105,132,105,30,4,False
461,Weavile,Dark,Ice,510,70,120,65,45,85,125,4,False
462,Magnezone,Electric,Steel,535,70,70,115,130,90,60,4,False
463,Lickilicky,Normal,,515,110,85,95,80,95,50,4,False
464,Rhyperior,Ground,Rock,535,115,140,130,55,55,40,4,False
465,Tangrowth,Grass,,535,100,100,125,110,50,50,4,False
466,Electivire,Electric,,540,75,123,67,95,85,95,4,False
467,Magmortar,Fire,,540,75,95,67,125,95,83,4,False
468,Togekiss,Fairy,Flying,545,85,50,95,120,115,80,4,False
469,Yanmega,Bug,Flying,515,86,76,86,116,56,95,4,False
470,Leafeon,Grass,,525,65,110,130,60,65,95,4,False
471,Glaceon,Ice,,525,65,60,110,130,95,65,4,False
472,Gliscor,Ground,Flying,510,75,95,125,45,75,95,4,False
473,Mamoswine,Ice,Ground,530,110,130,80,70,60,80,4,False
474,Porygon-Z,Normal,,535,85,80,70,135,75,90,4,False
475,Gallade,Psychic,Fighting,518,68,125,65,65,115,80,4,False
475,GalladeMega Gallade,Psychic,Fighting,618,68,165,95,65,115,110,4,False
476,Probopass,Rock,Steel,525,60,55,145,75,150,40,4,False
477,Dusknoir,Ghost,,525,45,100,135,65,135,45,4,False
478,Froslass,Ice,Ghost,480,70,80,70,80,70,110,4,False
479,Rotom,Electric,Ghost,440,50,50,77,95,77,91,4,False
479,RotomHeat Rotom,Electric,Fire,520,50,65,107,105,107,86,4,False
479,RotomWash Rotom,Electric,Water,520,50,65,107,105,107,86,4,False
479,RotomFrost Rotom,Electric,Ice,520,50,65,107,105,107,86,4,False
479,RotomFan Rotom,Electric,Flying,520,50,65,107,105,107,86,4,False
479,RotomMow Rotom,Electric,Grass,520,50,65,107,105,107,86,4,False
480,Uxie,Psychic,,580,75,75,130,75,130,95,4,True
481,Mesprit,Psychic,,580,80,105,105,105,105,80,4,True
482,Azelf,Psychic,,580,75,125,70,125,70,115,4,True
483,Dialga,Steel,Dragon,680,100,120,120,150,100,90,4,True
484,Palkia,Water,Dragon,680,90,120,100,150,120,100,4,True
485,Heatran,Fire,Steel,600,91,90,106,130,106,77,4,True
486,Regigigas,Normal,,670,110,160,110,80,110,100,4,True
487,GiratinaAltered Forme,Ghost,Dragon,680,150,100,120,100,120,90,4,True
487,GiratinaOrigin Forme,Ghost,Dragon,680,150,120,100,120,100,90,4,True
488,Cresselia,Psychic,,600,120,70,120,75,130,85,4,False
489,Phione,Water,,480,80,80,80,80,80,80,4,False
490,Manaphy,Water,,600,100,100,100,100,100,100,4,False
491,Darkrai,Dark,,600,70,90,90,135,90,125,4,True
492,ShayminLand Forme,Grass,,600,100,100,100,100,100,100,4,True
492,ShayminSky Forme,Grass,Flying,600,100,103,75,120,75,127,4,True
493,Arceus,Normal,,720,120,120,120,120,120,120,4,True
494,Victini,Psychic,Fire,600,100,100,100,100,100,100,5,True
495,Snivy,Grass,,308,45,45,55,45,55,63,5,False
496,Servine,Grass,,413,60,60,75,60,75,83,5,False
497,Serperior,Grass,,528,75,75,95,75,95,113,5,False
498,Tepig,Fire,,308,65,63,45,45,45,45,5,False
499,Pignite,Fire,Fighting,418,90,93,55,70,55,55,5,False
500,Emboar,Fire,Fighting,528,110,123,65,100,65,65,5,False
501,Oshawott,Water,,308,55,55,45,63,45,45,5,False
502,Dewott,Water,,413,75,75,60,83,60,60,5,False
503,Samurott,Water,,528,95,100,85,108,70,70,5,False
504,Patrat,Normal,,255,45,55,39,35,39,42,5,False
505,Watchog,Normal,,420,60,85,69,60,69,77,5,False
506,Lillipup,Normal,,275,45,60,45,25,45,55,5,False
507,Herdier,Normal,,370,65,80,65,35,65,60,5,False
508,Stoutland,Normal,,500,85,110,90,45,90,80,5,False
509,Purrloin,Dark,,281,41,50,37,50,37,66,5,False
510,Liepard,Dark,,446,64,88,50,88,50,106,5,False
511,Pansage,Grass,,316,50,53,48,53,48,64,5,False
512,Simisage,Grass,,498,75,98,63,98,63,101,5,False
513,Pansear,Fire,,316,50,53,48,53,48,64,5,False
514,Simisear,Fire,,498,75,98,63,98,63,101,5,False
515,Panpour,Water,,316,50,53,48,53,48,64,5,False
516,Simipour,Water,,498,75,98,63,98,63,101,5,False
517,Munna,Psychic,,292,76,25,45,67,55,24,5,False
518,Musharna,Psychic,,487,116,55,85,107,95,29,5,False
519,Pidove,Normal,Flying,264,50,55,50,36,30,43,5,False
520,Tranquill,Normal,Flying,358,62,77,62,50,42,65,5,False
521,Unfezant,Normal,Flying,488,80,115,80,65,55,93,5,False
522,Blitzle,Electric,,295,45,60,32,50,32,76,5,False
523,Zebstrika,Electric,,497,75,100,63,80,63,116,5,False
524,Roggenrola,Rock,,280,55,75,85,25,25,15,5,False
525,Boldore,Rock,,390,70,105,105,50,40,20,5,False
526,Gigalith,Rock,,515,85,135,130,60,80,25,5,False
527,Woobat,Psychic,Flying,313,55,45,43,55,43,72,5,False
528,Swoobat,Psychic,Flying,425,67,57,55,77,55,114,5,False
529,Drilbur,Ground,,328,60,85,40,30,45,68,5,False
530,Excadrill,Ground,Steel,508,110,135,60,50,65,88,5,False
531,Audino,Normal,,445,103,60,86,60,86,50,5,False
531,AudinoMega Audino,Normal,Fairy,545,103,60,126,80,126,50,5,False
532,Timburr,Fighting,,305,75,80,55,25,35,35,5,False
533,Gurdurr,Fighting,,405,85,105,85,40,50,40,5,False
534,Conkeldurr,Fighting,,505,105,140,95,55,65,45,5,False
535,Tympole,Water,,294,50,50,40,50,40,64,5,False
536,Palpitoad,Water,Ground,384,75,65,55,65,55,69,5,False
537,Seismitoad,Water,Ground,509,105,95,75,85,75,74,5,False
538,Throh,Fighting,,465,120,100,85,30,85,45,5,False
539,Sawk,Fighting,,465,75,125,75,30,75,85,5,False
540,Sewaddle,Bug,Grass,310,45,53,70,40,60,42,5,False
541,Swadloon,Bug,Grass,380,55,63,90,50,80,42,5,False
542,Leavanny,Bug,Grass,500,75,103,80,70,80,92,5,False
543,Venipede,Bug,Poison,260,30,45,59,30,39,57,5,False
544,Whirlipede,Bug,Poison,360,40,55,99,40,79,47,5,False
545,Scolipede,Bug,Poison,485,60,100,89,55,69,112,5,False
546,Cottonee,Grass,Fairy,280,40,27,60,37,50,66,5,False
547,Whimsicott,Grass,Fairy,480,60,67,85,77,75,116,5,False
548,Petilil,Grass,,280,45,35,50,70,50,30,5,False
549,Lilligant,Grass,,480,70,60,75,110,75,90,5,False
550,Basculin,Water,,460,70,92,65,80,55,98,5,False
551,Sandile,Ground,Dark,292,50,72,35,35,35,65,5,False
552,Krokorok,Ground,Dark,351,60,82,45,45,45,74,5,False
553,Krookodile,Ground,Dark,519,95,117,80,65,70,92,5,False
554,Darumaka,Fire,,315,70,90,45,15,45,50,5,False
555,DarmanitanStandard Mode,Fire,,480,105,140,55,30,55,95,5,False
555,DarmanitanZen Mode,Fire,Psychic,540,105,30,105,140,105,55,5,False
556,Maractus,Grass,,461,75,86,67,106,67,60,5,False
557,Dwebble,Bug,Rock,325,50,65,85,35,35,55,5,False
558,Crustle,Bug,Rock,475,70,95,125,65,75,45,5,False
559,Scraggy,Dark,Fighting,348,50,75,70,35,70,48,5,False
560,Scrafty,Dark,Fighting,488,65,90,115,45,115,58,5,False
561,Sigilyph,Psychic,Flying,490,72,58,80,103,80,97,5,False
562,Yamask,Ghost,,303,38,30,85,55,65,30,5,False
563,Cofagrigus,Ghost,,483,58,50,145,95,105,30,5,False
564,Tirtouga,Water,Rock,355,54,78,103,53,45,22,5,False
565,Carracosta,Water,Rock,495,74,108,133,83,65,32,5,False
566,Archen,Rock,Flying,401,55,112,45,74,45,70,5,False
567,Archeops,Rock,Flying,567,75,140,65,112,65,110,5,False
568,Trubbish,Poison,,329,50,50,62,40,62,65,5,False
569,Garbodor,Poison,,474,80,95,82,60,82,75,5,False
570,Zorua,Dark,,330,40,65,40,80,40,65,5,False
571,Zoroark,Dark,,510,60,105,60,120,60,105,5,False
572,Minccino,Normal,,300,55,50,40,40,40,75,5,False
573,Cinccino,Normal,,470,75,95,60,65,60,115,5,False
574,Gothita,Psychic,,290,45,30,50,55,65,45,5,False
575,Gothorita,Psychic,,390,60,45,70,75,85,55,5,False
576,Gothitelle,Psychic,,490,70,55,95,95,110,65,5,False
577,Solosis,Psychic,,290,45,30,40,105,50,20,5,False
578,Duosion,Psychic,,370,65,40,50,125,60,30,5,False
579,Reuniclus,Psychic,,490,110,65,75,125,85,30,5,False
580,Ducklett,Water,Flying,305,62,44,50,44,50,55,5,False
581,Swanna,Water,Flying,473,75,87,63,87,63,98,5,False
582,Vanillite,Ice,,305,36,50,50,65,60,44,5,False
583,Vanillish,Ice,,395,51,65,65,80,75,59,5,False
584,Vanilluxe,Ice,,535,71,95,85,110,95,79,5,False
585,Deerling,Normal,Grass,335,60,60,50,40,50,75,5,False
586,Sawsbuck,Normal,Grass,475,80,100,70,60,70,95,5,False
587,Emolga,Electric,Flying,428,55,75,60,75,60,103,5,False
588,Karrablast,Bug,,315,50,75,45,40,45,60,5,False
589,Escavalier,Bug,Steel,495,70,135,105,60,105,20,5,False
590,Foongus,Grass,Poison,294,69,55,45,55,55,15,5,False
591,Amoonguss,Grass,Poison,464,114,85,70,85,80,30,5,False
592,Frillish,Water,Ghost,335,55,40,50,65,85,40,5,False
593,Jellicent,Water,Ghost,480,100,60,70,85,105,60,5,False
594,Alomomola,Water,,470,165,75,80,40,45,65,5,False
595,Joltik,Bug,Electric,319,50,47,50,57,50,65,5,False
596,Galvantula,Bug,Electric,472,70,77,60,97,60,108,5,False
597,Ferroseed,Grass,Steel,305,44,50,91,24,86,10,5,False
598,Ferrothorn,Grass,Steel,489,74,94,131,54,116,20,5,False
599,Klink,Steel,,300,40,55,70,45,60,30,5,False
600,Klang,Steel,,440,60,80,95,70,85,50,5,False
601,Klinklang,Steel,,520,60,100,115,70,85,90,5,False
602,Tynamo,Electric,,275,35,55,40,45,40,60,5,False
603,Eelektrik,Electric,,405,65,85,70,75,70,40,5,False
604,Eelektross,Electric,,515,85,115,80,105,80,50,5,False
605,Elgyem,Psychic,,335,55,55,55,85,55,30,5,False
606,Beheeyem,Psychic,,485,75,75,75,125,95,40,5,False
607,Litwick,Ghost,Fire,275,50,30,55,65,55,20,5,False
608,Lampent,Ghost,Fire,370,60,40,60,95,60,55,5,False
609,Chandelure,Ghost,Fire,520,60,55,90,145,90,80,5,False
610,Axew,Dragon,,320,46,87,60,30,40,57,5,False
611,Fraxure,Dragon,,410,66,117,70,40,50,67,5,False
612,Haxorus,Dragon,,540,76,147,90,60,70,97,5,False
613,Cubchoo,Ice,,305,55,70,40,60,40,40,5,False
614,Beartic,Ice,,485,95,110,80,70,80,50,5,False
615,Cryogonal,Ice,,485,70,50,30,95,135,105,5,False
616,Shelmet,Bug,,305,50,40,85,40,65,25,5,False
617,Accelgor,Bug,,495,80,70,40,100,60,145,5,False
618,Stunfisk,Ground,Electric,471,109,66,84,81,99,32,5,False
619,Mienfoo,Fighting,,350,45,85,50,55,50,65,5,False
620,Mienshao,Fighting,,510,65,125,60,95,60,105,5,False
621,Druddigon,Dragon,,485,77,120,90,60,90,48,5,False
622,Golett,Ground,Ghost,303,59,74,50,35,50,35,5,False
623,Golurk,Ground,Ghost,483,89,124,80,55,80,55,5,False
624,Pawniard,Dark,Steel,340,45,85,70,40,40,60,5,False
625,Bisharp,Dark,Steel,490,65,125,100,60,70,70,5,False
626,Bouffalant,Normal,,490,95,110,95,40,95,55,5,False
627,Rufflet,Normal,Flying,350,70,83,50,37,50,60,5,False
628,Braviary,Normal,Flying,510,100,123,75,57,75,80,5,False
629,Vullaby,Dark,Flying,370,70,55,75,45,65,60,5,False
630,Mandibuzz,Dark,Flying,510,110,65,105,55,95,80,5,False
631,Heatmor,Fire,,484,85,97,66,105,66,65,5,False
632,Durant,Bug,Steel,484,58,109,112,48,48,109,5,False
633,Deino,Dark,Dragon,300,52,65,50,45,50,38,5,False
634,Zweilous,Dark,Dragon,420,72,85,70,65,70,58,5,False
635,Hydreigon,Dark,Dragon,600,92,105,90,125,90,98,5,False
636,Larvesta,Bug,Fire,360,55,85,55,50,55,60,5,False
637,Volcarona,Bug,Fire,550,85,60,65,135,105,100,5,False
638,Cobalion,Steel,Fighting,580,91,90,129,90,72,108,5,True
639,Terrakion,Rock,Fighting,580,91,129,90,72,90,108,5,True
640,Virizion,Grass,Fighting,580,91,90,72,90,129,108,5,True
641,TornadusIncarnate Forme,Flying,,580,79,115,70,125,80,111,5,True
641,TornadusTherian Forme,Flying,,580,79,100,80,110,90,121,5,True
642,ThundurusIncarnate Forme,Electric,Flying,580,79,115,70,125,80,111,5,True
642,ThundurusTherian Forme,Electric,Flying,580,79,105,70,145,80,101,5,True
643,Reshiram,Dragon,Fire,680,100,120,100,150,120,90,5,True
644,Zekrom,Dragon,Electric,680,100,150,120,120,100,90,5,True
645,LandorusIncarnate Forme,Ground,Flying,600,89,125,90,115,80,101,5,True
645,LandorusTherian Forme,Ground,Flying,600,89,145,90,105,80,91,5,True
646,Kyurem,Dragon,Ice,660,125,130,90,130,90,95,5,True
646,KyuremBlack Kyurem,Dragon,Ice,700,125,170,100,120,90,95,5,True
646,KyuremWhite Kyurem,Dragon,Ice,700,125,120,90,170,100,95,5,True
647,KeldeoOrdinary Forme,Water,Fighting,580,91,72,90,129,90,108,5,False
647,KeldeoResolute Forme,Water,Fighting,580,91,72,90,129,90,108,5,False
648,MeloettaAria Forme,Normal,Psychic,600,100,77,77,128,128,90,5,False
648,MeloettaPirouette Forme,Normal,Fighting,600,100,128,90,77,77,128,5,False
649,Genesect,Bug,Steel,600,71,120,95,120,95,99,5,False
650,Chespin,Grass,,313,56,61,65,48,45,38,6,False
651,Quilladin,Grass,,405,61,78,95,56,58,57,6,False
652,Chesnaught,Grass,Fighting,530,88,107,122,74,75,64,6,False
653,Fennekin,Fire,,307,40,45,40,62,60,60,6,False
654,Braixen,Fire,,409,59,59,58,90,70,73,6,False
655,Delphox,Fire,Psychic,534,75,69,72,114,100,104,6,False
656,Froakie,Water,,314,41,56,40,62,44,71,6,False
657,Frogadier,Water,,405,54,63,52,83,56,97,6,False
658,Greninja,Water,Dark,530,72,95,67,103,71,122,6,False
659,Bunnelby,Normal,,237,38,36,38,32,36,57,6,False
660,Diggersby,Normal,Ground,423,85,56,77,50,77,78,6,False
661,Fletchling,Normal,Flying,278,45,50,43,40,38,62,6,False
662,Fletchinder,Fire,Flying,382,62,73,55,56,52,84,6,False
663,Talonflame,Fire,Flying,499,78,81,71,74,69,126,6,False
664,Scatterbug,Bug,,200,38,35,40,27,25,35,6,False
665,Spewpa,Bug,,213,45,22,60,27,30,29,6,False
666,Vivillon,Bug,Flying,411,80,52,50,90,50,89,6,False
667,Litleo,Fire,Normal,369,62,50,58,73,54,72,6,False
668,Pyroar,Fire,Normal,507,86,68,72,109,66,106,6,False
669,Flabébé,Fairy,,303,44,38,39,61,79,42,6,False
670,Floette,Fairy,,371,54,45,47,75,98,52,6,False
671,Florges,Fairy,,552,78,65,68,112,154,75,6,False
672,Skiddo,Grass,,350,66,65,48,62,57,52,6,False
673,Gogoat,Grass,,531,123,100,62,97,81,68,6,False
674,Pancham,Fighting,,348,67,82,62,46,48,43,6,False
675,Pangoro,Fighting,Dark,495,95,124,78,69,71,58,6,False
676,Furfrou,Normal,,472,75,80,60,65,90,102,6,False
677,Espurr,Psychic,,355,62,48,54,63,60,68,6,False
678,MeowsticMale,Psychic,,466,74,48,76,83,81,104,6,False
678,MeowsticFemale,Psychic,,466,74,48,76,83,81,104,6,False
679,Honedge,Steel,Ghost,325,45,80,100,35,37,28,6,False
680,Doublade,Steel,Ghost,448,59,110,150,45,49,35,6,False
681,AegislashBlade Forme,Steel,Ghost,520,60,150,50,150,50,60,6,False
681,AegislashShield Forme,Steel,Ghost,520,60,50,150,50,150,60,6,False
682,Spritzee,Fairy,,341,78,52,60,63,65,23,6,False
683,Aromatisse,Fairy,,462,101,72,72,99,89,29,6,False
684,Swirlix,Fairy,,341,62,48,66,59,57,49,6,False
685,Slurpuff,Fairy,,480,82,80,86,85,75,72,6,False
686,Inkay,Dark,Psychic,288,53,54,53,37,46,45,6,False
687,Malamar,Dark,Psychic,482,86,92,88,68,75,73,6,False
688,Binacle,Rock,Water,306,42,52,67,39,56,50,6,False
689,Barbaracle,Rock,Water,500,72,105,115,54,86,68,6,False
690,Skrelp,Poison,Water,320,50,60,60,60,60,30,6,False
691,Dragalge,Poison,Dragon,494,65,75,90,97,123,44,6,False
692,Clauncher,Water,,330,50,53,62,58,63,44,6,False
693,Clawitzer,Water,,500,71,73,88,120,89,59,6,False
694,Helioptile,Electric,Normal,289,44,38,33,61,43,70,6,False
695,Heliolisk,Electric,Normal,481,62,55,52,109,94,109,6,False
696,Tyrunt,Rock,Dragon,362,58,89,77,45,45,48,6,False
697,Tyrantrum,Rock,Dragon,521,82,121,119,69,59,71,6,False
698,Amaura,Rock,Ice,362,77,59,50,67,63,46,6,False
699,Aurorus,Rock,Ice,521,123,77,72,99,92,58,6,False
700,Sylveon,Fairy,,525,95,65,65,110,130,60,6,False
701,Hawlucha,Fighting,Flying,500,78,92,75,74,63,118,6,False
702,Dedenne,Electric,Fairy,431,67,58,57,81,67,101,6,False
703,Carbink,Rock,Fairy,500,50,50,150,50,150,50,6,False
704,Goomy,Dragon,,300,45,50,35,55,75,40,6,False
705,Sliggoo,Dragon,,452,68,75,53,83,113,60,6,False
706,Goodra,Dragon,,600,90,100,70,110,150,80,6,False
707,Klefki,Steel,Fairy,470,57,80,91,80,87,75,6,False
708,Phantump,Ghost,Grass,309,43,70,48,50,60,38,6,False
709,Trevenant,Ghost,Grass,474,85,110,76,65,82,56,6,False
710,PumpkabooAverage Size,Ghost,Grass,335,49,66,70,44,55,51,6,False
710,PumpkabooSmall Size,Ghost,Grass,335,44,66,70,44,55,56,6,False
710,PumpkabooLarge Size,Ghost,Grass,335,54,66,70,44,55,46,6,False
710,PumpkabooSuper Size,Ghost,Grass,335,59,66,70,44,55,41,6,False
711,GourgeistAverage Size,Ghost,Grass,494,65,90,122,58,75,84,6,False
711,GourgeistSmall Size,Ghost,Grass,494,55,85,122,58,75,99,6,False
711,GourgeistLarge Size,Ghost,Grass,494,75,95,122,58,75,69,6,False
711,GourgeistSuper Size,Ghost,Grass,494,85,100,122,58,75,54,6,False
712,Bergmite,Ice,,304,55,69,85,32,35,28,6,False
713,Avalugg,Ice,,514,95,117,184,44,46,28,6,False
714,Noibat,Flying,Dragon,245,40,30,35,45,40,55,6,False
715,Noivern,Flying,Dragon,535,85,70,80,97,80,123,6,False
716,Xerneas,Fairy,,680,126,131,95,131,98,99,6,True
717,Yveltal,Dark,Flying,680,126,131,95,131,98,99,6,True
718,Zygarde50% Forme,Dragon,Ground,600,108,100,121,81,95,95,6,True
719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True
721,Volcanion,Fire,Water,600,80,110,120,130,90,70,6,True
PCA
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt
# PCA is only one of many dimensionality-reduction techniques.
# Factor analysis is another way to reduce dimensionality.
# (1) Load the Pokemon stats dataset.
df = pd.read_csv('Pokemon.csv', encoding='utf-8')
# Renaming one column for clarity
# columns = df.columns.tolist()
# columns[0] = 'id'
# df.columns = columns
print(df)
# Use these six stat features (six dimensions) and reduce them to 4 dimensions.
cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
# print(df.head())
# The raw feature values differ widely in scale, so they must be standardized;
# the bigger the spread between features, the more standardization matters.
# scaler = StandardScaler().fit(df[cols])
# (2) Standardize the data (zero mean, unit variance per column).
df_scaled = StandardScaler().fit_transform(df[cols])
# (3) n_components=4 sets the target dimensionality after reduction.
# The smaller the target dimension, the more information is lost.
pca = PCA(n_components=4)
pca.fit(df_scaled)
print("1*****************")
print(pca.transform(df_scaled))
# [[-1.5563747 -0.02148212 0.66650392 0.18417628]
# [-0.36286656 -0.05026854 0.66791349 0.2692545 ]
# [ 1.28015158 -0.06272022 0.62391394 0.33139136]
# ...
# [ 2.45821626 -0.51588158 1.11019708 0.81838576]
# [ 3.5303971 -0.95106516 0.40828385 0.18071018]
# [ 2.23378629 0.53762985 0.24332015 -0.34570673]]
pcscores = pd.DataFrame(pca.transform(df_scaled))
print("2*****************")
print(pcscores.head(), pcscores.shape, pcscores.columns)
# Output of pcscores.head():
# 0 1 2 3
# 0 -1.556375 -0.021482 0.666504 0.184176
# 1 -0.362867 -0.050269 0.667913 0.269254
# 2 1.280152 -0.062720 0.623914 0.331391
# 3 2.620916 0.704263 0.995538 -0.199321
# 4 -1.758284 -0.706179 0.411454 -0.268602
# pcscores.shape is (800, 4)
# pcscores.columns is RangeIndex(start=0, stop=4, step=1)
# Rename the columns to PC1 PC2 PC3 PC4.
pcscores.columns = ['PC' + str(i + 1) for i in range(len(pcscores.columns))]
print("3*****************")
print(pca.components_)
# How the 4 new dimensions relate to the original 6 features:
# e.g. the first component's loading on HP (0.38988584) is smaller
# than its loading on 'Attack' (0.43925373).
# [[ 0.38988584 0.43925373 0.36374733 0.45716229 0.4485704 0.33544048]
# [ 0.08483455 -0.01182493 0.62878867 -0.30541446 0.2390967 -0.66846305]
# [-0.47192614 -0.59415339 0.06933913 0.30561186 0.56559403 0.07851327]
# [ 0.71769131 -0.4058359 -0.41923734 0.14751659 0.18544475 -0.29716251]]
# (3)_2 loadings is the key output (same values as above, printed more readably).
loadings = pd.DataFrame(pca.components_, columns=cols)
print("4*****************")
print(loadings)
# HP Attack Defense Sp. Atk Sp. Def Speed
# 0 0.389886 0.439254 0.363747 0.457162 0.448570 0.335440
# 1 0.084835 -0.011825 0.628789 -0.305414 0.239097 -0.668463
# 2 -0.471926 -0.594153 0.069339 0.305612 0.565594 0.078513
# 3 0.717691 -0.405836 -0.419237 0.147517 0.185445 -0.297163
# NOTE(review): len(pcscores.columns) happens to equal the number of
# components (4); pca.n_components_ or len(loadings.index) would be clearer.
loadings.index = ['PC' + str(i + 1) for i in range(len(pcscores.columns))]
print("5*****************")
print(pd.DataFrame(df_scaled).head(), df_scaled.shape)
# Output of pd.DataFrame(df_scaled).head():
# 0 1 2 3 4 5
# 0 -0.950626 -0.924906 -0.797154 -0.239130 -0.248189 -0.801503
# 1 -0.362822 -0.524130 -0.347917 0.219560 0.291156 -0.285015
# 2 0.420917 0.092448 0.293849 0.831146 1.010283 0.403635
# 3 0.420917 0.647369 1.577381 1.503891 1.729409 0.403635
# 4 -1.185748 -0.832419 -0.989683 -0.392027 -0.787533 -0.112853
# Output of df_scaled.shape:
# (800, 6)
# (4) Heatmap of how the new dimensions relate to the originals (linear reduction).
# Reading the chart:
# high-Defense, low-Speed Pokemon score higher on PC2;
# high Sp. Def, low Attack -> higher PC3;
# high HP, low Attack/Defense -> higher PC4.
ax = sns.heatmap(loadings.transpose(), center=0, linewidths=0.5,
cmap="RdBu", vmin=-1, vmax=1, annot=True)
ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=0)
ax.set_yticklabels(ax.yaxis.get_majorticklabels(), rotation=0)
plt.show()
# print(loadings)
# The loadings.transpose() values plotted above:
# HP Attack Defense Sp. Atk Sp. Def Speed
# 0 0.389886 0.439254 0.363747 0.457162 0.448570 0.335440
# 1 0.084835 -0.011825 0.628789 -0.305414 0.239097 -0.668463
# 2 -0.471926 -0.594153 0.069339 0.305612 0.565594 0.078513
# 3 0.717691 -0.405836 -0.419237 0.147517 0.185445 -0.297163
6.预测
基础数据
职位名,公司,工作地点,薪资,经验,学历,url
高级大数据开发工程师,亚信科技(中国)有限公司,济南,1.5-2万/月,5-7年经验,,https://jobs.job.com/jinan/101210613.html?s=01&t=0
大数据建模及大数据开发工程师,亚信科技(中国)有限公司,北京-海淀区,1.8-2.5万/月,无工作经验,,https://jobs.job.com/beijing-hdq/103836258.html?s=01&t=0
6.1 案例1
'''
Predict job salary from experience, education and work location, using
the CSV data scraped by chapter8's 51jobs2.py.
Uses dummy variables, which is somewhat cumbersome.
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # 画3D图
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['SimHei']
rtn = None
def _salary(salary):
global rtn
'''薪资统一以月计算,取均值'''
# 6-8千/月 2-6万/月 2.5-5万/月 0.8-1.5万/月 150元/天 20-30万/年
if salary.endswith('万/月'):
salary = salary.replace('万/月', '')
s = salary.split('-')
rtn = (float(s[0]) + float(s[1])) * 10000 / 2
elif salary.endswith('千/月'):
salary = salary.replace('千/月', '')
s = salary.split('-')
rtn = (float(s[0]) + float(s[1])) * 1000 / 2
elif salary.endswith('万/年'):
salary = salary.replace('万/年', '')
s = salary.split('-')
rtn = (float(s[0]) + float(s[1])) * 10000 / 2 / 12
elif salary.endswith('元/天'):
salary = salary.replace('元/天', '')
rtn = float(salary) * 21.75
# print(rtn)
return rtn
def _exp(exp):
global rtn
if exp == '无工作经验':
rtn = 0
else:
rtn = int(exp[:1])
return rtn
# print(_exp('无工作经验'))
def _area(area):
global rtn
if '-' in area:
rtn = area.split('-')[0]
elif area == '异地招聘':
rtn = None
else:
rtn = area
return rtn
# print(_area('杭州'))
# (1) Load the scraped job data and keep only the relevant columns.
df = pd.read_csv(r'51jobs.csv', encoding='gbk')
df = df.dropna()
df = df[['工作地点', '经验', '学历', '薪资']]
# (2) Clean: normalize salary, experience and location.
df['薪资'] = df['薪资'].apply(_salary) # Series: use map or apply; DataFrame: applymap
df['经验'] = df['经验'].map(_exp)
df['工作地点'] = df['工作地点'].map(_area)
df = df.dropna() # drop rows the cleaners could not parse (returned None)
# print(df['工作地点'].value_counts())
'''
数据探索 start
-------------------------------------------------------------
-------------------------------------------------------------
'''
def salary_hist():
    """Plot a histogram of monthly salaries over the whole data set.

    Buckets cover 0-55,555 yuan in 5,000-yuan steps; a dashed red
    vertical line marks the mean salary.  Reads the module-level ``df``
    and blocks on ``plt.show()``.
    """
    bins = range(0, 55555, 5000)  # 0..55k yuan, one bucket per 5,000 (original comment was stale)
    data = df["薪资"]
    # data = df[df['工作地点']=='上海']['薪资']  # variant: restrict to a single city
    plt.hist(data, bins, color='#3333ee', width=4000)
    plt.xlabel('薪资')
    plt.ylabel('计数')
    # Dashed red marker at the mean salary.  (The original also called
    # the no-op ``plt.plot()`` here; removed.)
    plt.axvline(df['薪资'].mean(), linestyle='dashed', color='red')
    plt.show()
# salary_hist()  # uncomment to display the histogram
def salary_boxplot():
    """Box plot of salary by education level, restricted to the five
    cities with the most postings (Shanghai, Beijing, Shenzhen,
    Guangzhou, Hangzhou).  Reads the module-level ``df``; blocks on
    ``plt.show()``.
    """
    # Keep only rows belonging to the five most frequent cities.
    busiest = df['工作地点'].value_counts()[:5]
    subset = df[df['工作地点'].isin(busiest.index)]
    # Alternative groupings: x='工作地点' (city) or x='经验' (experience).
    # sns.boxplot(data=subset, y='薪资', x='工作地点')
    # sns.boxplot(data=subset, y='薪资', x='经验')
    sns.boxplot(data=subset, y='薪资', x='学历')
    plt.ylim(0, 55555)  # clamp the y-axis range
    plt.show()
# salary_boxplot()  # uncomment to display the box plot
'''
-------------------------------------------------------------
-------------------------------------------------------------
数据探索 end
'''
# One-hot encode education: each distinct level becomes its own 0/1
# column; the row's own level is 1, everything else is 0, e.g.
# ['中专 0' '中技0' '初中及以下0' '博士0' '大专0' '本科1' '硕士0' '高中0']
# le = LabelEncoder() # label-encoded education predicted identical salaries for different levels, hence the dummies
# df['学历'] = le.fit_transform(df['学历'].values)
# print("*****************用哑变量**********************")
xueli = pd.get_dummies(df['学历'])
df = pd.concat([xueli, df], axis=1).drop('学历', axis=1)
# print(xueli)
# print(df) # columns now: 中专 中技 初中及以下 博士 大专 本科 硕士 高中 工作地点 经验 薪资
# One-hot encode the work location the same way.
area = pd.get_dummies(df['工作地点'])
df = pd.concat([area, df], axis=1).drop('工作地点', axis=1)
# print(area)
# print(df) # columns now: 上海 东莞 中山 乌鲁木齐 佛山 兰州 包头 ... 博士 大专 本科 硕士 高中 经验 薪资
# (2) Split into features X and target y.
X = df.drop('薪资', axis=1) # every city + education dummy column plus 经验 (experience)
y = df['薪资'] # target: average monthly salary
# (3) Train on 70% of the rows, hold out 30% for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, test_size=0.3)
# print(X_test)
model = LinearRegression()
model.fit(X_train, y_train)
# # Predict the held-out rows and show each row's active features:
# y_pred = model.predict(X_test)
# # print features together with the predicted value
# for i, y in enumerate(y_pred):
# xi = X_test.iloc[i]
# print(xi[xi > 0], y)
# 重庆 1
# 大专 1
# 经验 3
# Name: 2980, dtype: int64 11472.0
# 上海 1
# 本科 1
# 经验 3
# Name: 1812, dtype: int64 20952.0
# Helper below: predict the salary for one explicit (city, education, experience) combination.
def predict_salary(area, xueli, exp):
    """Predict and print the monthly salary for one combination.

    Builds a single one-hot feature row matching the dummy-encoded
    training columns, then prints the model's prediction.  Relies on
    the module-level ``X_test`` (for the column layout) and ``model``.

    :param area: city name (must be one of the dummy columns)
    :param xueli: education level (must be one of the dummy columns)
    :param exp: years of experience (numeric)
    """
    # Borrow an arbitrary existing row purely to obtain the correct
    # column structure, then zero it out and flip on the requested dummies.
    row = X_test.iloc[0].copy()
    row[:] = 0
    row[area] = 1          # city dummy
    row[xueli] = 1         # education dummy
    row['经验'] = exp       # experience is the one numeric column
    # Columns: 上海 东莞 中山 乌鲁木齐 佛山 兰州 包头 ... 博士 大专 本科 硕士 高中 经验
    pred = model.predict(row.values.reshape(1, -1))
    print('地区:{} 学历:{} 经验:{} 薪资:{}'.format(area, xueli, exp, pred[0]))
# predict_salary('北京', '大专', 1)
# predict_salary('北京', '本科', 1)
# predict_salary('上海', '本科', 1)
# predict_salary('北京', '大专', 2)
# predict_salary('北京', '本科', 2)
# predict_salary('上海', '本科', 2)
# predict_salary('深圳', '本科', 2)
print("1**********************")
predict_salary('上海', '本科', 1)
predict_salary('上海', '硕士', 1)
predict_salary('上海', '本科', 2)
predict_salary('上海', '硕士', 2)
predict_salary('上海', '本科', 3)
predict_salary('上海', '硕士', 3)
# With dummy variables the predictions come out somewhat higher:
# 地区:上海 学历:本科 经验:1 薪资:17215.0
# 地区:上海 学历:硕士 经验:1 薪资:19852.0
# 地区:上海 学历:本科 经验:2 薪资:19276.0
# 地区:上海 学历:硕士 经验:2 薪资:21913.0
# 地区:上海 学历:本科 经验:3 薪资:21338.0
# 地区:上海 学历:硕士 经验:3 薪资:23975.0
# With (automatic) LabelEncoder labels the predictions are lower, but
# automatic label codes are error-prone - hand-defined ordinal labels
# are the safer choice:
# [15925.69435697]
# [17255.06001189]
# [18207.61243563]
# [19536.97809055]
# [20489.5305143]
# [21818.89616922]
print("2**********************")
predict_salary('上海', '高中', 1)
predict_salary('上海', '中专', 1)
predict_salary('上海', '大专', 1)
predict_salary('上海', '本科', 1)
predict_salary('上海', '硕士', 1)
predict_salary('上海', '博士', 1)
# 地区:上海 学历:高中 经验:1 薪资:14616.0
# 地区:上海 学历:中专 经验:1 薪资:11432.0
# 地区:上海 学历:大专 经验:1 薪资:13400.0
# 地区:上海 学历:本科 经验:1 薪资:17024.0
# 地区:上海 学历:硕士 经验:1 薪资:20680.0
# 地区:上海 学历:博士 经验:1 薪资:50608.0
print("3***************")
predict_salary('上海', '高中', 1)
predict_salary('北京', '高中', 1)
predict_salary('上海', '中专', 1)
predict_salary('北京', '中专', 1)
predict_salary('上海', '大专', 1)
predict_salary('北京', '大专', 1)
predict_salary('上海', '本科', 1)
predict_salary('北京', '本科', 1)
predict_salary('上海', '硕士', 1)
predict_salary('北京', '硕士', 1)
predict_salary('上海', '博士', 1)
predict_salary('北京', '博士', 1)
# Dummy variables can still misbehave - note the inflated 博士 (PhD) rows:
# 地区:上海 学历:高中 经验:1 薪资:14616.0
# 地区:北京 学历:高中 经验:1 薪资:14104.0
# 地区:上海 学历:中专 经验:1 薪资:11432.0
# 地区:北京 学历:中专 经验:1 薪资:10920.0
# 地区:上海 学历:大专 经验:1 薪资:13400.0
# 地区:北京 学历:大专 经验:1 薪资:12888.0
# 地区:上海 学历:本科 经验:1 薪资:17024.0
# 地区:北京 学历:本科 经验:1 薪资:16512.0
# 地区:上海 学历:硕士 经验:1 薪资:20680.0
# 地区:北京 学历:硕士 经验:1 薪资:20168.0
# 地区:上海 学历:博士 经验:1 薪资:50608.0
# 地区:北京 学历:博士 经验:1 薪资:50096.0
6.2 案例2
'''
Predict job salaries (case 2): same 51jobs.csv data scraped by
chapter8/51jobs2.py, but the categorical columns are integer-coded
(LabelEncoder / a hand-written ordinal mapping) instead of one-hot
encoded.
'''
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
rtn = None # module-level scratch slot; the cleaning helpers historically wrote results here via ``global``
def get_salary(origin):
    """Normalize a 51job salary string to an average monthly salary.

    Supported formats: ``0.5-1万/月``, ``7-9千/月``, ``20-30万/年``,
    ``500元/天``.  Ranges are averaged; yearly pay is divided by 12;
    daily pay is multiplied by 21.75 average paid working days/month.

    :param origin: raw salary text
    :return: average monthly salary (float), or ``None`` when the
             format is unrecognized (dropped later by ``dropna``).
    """
    # BUGFIX: a local result replaces the original module-level
    # ``global rtn``, which silently returned the previous call's
    # value whenever the format did not match any branch.
    result = None
    if origin.endswith('万/月'):
        lo, hi = origin.replace('万/月', '').split('-')
        result = (float(lo) + float(hi)) / 2 * 10000
    elif origin.endswith('千/月'):
        lo, hi = origin.replace('千/月', '').split('-')
        result = (float(lo) + float(hi)) / 2 * 1000
    elif origin.endswith('万/年'):
        lo, hi = origin.replace('万/年', '').split('-')
        result = (float(lo) + float(hi)) / 12 / 2 * 10000
    elif origin.endswith('元/天'):
        # Daily wage -> monthly: 21.75 average working days per month.
        result = float(origin.replace('元/天', '')) * 21.75
    return result
# print(get_salary('500元/天')) # 10875.0
def get_area(origin):
    """Strip the district suffix from a work-location string.

    :param origin: e.g. ``'北京-海淀区'`` or ``'杭州'``
    :return: the city name, or ``None`` for ``'异地招聘'`` (off-site
             hiring, removed later by ``dropna``).
    """
    # BUGFIX: removed the shared ``global rtn`` scratch variable;
    # early returns make each case explicit.
    if '-' in origin:
        return origin.split('-')[0]
    if origin == '异地招聘':
        return None
    return origin
# print(get_area('北京-海淀区'))
def get_exp(origin):
    """Convert an experience-requirement string to a number of years.

    :param origin: e.g. ``'5-7年经验'`` or ``'无工作经验'``
    :return: int years (lower bound of the range); 0 for "no experience".
    """
    # BUGFIX: removed the shared ``global rtn`` scratch variable.
    if origin == '无工作经验':
        return 0
    # First character of e.g. '5-7年经验' -> 5.
    # NOTE(review): two-digit values like '10年以上经验' truncate to 1;
    # preserved as-is from the original implementation.
    return int(origin[:1])
# Ordinal education ranks: 初中及以下 0, 高中 1, 中专 2, 中技 3, 大专 4, 本科 5, 硕士 6, 博士 7
def get_education(origin):
    """Map an education level to a hand-ordered ordinal rank.

    The ranks are ordered by real-world schooling progression —
    LabelEncoder's alphabetical codes would, e.g., rank 高中 (high
    school) above 博士 (PhD) and distort the regression weights.

    :param origin: education string, e.g. ``'本科'``
    :return: int rank 0-7, or ``None`` for an unknown value.
    """
    ranks = {
        '初中及以下': 0,  # junior high or below
        '高中': 1,        # high school
        '中专': 2,        # technical secondary school
        '中技': 3,        # technical worker school
        '大专': 4,        # junior college
        '本科': 5,        # bachelor
        '硕士': 6,        # master
        '博士': 7,        # doctorate
    }
    # BUGFIX: ``dict.get`` returns None for unseen values; the original
    # if/elif chain fell through and returned the stale module-level
    # ``rtn`` left over from a previous helper call.
    return ranks.get(origin)
# print(get_exp('5-7年经验'))
# (1) Load the scraped CSV.
# df = pd.read_csv(r'E:\python_files\csv\51jobs.csv', encoding='gbk')
df = pd.read_csv('51jobs.csv', encoding='gbk')
df = df.dropna()
df = df[['工作地点', '经验', '学历', '薪资']]
# print(df.head(10))
# (2) Clean: normalize salary, experience and location.
df['薪资'] = df['薪资'].apply(get_salary)
df['经验'] = df['经验'].map(get_exp)
df['工作地点'] = df['工作地点'].apply(get_area)
df = df.dropna()
# print(df.head(10))
print(df['工作地点'].value_counts()) # posting counts per city, sorted descending
# 上海 559
# 北京 486
# 深圳 373
# 广州 354
# 杭州 237
# (3) Transform the data.
# A. Label-encode the work location.
le_area = LabelEncoder()
df['工作地点'] = le_area.fit_transform(df['工作地点'].values)
# print(le_area.classes_)  # codes 0-87.  NOTE(review): salaries are not
# monotonic in the (alphabetical) city code, so feeding this as a single
# linear feature is questionable - confirm before relying on it.
# ['上海' '东莞' '中山' '乌鲁木齐' '佛山' '兰州' '包头' '北京' '南京' '南宁' '南昌' '南通' '厦门' '合肥'
# '呼和浩特' '哈尔滨' '唐山' '商丘' '嘉兴' '大同' '大连' '天津' '太原' '娄底' '宁德' '宁波' '常熟' '广州'
# '徐州' '惠州' '成都' '承德' '无锡' '日照' '昆山' '昆明' '本溪' '来宾' '杭州' '枣庄' '柳州' '株洲'
# '桂林' '武汉' '池州' '沈阳' '泰安' '泰州' '洛阳' '济南' '济宁' '海口' '淄博' '深圳' '清远' '温州'
# '滨州' '漳州' '潍坊' '烟台' '玉林' '珠海' '盐城' '石家庄' '福州' '秦皇岛' '绵阳' '芜湖' '苏州' '茂名'
# '荆州' '荆门' '蚌埠' '西安' '许昌' '贵州省' '贵阳' '郑州' '重庆' '镇江' '长春' '长沙' '青岛' '鹰潭'
# '黄石' '黔南' '龙岩']
# print(le_area.inverse_transform(6))
# B. Encode education.
print(df['学历'].value_counts())
# Use the hand-written ordinal mapping (get_education); pair this with
# the hand-defined lookup inside predict_salary - this is the correct approach.
df['学历'] = df['学历'].map(get_education)
# print(df.head(10))
# The LabelEncoder alternative below (paired with the commented lookup
# in predict_salary) produces wrong orderings and is kept as a warning:
# le_xueli = LabelEncoder()
# df['学历'] = le_xueli.fit_transform(df['学历'].values)
# print(le_xueli.classes_) # ['中专 0' '中技1' '初中及以下2' '博士3' '大专4' '本科5' '硕士6' '高中7'] automatic codes (from 0, alphabetical order) - not ideal
# print(list(le_xueli.classes_).index('高中')) # 7
# print(df.head(10))
# xueli = df['学历'].copy
#
# xueli[xueli == "中专"] == 1
# xueli[xueli == "高中"] == 2
# xueli[xueli == "大专"] == 3
# xueli[xueli == "本科"] == 4
# xueli[xueli == "硕士"] == 5
#
# df['学历'] = xueli
# (4) Fit the linear regression.
X = df[['工作地点', '学历', '经验']]
y = df['薪资']
# print(X)
# 工作地点 学历 经验
# 2 7 5 3
# 4 7 5 5
# 5 30 5 3
model = LinearRegression()
model.fit(X, y)
# (5) Predict salaries; feature order matches df[['工作地点', '学历', '经验']].
print("1****************")
print(model.predict([[4, 0, 1]]))
print(model.predict([[4, 1, 1]]))
print(model.predict([[2, 2, 1]]))
print(model.predict([[3, 3, 1]]))
print(model.predict([[2, 4, 1]]))
print(model.predict([[2, 5, 1]]))
# [9034.09953654]
# [10363.46519146]
# [11815.2141193]
# [13083.38813776]
# [14473.94542913]
# [15803.31108405]
# (5)_2 A friendlier prediction interface (predict_salary below).
def predict_salary(area, xueli, exp):
    """Predict and print the monthly salary for one combination.

    Relies on the module-level ``le_area`` (fitted city encoder) and
    ``model`` (fitted LinearRegression).

    :param area: city name, must be one of ``le_area.classes_``
    :param xueli: education string, e.g. ``'本科'``
    :param exp: years of experience (int)
    :return: the predicted monthly salary (float).  The original only
             printed; returning the value keeps callers backward
             compatible while making the function reusable.
    """
    # A label-encoded category's integer code is simply its index in
    # ``classes_``.
    area_code = list(le_area.classes_).index(area)
    # Method 1 (LabelEncoder lookup) gives wrong orderings - kept as a warning:
    # xueli = list(le_xueli.classes_).index(xueli)
    # Method 2: hand-ordered education ranks (must match get_education):
    # 初中及以下 0, 高中 1, 中专 2, 中技 3, 大专 4, 本科 5, 硕士 6, 博士 7
    edu_order = ['初中及以下', '高中', '中专', '中技', '大专', '本科', '硕士', '博士']
    xueli_code = edu_order.index(xueli)
    n = model.predict([[area_code, xueli_code, exp]])
    print(n)
    return n[0]
print("2****************")
predict_salary('北京', '大专', 1)
predict_salary('北京', '本科', 1)
predict_salary('北京', '硕士', 1)
predict_salary('北京', '大专', 2)
predict_salary('北京', '本科', 2)
predict_salary('北京', '硕士', 2)
print("3*****************")
predict_salary('上海', '本科', 1)
predict_salary('上海', '硕士', 1)
predict_salary('上海', '本科', 2)
predict_salary('上海', '硕士', 2)
predict_salary('上海', '本科', 3)
predict_salary('上海', '硕士', 3)
# Method 1: automatic LabelEncoder education codes
# ['中专 0' '中技1' '初中及以下2' '博士3' '大专4' '本科5' '硕士6' '高中7']
# [15925.69435697]
# [17255.06001189]
# [18207.61243563]
# [19536.97809055]
# [20489.5305143]
# [21818.89616922]
# Method 2: hand-defined ordinal labels - now the weights make sense:
# 初中及以下 0 高中 1 中专2 中技 3 大专4 本科 5 硕士6 博士 7
# [16238.68472635]
# [20058.00098448]
# [18405.92764002]
# [22225.24389814]
# [20573.17055369]
# [24392.48681181]
print("4***************")
predict_salary('上海', '高中', 1)
predict_salary('上海', '中专', 1)
predict_salary('上海', '大专', 1)
predict_salary('上海', '本科', 1)
predict_salary('上海', '硕士', 1)
predict_salary('上海', '博士', 1)
# Hand-defined ordinal labels (monotonic in education level):
# [961.41969385]
# [4780.73595198]
# [12419.36846823]
# [16238.68472635]
# [20058.00098448]
# [23877.3172426]
# The automatic LabelEncoder codes give nonsense - high school (高中)
# comes out far too high - so the custom ordinal mapping is required:
# [18584.42566681]
# [9278.86608238]
# [14596.32870205]
# [15925.69435697]
# [17255.06001189]
# [13266.96304713]
print("5***************")
predict_salary('上海', '高中', 1)
predict_salary('北京', '高中', 1)
predict_salary('上海', '中专', 1)
predict_salary('北京', '中专', 1)
predict_salary('上海', '大专', 1)
predict_salary('北京', '大专', 1)
predict_salary('上海', '本科', 1)
predict_salary('北京', '本科', 1)
predict_salary('上海', '硕士', 1)
predict_salary('北京', '硕士', 1)
predict_salary('上海', '博士', 1)
predict_salary('北京', '博士', 1)
# [961.41969385]
# [604.80857057]
# [4780.73595198]
# [4424.1248287]
# [12419.36846823]
# [12062.75734495]
# [16238.68472635]
# [15882.07360307]
# [20058.00098448]
# [19701.3898612]
# [23877.3172426]
# [23520.70611932]
print("6********************")
predict_salary('乌鲁木齐', '高中', 1)
predict_salary('乌鲁木齐', '中专', 1)
predict_salary('乌鲁木齐', '大专', 1)
predict_salary('乌鲁木齐', '本科', 1)
predict_salary('乌鲁木齐', '硕士', 1)
predict_salary('乌鲁木齐', '博士', 1)
# [808.58635531]
# [4627.90261343]
# [12266.53512968]
# [16085.8513878]
# [19905.16764593]
# [23724.48390405]