阿里云天池大数据:【入门】精灵宝可梦数据集分析

目的

学习并实践不同的机器学习算法

使用的包及安装
pip install numpy
pip install pandas
pip install scikit-learn
数据获取

阿里云天池大数据竞赛官网获取

莫某
引入包
import pandas as pd
import numpy as np
获取数据
# Load the dataset into a DataFrame; the relative path means the CSV must sit
# in the current working directory.
poke = pd.read_csv('./pokemon0820.csv')
获取18项定向攻击的伤害指数
# Select the data: columns at positions 1..18 (18 columns) for every row.
# NOTE(review): assumes these positions are the 18 per-type damage multipliers
# in this CSV -- confirm against the file's column order.
against_ = poke.iloc[:,1:19]
# Convert to a NumPy array. The result is not assigned, so `against_` itself
# remains a DataFrame.
against_.to_numpy()
获取宝可梦的捕捉几率
# Each snippet below selects one column of `poke` by label (a pandas Series)
# and then calls .to_numpy() on it. The returned array is never assigned, so
# every variable keeps holding the Series; the call presumably only serves to
# show the values as notebook cell output.
# Capture rate.
capture_rate = poke['capture_rate']
capture_rate.to_numpy()
获取宝可梦活跃指数
# Base happiness.
base_happiness = poke['base_happiness']
base_happiness.to_numpy()
获取基础攻击属性
# Base attack stat.
attack = poke['attack']
attack.to_numpy()
获取基础防御属性
# Base defense stat.
defense = poke['defense']
defense.to_numpy()
获取特殊攻击属性
# Special attack stat.
sp_attack = poke['sp_attack']
sp_attack.to_numpy()
获取特殊防御属性
# Special defense stat.
sp_defense = poke['sp_defense']
sp_defense.to_numpy()
获取基础速度属性
# Base speed stat.
speed = poke['speed']
speed.to_numpy()
获取宝可梦为第几代
# Generation the Pokemon belongs to.
generation = poke['generation']
generation.to_numpy()
获取是否为传奇宝可梦
# Legendary flag.
is_legendary = poke['is_legendary']
is_legendary.to_numpy()
获取hp
# Hit points.
hp = poke['hp']
hp.to_numpy()
获取孵化阶段
# Base egg steps (hatching).
base_egg_steps = poke['base_egg_steps']
base_egg_steps.to_numpy()
获取身高
# Height (presumably metres, judging by the column name -- confirm).
height_m = poke['height_m']
height_m.to_numpy()
获取体重
# Weight (presumably kilograms, judging by the column name -- confirm).
weight_kg = poke['weight_kg']
weight_kg.to_numpy()
获取主属性
# Primary type.
type1 = poke['type1']
type1.to_numpy()
获取副属性
# Secondary type.
type2 = poke['type2']
type2.to_numpy()
获取成长经历
# Experience growth.
experience_growth = poke['experience_growth']
experience_growth.to_numpy()
获取能力
# Abilities.
abilities = poke['abilities']
abilities.to_numpy()
获取精灵类型
# Classification (species category).
classification = poke['classification']
classification.to_numpy()
获取宝可梦为男性的比率(空白为无性别属性)
# Percentage of males; per the heading above, blank means genderless.
percentage_male = poke['percentage_male']
percentage_male.to_numpy()
获取宝可梦图鉴ID
# Pokedex number.
pokedex_number = poke['pokedex_number']
pokedex_number.to_numpy()
获取宝可梦的日文名
# Japanese name.
japanese_name = poke['japanese_name']
japanese_name.to_numpy()
获取宝可梦的英文名
# English name.
name = poke['name']
name.to_numpy()
解决的问题
通过决策树,根据18项定向攻击的伤害指数预测其基础攻击属性
# Import the required packages
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Load the data
poke = pd.read_csv('./pokemon0820.csv')
# Features: the 18 directed-attack damage multipliers (columns 1..18 by position)
against_ = poke.iloc[:, 1:19]
against = against_.to_numpy()
# Target: base attack stat
attack_ = poke['attack']
attack = attack_.to_numpy()

# Split the data set: 23% held out for testing, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(against, attack, test_size=0.23, random_state=32)

# Build the model
# random_state pins the tree's internal randomness so the score is repeatable
# across runs, matching the seeded LogisticRegression used elsewhere in this
# file. The score recorded in the article came from an unseeded run, so the
# value printed here may differ from it.
# NOTE(review): `attack` is a numeric stat, so the classifier treats each
# distinct value as its own class and score() reports exact-match accuracy;
# a regressor (e.g. DecisionTreeRegressor) is probably the better fit --
# confirm before changing the task.
DTCmodel = DecisionTreeClassifier(random_state=0)
DTCmodel.fit(X_train, y_train)
# Keep the predictions instead of discarding them
y_pred = DTCmodel.predict(X_test)
# Mean accuracy on the held-out set (last expression: shown as cell output)
DTCmodel.score(X_test, y_test)

# 决策树不适用 准确率很低
0.06486486486486487
通过线性判别分析,根据18项定向攻击的伤害指数预测其基础攻击属性
# Import the required packages
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split

# Load the data
poke = pd.read_csv('./pokemon0820.csv')
# Target column (base attack stat) and feature columns (positions 1..18,
# the 18 directed-attack damage multipliers)
attack_ = poke['attack']
against_ = poke.iloc[:, 1:19]
# Convert both to NumPy arrays
against, attack = against_.to_numpy(), attack_.to_numpy()

# Split the data set: 23% held out for testing, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    against,
    attack,
    test_size=0.23,
    random_state=32,
)

# Build and fit the model (fit() returns the estimator itself)
LDAmodel = LinearDiscriminantAnalysis().fit(X_train, y_train)
# Mean accuracy on the held-out set (last expression: shown as cell output)
LDAmodel.score(X_test, y_test)

# 线性判别分析
0.021621621621621623
通过对数几率回归,根据18项定向攻击的伤害指数预测其基础攻击属性
# Import the required packages
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the data
poke = pd.read_csv('./pokemon0820.csv')
# Target column (base attack stat) and feature columns (positions 1..18,
# the 18 directed-attack damage multipliers)
attack_ = poke['attack']
against_ = poke.iloc[:, 1:19]
# Convert both to NumPy arrays
against, attack = against_.to_numpy(), attack_.to_numpy()

# Split the data set: 23% held out for testing, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    against,
    attack,
    test_size=0.23,
    random_state=32,
)

# Build and fit the model (fit() returns the estimator itself)
LRmodel = LogisticRegression(solver='newton-cg', random_state=0).fit(X_train, y_train)
# Mean accuracy on the held-out set (last expression: shown as cell output)
LRmodel.score(X_test, y_test)

# 对数几率回归
0.043243243243243246
通过神经网络回归模块,根据18项定向攻击的伤害指数预测其基础攻击属性
# Import the required packages
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the data
poke = pd.read_csv('./pokemon0820.csv')
# Features: the 18 directed-attack damage multipliers (columns 1..18 by position)
against_ = poke.iloc[:, 1:19]
against = against_.to_numpy()
# Target: base attack stat
attack_ = poke['attack']
attack = attack_.to_numpy()

# Split the data set: 23% held out for testing, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(against, attack, test_size=0.23, random_state=32)

# Standardize the features; the scaler is fitted on the training split only
# and then applied to both splits.
ss = StandardScaler()
ss.fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

# Build the model: two hidden layers of 500 and 1000 units.
# random_state pins the weight initialization so the score is repeatable
# across runs, matching the seeded LogisticRegression used elsewhere in this
# file. The score recorded in the article came from an unseeded run, so the
# value printed here may differ from it.
mlpmodel = MLPRegressor(hidden_layer_sizes=(500, 1000), random_state=0)
mlpmodel.fit(X_train, y_train)
# R^2 on the held-out set (a regressor's score() is R^2, not accuracy);
# sample_weight is left at its default of None.
mlpmodel.score(X_test, y_test)
0.159366755682658

# Model 2: same pipeline with two hidden layers of 1000 units each.
# Reuses X_train / X_test / y_train / y_test from the preceding snippet and
# rebinds `mlpmodel`, replacing the first model.
# random_state pins the weight initialization so the score is repeatable; the
# score recorded in the article came from an unseeded run and may differ.
mlpmodel = MLPRegressor(hidden_layer_sizes=(1000, 1000), random_state=0)
mlpmodel.fit(X_train, y_train)
# R^2 on the held-out set; sample_weight is left at its default of None.
mlpmodel.score(X_test, y_test)
0.16249714098191714

  • 2
    点赞
  • 25
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值