# 一、获取王者信息并存入 LOL.csv
import requests,re
import pandas as pd
# Browser-like User-Agent sent with every request. Note this is a request
# header, not a proxy setting.
head = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.3538.400 QQBrowser/9.6.12501.400'}

# Ranking page requested first (query string: region=cn, area=1).
url = 'http://www.laoyuegou.com/x/zh-cn/lol/lol/godrank.html?region=cn&area=1'

# Fetch the page. The timeout stops a stalled connection from blocking the
# script forever; requests has no timeout unless one is passed explicitly.
html_cn = requests.get(url, headers=head, timeout=10)

# Every href that follows a class attribute starting with "cn-l"; these are
# used below as the per-area ranking URLs.
ren = re.compile(r'class="cn-l.*?href="(.*?)">',re.S)
zhanqu_list_cn = re.findall(ren, html_cn.text)

# One match per player entry, ten capture groups in this order: the
# "subStrTitle" text, the "player-server" text, the <em> text after the
# "icon-dan" image, the "color-win" text, the "color-defeat" text, the
# "percentage" text, the "color-zhongdan" text, and the alt text of the next
# three <img> tags.
rem = re.compile(r'class="subStrTitle">(.*?)</span>.*? class="player-server">(.*?)</div>.*?class="item3">.*?<img src=".*?" alt="" class="icon-dan"><em>(.*?)</em>.*? class="color-win">(.*?)</em>.*? class="color-defeat">(.*?)</em>.*? class="percentage">(.*?)</span>.*? class="color-zhongdan">(.*?)</span></div>.*?<img src=".*?" alt="(.*?)"/>.*?<img src=".*?" alt="(.*?)"/>.*?<img src="https:.*?" alt="(.*?)"/>',re.S)
data = re.findall(rem, html_cn.text)

# Tabulate the matched tuples and append them to the CSV. No header row and
# no index column are written.
data2 = pd.DataFrame(data)
data2.to_csv(r'C:\Users\主人\Desktop\My\LOL.csv',header=False,index=False,mode='a+')
def upwodn_cn(zhanqu_list_cn, max_page=10,
              csv_path=r'C:\Users\主人\Desktop\My\LOL.csv', timeout=10):
    """Fetch the ranking pages of every area URL and append the rows to a CSV.

    For each URL in ``zhanqu_list_cn`` the pages 1..``max_page`` are requested
    by appending ``&page=<n>``. Responses whose status code is not 200 are
    skipped. Each remaining page is matched against the module-level ``rem``
    pattern and the matches are appended to ``csv_path`` without a header row
    or index column.

    Args:
        zhanqu_list_cn: Iterable of area URLs (strings) to crawl.
        max_page: Last page number to request for each area (inclusive).
        csv_path: CSV file the rows are appended to.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. The function only appends to ``csv_path``.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    for zhanqu in zhanqu_list_cn:  # one area URL at a time
        for m in range(1, max_page + 1):  # page numbers start at 1
            page_url = zhanqu + '&page=' + str(m)
            # Without a timeout a stalled connection would hang the crawl.
            im = requests.get(page_url, headers=head, timeout=timeout)
            if im.status_code != 200:
                continue
            rows = re.findall(rem, im.text)
            if not rows:
                # Nothing matched on this page, so there is nothing to append.
                continue
            pd.DataFrame(rows).to_csv(csv_path, header=False, index=False, mode='a+')
upwodn_cn(zhanqu_list_cn) #crawl every page of each area URL found above and append the rows to the CSV
# 二、数据可视化和分析(未进行数据清理)
import re
import pandas
import matplotlib.pyplot as plt #数据可视化
import matplotlib as mpl #配置字体
# Plot defaults, set in a single call: SimHei as the sans-serif font
# (presumably so the Chinese labels render -- confirm the font is installed),
# followed by label, tick and legend font sizes and the default figure size.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.labelsize': 16.0,
    'xtick.labelsize': 14.0,
    'ytick.labelsize': 14.0,
    'legend.fontsize': 12.0,
    'figure.figsize': [15.0, 6.0],
})

# Load the scraped table.
# NOTE(review): the scraping code in this file writes the CSV with
# header=False, while read_csv's default treats the first line as the column
# names. The named columns used further down only exist if a header row was
# added to the file by other means -- confirm.
data = pandas.read_csv(r'C:\Users\主人\Desktop\My\LOL.csv')
# 1. 数据分析
# Exploratory look at the table. The bare expressions below only display
# their result in an interactive session or notebook.

# Work on the first 120 rows only.
data_cn = data[0:120]

# First 7 rows (use .tail() to look at the end instead).
data_cn.head(7)

# Summary statistics of the numeric columns. The original call was misspelled
# "describle", which raises AttributeError.
data_cn.describe()

# Column labels of the full table.
data.columns
# 2. 数据可视化
# ① Player ID and win rate for the rows labelled 0 through 2.
# .ix was removed from pandas; .loc is the label-based equivalent and selects
# the same rows for an integer-labelled index.
data_cn.loc[0:2, ['王者ID', '胜率']]

# ② Bar chart of how many players fall in each area.
data_cn['所在战区'].value_counts().plot(kind='bar', rot=90, color='g')
plt.show()

# ③ Bar chart of the players' preferred positions. The '--' placeholder is
# removed when present; errors='ignore' avoids a KeyError when it is absent.
data_cn['擅长位置'].value_counts().drop('--', errors='ignore').plot(kind='bar', rot=45, color='g')
plt.show()

# ④ Bar chart of hero frequencies ("cancat" was a typo for pandas.concat).
# NOTE(review): the same column is concatenated three times, which only
# triples every count. This was probably meant to combine three different
# hero columns -- confirm the actual column names before changing it.
pandas.concat([data_cn['本命英雄1'], data_cn['本命英雄1'], data_cn['本命英雄1']]).value_counts().drop('--', errors='ignore').plot(kind='bar', rot=45, color='g')
plt.show()