中国队又没能在世界杯中出线,对此我准备从球员层面探求一下原因,意图通过数据分析拯救中国足球。
一、数据抓取
使用 Python 暴力抓取懂球帝上的球员数据,如下图:
主要包括球员基本信息,以及各项能力得分。
import requests
import time
import pymysql
from bs4 import BeautifulSoup
# HTTP headers sent with each page request. The User-Agent value below is a
# placeholder; substitute a real browser string before running.
user_agent = 'Your user_agent'
headers = {}
headers['User-Agent'] = user_agent
# Open the MySQL connection that the scraped rows are written through.
# - `database` is the documented keyword; `db` is only a deprecated alias.
# - `charset` is set explicitly so non-ASCII text (e.g. Chinese player names)
#   is encoded as utf8mb4 regardless of the installed driver's default.
# NOTE(review): credentials and the schema name 'tset' are hard-coded sample
# values - confirm them against the local environment.
coon = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    database='tset',
    port=3306,
    charset='utf8mb4',
)
# Cursor on that connection; presumably used with `sql` further down.
cur = coon.cursor()
# INSERT statement for one player row in `player_test`. Values are supplied
# through %s placeholders rather than being formatted into the string.
# The text (including the leading and trailing newline) is kept exactly as-is.
sql = (
    "\n"
    "insert into player_test (cn_name,en_name,player_img,club,pos,num,country,age,birthday,height,weight,foot,score,speed,power,defense,dribble,pass_ball,shoot)\n"
    "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)\n"
)
# Download a single player's profile page and parse it into a BeautifulSoup
# tree using the lxml parser.
url = 'https://www.dongqiudi.com/player/50025255.html'
# requests waits indefinitely by default; the timeout (seconds) makes a
# stalled server raise an exception instead of hanging the script.
html = requests.get(url=url, headers=headers, timeout=10)
soup = BeautifulSoup(html.text, 'lxml')
img_src = soup.find_all('img')[1].attrs['src']
if img_src == 'https://static1.dongqiudi.com/web-new/web/images/icon_error.png':
pass
else:
# 个人信息
cn_name = soup.h1.text # 姓名
en_name = soup.find('span',class_='en_name').text # 英文名
player_img = soup.find('img',class_='player_img').attrs['src&