足球比赛分析中，我们有时候发现必发交易所的数据非常具有参考价值。比如总成交量的大小规模、买家和卖家对比赛的倾向、挂牌量的多少、成交的多少等，从中我们可以看出一场比赛的冷热程度。有时候我们观察到一个关键点或者必杀技，那么就需要大量的数据来验证。这个时候，批量的数据就必不可少了。
话不多说，我们直接看代码。
import requests
import random
import time
import datetime
import csv
import bs4
import sys
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
import pandas as pd
from datetime import timedelta
# Console display settings for pandas output.
# Show every column instead of truncating wide frames.
pd.set_option('display.max_columns',None)
# Allow up to 1000 characters per printed line before wrapping.
pd.set_option('display.width',1000)
# Treat ambiguous-width and East Asian wide characters as double-width so
# that columns containing Chinese text stay aligned when printed.
pd.set_option('display.unicode.ambiguous_as_wide',True)
pd.set_option('display.unicode.east_asian_width',True)
import json
def spider_xijia_IDlist(limit=5, timeout=10):
    """Fetch the okooo.com "danchang" live-centre page and extract match rows.

    The page is downloaded, saved verbatim to ``linshi2.html`` in the current
    working directory, and then parsed for ``<tr state="Not">`` rows. For each
    of the first ``limit`` such rows a list of six strings is built, printed,
    and collected.

    Args:
        limit: Maximum number of matching rows to process. Defaults to 5,
            the value that used to be hard-coded. Pass ``None`` to process
            every matching row.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple ``(basic_information_list, matchid_list)`` where
        ``basic_information_list`` holds one
        ``[beidan, ls, date, homename, awayname, matchid]`` list per row and
        ``matchid_list`` holds just the match ids, in the same order.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
        AttributeError, IndexError, KeyError, TypeError: If a row lacks one
            of the expected cells or attributes (the page layout changed).
    """
    url = "https://www.okooo.com/livecenter/danchang/"
    user_agents = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    ]
    headers = {
        # A random User-Agent is picked on every call.
        'User-Agent': random.choice(user_agents),
        # Placeholder: replace 'xxx' with the Cookie value taken from your
        # own browser session for this site.
        'Cookie': 'xxx',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN',
    }

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Let requests guess the charset from the body rather than trusting the
    # declared one.
    response.encoding = response.apparent_encoding
    html = response.text

    # Keep a UTF-8 snapshot of the page on disk (str.encode() defaults to
    # UTF-8); the `with` block closes the file, no explicit close() needed.
    with open('linshi2.html', 'wb') as snapshot:
        snapshot.write(html.encode())

    # Parse from the snapshot inside a context manager so the read handle is
    # closed as well; BeautifulSoup consumes the file during construction.
    with open('linshi2.html', encoding='utf-8') as snapshot:
        soup = BeautifulSoup(snapshot, 'html.parser')

    rows = soup.find_all('tr', state='Not')

    basic_information_list = []
    matchid_list = []
    for row in rows[:limit]:
        gray_cells = row.find_all('td', class_='graytx')
        matchid = row.find('td', class_='show_score')['val']
        # NOTE(review): score cells ('font_red ctrl_homescore' /
        # 'font_red ctrl_awayscore') are deliberately not read; the original
        # had them commented out, presumably because state='Not' rows have
        # no score yet -- confirm before re-enabling.
        basic_information = [
            gray_cells[0].text,                                   # beidan
            row.find('td', class_='ls').text,                     # ls
            gray_cells[1].text,                                   # date
            row.find('a', class_='ctrl_homename jsJumpTo').text,  # homename
            row.find('a', class_='ctrl_awayname jsJumpTo').text,  # awayname
            matchid,
        ]
        matchid_list.append(matchid)
        print(basic_information)
        basic_information_list.append(basic_information)
    return (basic_information_list, matchid_list)
# Run the scraper once. The returned lists are discarded here; the function
# prints each parsed row itself as it goes.
spider_xijia_IDlist()
运行后效果如下：
关于Python网络爬虫技术以及足球比赛数据分析与预测的探讨交流，欢迎大家关注、收藏以及留言和私信。或者可以关注个人vx公号“今日足篮”。