目的:
瞧瞧双色球里的各种数据.
用阿里云的pai来分析分析双色球相关的东西.
获取数据
中奖公告:
http://www.cwl.gov.cn/kjxx/ssq/
环境搭建
安装 python3
安装 pip
安装第三方模块
pip install beautifulsoup4
pip install requests
算了,不废话,直接上代码吧.
import requests
from bs4 import BeautifulSoup
import json
import time
def url_find(url):
    """Collect absolute URLs of all draw-announcement pages linked from one list page.

    Args:
        url: URL of a paginated announcement-list page on www.cwl.gov.cn.

    Returns:
        list[str]: absolute URLs of every anchor whose markup contains
        "期开奖公告" (the announcement-link text).
    """
    # Timeout so a single stalled page cannot hang the whole crawl.
    r = requests.get(url, timeout=30)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'html.parser')
    xx = []
    for i in soup.find_all('a'):
        # str(i) includes the tag markup, so a match can never sit at
        # index 0 — "in" is equivalent to the old find(...) > 0 test here.
        if "期开奖公告" in str(i):
            xx.append("http://www.cwl.gov.cn" + i.get('href'))
    return xx
def cat_text(url):
    """Parse one double-color-ball (双色球) draw-announcement page into a CSV row.

    Flattens the draw id, sales/pool figures, per-tier winner counts and
    prizes, the winning numbers and the first-prize winner locations into
    one comma-separated string.

    Args:
        url: URL of a single announcement page on www.cwl.gov.cn.

    Returns:
        str: one CSV record for the draw.

    NOTE(review): assumes the page always contains the "var khHq" script,
    a span.qiuL and a div.zjqkzy — if any is missing, qiu_h / qiu_l /
    address stays unbound and a NameError is raised. Verify on live pages.
    """
    r = requests.get(url)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'html.parser')
    # Flatten every table cell's text; the branches below index into this
    # list, so the exact cell layout of the page matters.
    y = []
    for x in soup.find_all('td'):
        y.append(x.get_text())
    # Draw id: characters 10..16 of the page's <h2> heading.
    x_id = str(soup.h2.get_text())[10:17]
    # Red balls live in an inline script as "var khHq = [...]"; slice the
    # JSON array literal out at a fixed character offset.
    for i in soup.find_all("script"):
        if str(i).find("var khHq") > 0:
            qiu_h = json.loads(str(i)[24:55])
    # Blue ball is the span with class "qiuL".
    for i in soup.find_all("span"):
        if i.get("class") == ["qiuL"]:
            qiu_l = i.get_text()
    # First-prize winner locations are in the div with class "zjqkzy".
    for i in soup.find_all("div"):
        if i.get("class") == ["zjqkzy"]:
            address = i.find("dd").get_text()
    # Three known page layouts; each shifts the td indices differently.
    if y[3]=='- 元':
        # Fixed: the separator here was the stray string ",TEst" instead of
        # "," (compare the identical expression in the next branch), which
        # corrupted the CSV output for this layout.
        mm = x_id + "," + y[0] + "," + y[1].rstrip(" 元").replace(",","") + ","+ y[2].rstrip(" 元").replace(",","") + ","+ y[9] +","+ y[10].split("(")[0] + ","+ y[12] +","+ y[13].split("(含")[0]+ ","+ y[15].split("(")[0] +","+ y[16] + ","+ y[18] +","+ y[19] + ","+ y[21] +","+ y[22] + ","+ y[24] +","+ y[25] +","+str(qiu_h).replace("[","").replace("]","").replace(" ","").replace("'","")+","+qiu_l+","+address.replace(",","--").replace("。","").replace("共","").replace("注","")
    elif y[11]=='其中:一等奖复式投注':
        mm = x_id + "," + y[0] + "," + y[1].rstrip(" 元").replace(",","") + ","+ y[2].rstrip(" 元").replace(",","") + ","+ y[9] +","+ y[10].split("(")[0] + ","+ y[12] +","+ y[13].split("(含")[0]+ ","+ y[15].split("(")[0] +","+ y[16] + ","+ y[18] +","+ y[19] + ","+ y[21] +","+ y[22] + ","+ y[24] +","+ y[25] +","+str(qiu_h).replace("[","").replace("]","").replace(" ","").replace("'","")+","+qiu_l+","+address.replace(",","--").replace("。","").replace("共","").replace("注","")
    else:
        # Older layout without the complex-bet rows: indices shift down and
        # the ",,," keeps the CSV column count aligned with the other layouts.
        mm = x_id + "," + y[0] + "," + y[1].rstrip(" 元").replace(",","") + ","+ y[2].rstrip(" 元").replace(",","") + ","+ y[7] +","+ y[8].split("(")[0] + ",,,"+ y[10] +","+ y[11].split("(含")[0]+ ","+ y[13].split("(")[0] +","+ y[14] + ","+ y[16] +","+ y[17] + ","+ y[19] +","+ y[20] + ","+ y[22] +","+ y[23] +","+str(qiu_h).replace("[","").replace("]","").replace(" ","").replace("'","")+","+qiu_l+","+address.replace(",","--").replace("。","").replace("共","").replace("注","")
    return mm
# Gather announcement URLs: the first list page, then pages 2..30 which
# follow the list_<n>.shtml naming scheme.
url = 'http://www.cwl.gov.cn/kjxx/ssq/kjgg/list.shtml'
url_list = url_find(url)
for page in range(2, 31):
    url = 'http://www.cwl.gov.cn/kjxx/ssq/kjgg/list_%d.shtml' % page
    url_list += url_find(url)
#print(url_list)
def save_file(somea):
    """Append one record to the local './data' file.

    Args:
        somea: text to append; the caller supplies the trailing newline.
    """
    # Explicit UTF-8 so the Chinese prize-location text is written the same
    # way on every platform instead of depending on the locale default.
    with open('./data', 'a', encoding='utf-8') as f:
        f.write(somea)
# Scrape every announcement page and append one CSV record per draw.
for page_url in url_list:
    save_file(str(cat_text(page_url)) + "\n")