#!/usr/bin/python
# -*- coding:utf8 -*-
import requests
import re
import os
import time
# from urllib
import json
from bs4 import BeautifulSoup
from datetime import date
def getTimeExpire(time_play, time_gap):
    """Return ``time_play`` shifted by ``time_gap`` seconds.

    Args:
        time_play: Timestamp string in ``%Y-%m-%d %H:%M:%S`` format.
        time_gap: Offset in seconds (not milliseconds). Negative values
            move the result earlier.

    Returns:
        The shifted timestamp in the same ``%Y-%m-%d %H:%M:%S`` format, or
        an empty string when ``time_play`` cannot be parsed.
    """
    try:
        time_arr = time.strptime(time_play, "%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError):
        # Best-effort contract: report the failure and return '' so the
        # caller can still emit its row. Only parse errors are caught, so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print('时间转化失败')
        return ''
    # mktime() and localtime() both operate in local time, so the round trip
    # preserves the wall-clock meaning of the input.
    shifted = time.localtime(time.mktime(time_arr) + time_gap)
    return time.strftime('%Y-%m-%d %H:%M:%S', shifted)
def getHtml(html_path='F:\\test\\python\\worldcup.html',
            csv_path="F:\\test\\python\\worldcup.csv"):
    """Parse a saved schedule page and append its matches to a CSV file.

    The page at ``html_path`` is read as UTF-8 and parsed with the ``lxml``
    parser. Every ``.b-pull-refresh-content > div`` node supplies a title
    (``.wa-match-schedule-list-title``) and a list of match items; for each
    item one ``team1,team2,time_expire,time_play`` row is appended to
    ``csv_path``, where ``time_expire`` is ``time_play`` minus ten minutes.

    Args:
        html_path: Path of the HTML file to parse.
        csv_path: Path of the CSV file to append to. It is opened with the
            platform default encoding.

    Raises:
        OSError: If either file cannot be opened.
        IndexError: If a node lacks the title, two team names or the
            status text that the selectors expect.
    """
    # TODO: fetch the page from the website directly; the site is paginated.
    with open(html_path, 'r', encoding='utf-8') as f:
        content = f.read()
    soup = BeautifulSoup(content, 'lxml')
    nodes = soup.select('.b-pull-refresh-content > div')

    # Write the CSV header only when the file is new or empty, so repeated
    # runs do not scatter header lines between data rows.
    needs_header = (not os.path.exists(csv_path)
                    or os.path.getsize(csv_path) == 0)

    # One handle for the whole run; `with` closes it even if a node fails.
    with open(csv_path, 'a') as out:
        if needs_header:
            out.write('team1,team2,time_expire,time_play \n')
        for node in nodes:
            day_title = node.select(
                '.wa-match-schedule-list-title')[0].get_text().strip()
            items = node.select(
                '.sfc-contacts-list .wa-match-schedule-list-item')
            for item in items:
                names = item.select(
                    '.wa-tiyu-schedule-item-name.c-line-clamp1')
                team1 = names[0].get_text().strip()
                team2 = names[1].get_text().strip()
                kickoff = item.select('.status-text')[0].get_text().strip()
                # NOTE(review): no separator is inserted between the date
                # slice and the clock time, yet getTimeExpire() parses
                # "%Y-%m-%d %H:%M:%S". Presumably day_title[2:8] already
                # ends with whitespace -- confirm against the real page.
                time_play = '2018-' + day_title[2:8] + kickoff + ':00'
                time_expire = getTimeExpire(time_play, -10 * 60)
                out.write(
                    ','.join((team1, team2, time_expire, time_play)) + '\n')
#getHtml()
def getFromAPI(csv_path="F:\\test\\python\\worldcupFromAPI.csv"):
    """Fetch fixtures from the Baidu sports API and append upcoming ones to a CSV.

    Fifteen requests are made, one per second, for dates stepping two days
    at a time from 2018-6-13 to 2018-7-11. The original note gave the
    intended range as 2018-06-14 to 07-15. Every match whose ``startTime``
    is later than the current local time is appended to ``csv_path`` as
    ``left_name,right_name,time_expire,start_time``, where ``time_expire``
    is ``startTime`` minus ten minutes.

    Args:
        csv_path: Path of the CSV file to append to. It is opened with the
            platform default encoding.

    Raises:
        requests.RequestException: If a request fails or exceeds the
            3-second timeout.
        ValueError: If a response body is not valid JSON.
        KeyError: If a response lacks the keys read below.
    """
    month = 6
    day = 11
    for _ in range(15):
        day += 2
        if day > 30:
            # June has 30 days: roll over to the 1st of the next month.
            month += 1
            day = 1
        # The percent-encoded path segment decodes to "世界杯" (World Cup).
        url = ("http://tiyu.baidu.com/api/match/%E4%B8%96%E7%95%8C%E6%9D%AF"
               "/live/date/2018-" + str(month) + '-' + str(day)
               + "/direction/after?from=self")
        time.sleep(1)  # throttle: at most one request per second
        data = json.loads(requests.get(url, timeout=3).text)
        if data['status'] == '0':
            # NOTE(review): the meaning of status '0' is not visible here;
            # it is only reported and processing continues either way.
            print('为0')

        # Both sides use the zero-padded "%Y-%m-%d %H:%M:%S" layout, so a
        # plain string comparison orders them chronologically.
        # (assumes startTime follows that layout -- TODO confirm)
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        rows = []
        for matches in data['data']:
            for m in matches['list']:
                start = m['startTime']
                if start > now:
                    rows.append(','.join((
                        m['leftLogo']['name'],
                        m['rightLogo']['name'],
                        getTimeExpire(start, -10 * 60),
                        start,
                    )) + '\n')

        if rows:
            # Append this response's rows with one handle that is always
            # closed, instead of reopening the file for every match.
            with open(csv_path, 'a') as out:
                out.writelines(rows)
# Entry point: runs the API scrape whenever this file is executed. There is
# no __main__ guard, so importing the module triggers it as well.
getFromAPI()
# Original article: https://www.cnblogs.com/cao-zhen/p/9215222.html