python比赛网站_python 爬虫获取世界杯比赛赛程

#!/usr/bin/python

# -*- coding:utf8 -*-

import requests

import re

import os

import time

# from urllib

import json

from bs4 import BeautifulSoup

from datetime import date

def getTimeExpire(time_play, time_gap):
    """Shift a timestamp string by a number of seconds.

    The timestamp is parsed, converted to epoch seconds through the local
    time zone (``time.mktime``), offset, and formatted back the same way.

    Args:
        time_play: Timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        time_gap: Offset in seconds (not milliseconds); a negative value
            moves the timestamp earlier.

    Returns:
        The shifted timestamp in the same format, or ``''`` when
        ``time_play`` cannot be parsed (a message is printed in that case).
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    try:
        time_arr = time.strptime(time_play, time_format)
    except (ValueError, TypeError):
        # ValueError: text does not match the format; TypeError: not a string.
        print('时间转化失败')
        return ''
    shifted = time.localtime(time.mktime(time_arr) + time_gap)
    return time.strftime(time_format, shifted)

def getHtml():
    """Parse a saved schedule page and append its matches to a CSV file.

    Reads the HTML file at a fixed local path, selects every day section
    under ``.b-pull-refresh-content``, and for each match item appends one
    row ``team1,team2,time_expire,time_play`` to the CSV. ``time_expire`` is
    ``time_play`` moved ten minutes earlier via ``getTimeExpire``.

    A header line is appended on every call, before the rows.

    Raises:
        OSError: If the HTML file cannot be read or the CSV cannot be opened.
        IndexError: If an expected element is missing from a section or item.
    """
    html_path = 'F:\\test\\python\\worldcup.html'
    csv_path = "F:\\test\\python\\worldcup.csv"
    team_selector = '.wa-tiyu-schedule-item-name.c-line-clamp1'

    # TODO: fetch directly from the website instead; the site paginates.
    with open(html_path, 'r', encoding='utf-8') as page:
        content = page.read()
    soup = BeautifulSoup(content, 'lxml')
    sections = soup.select('.b-pull-refresh-content > div')

    # One handle for the whole run; `with` closes it even if parsing fails.
    with open(csv_path, 'a') as out:
        # Write the CSV header.
        out.write('team1,team2,time_expire,time_play \n')
        for section in sections:
            day_title = section.select('.wa-match-schedule-list-title')[0].get_text().strip()
            items = section.select('.sfc-contacts-list .wa-match-schedule-list-item')
            for item in items:
                teams = item.select(team_selector)
                team1 = teams[0].get_text().strip()
                team2 = teams[1].get_text().strip()
                kickoff = item.select('.status-text')[0].get_text().strip()
                # NOTE(review): no separator is added between the date slice
                # and the kick-off time; presumably day_title[2:8] already
                # ends with a space ("MM-DD ") -- confirm against the page.
                time_play = '2018-' + day_title[2:8] + kickoff + ':00'
                time_expire = getTimeExpire(time_play, -10 * 60)
                out.write(team1 + ',' + team2 + ',' + time_expire + ',' + time_play + '\n')

#getHtml()

def getFromAPI():
    """Fetch match lists from the schedule API and append upcoming ones to a CSV.

    Issues 15 requests, one per generated date, pausing one second before
    each. For every match whose ``startTime`` string is later than the
    current local time, one row
    ``left name,right name,time_expire,startTime`` is appended, where
    ``time_expire`` is ``startTime`` moved ten minutes earlier via
    ``getTimeExpire``.

    Rows are written once per response; the CSV is not touched for a
    response that yields no upcoming match.

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If a response body is not valid JSON.
        KeyError: If a response lacks ``status`` or a match lacks an
            expected field.
    """
    csv_path = "F:\\test\\python\\worldcupFromAPI.csv"
    url_head = "http://tiyu.baidu.com/api/match/%E4%B8%96%E7%95%8C%E6%9D%AF/live/date/2018-"
    url_tail = "/direction/after?from=self"

    month = 6
    day = 11
    # Original note: covers 2018-06-14 through 07-15.
    # NOTE(review): the stepping below actually yields 6-13, 6-15, ... 6-29,
    # then 7-1, 7-3, ... 7-11 (no zero padding); confirm this matches what
    # the API's "direction/after" window expects.
    for _ in range(15):
        day += 2
        if day > 30:
            month += 1
            day = 1
        url = url_head + str(month) + '-' + str(day) + url_tail

        time.sleep(1)  # pause one second before each request
        data = json.loads(requests.get(url, timeout=3).text)

        # NOTE(review): indentation was lost in the source, so it is unclear
        # whether the match loop was nested under this check; it is treated
        # here as a diagnostic print only.
        if data['status'] == '0':
            print('为0')

        # Same zero-padded format on both sides, so string order is time order.
        now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        rows = []
        for matches in data.get('data') or []:
            for m in matches['list']:
                if m['startTime'] > now:
                    rows.append(
                        m['leftLogo']['name'] + ',' + m['rightLogo']['name'] + ','
                        + getTimeExpire(m['startTime'], -10 * 60) + ','
                        + m['startTime'] + '\n'
                    )

        if rows:
            # `with` guarantees the handle is closed even if a write fails.
            with open(csv_path, 'a') as out:
                out.writelines(rows)

# Script entry point: running (or importing) this file immediately starts the API fetch.
getFromAPI()

原文地址:https://www.cnblogs.com/cao-zhen/p/9215222.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值