python比赛网站_python 爬虫获取世界杯比赛赛程

最新推荐文章于 2022-11-22 17:33:07 发布

weixin_39616222

最新推荐文章于 2022-11-22 17:33:07 发布

阅读量420

点赞数

文章标签： python比赛网站

#!/usr/bin/python

# -*- coding:utf8 -*-

import requests

import re

import os

import time

# from urllib

import json

from bs4 import BeautifulSoup

from datetime import date

def getTimeExpire(time_play, time_gap):
    """Shift a local timestamp string by a number of seconds.

    Args:
        time_play: Local time formatted as ``%Y-%m-%d %H:%M:%S``.
        time_gap: Offset in seconds (not milliseconds); a negative value
            moves the time earlier.

    Returns:
        The shifted local time in the same format, or an empty string when
        ``time_play`` cannot be parsed.
    """
    try:
        time_arr = time.strptime(time_play, "%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError):
        # ValueError: text does not match the format.
        # TypeError: the argument is not a string (e.g. None).
        print('时间转化失败')
        return ''

    # mktime/localtime work in seconds since the epoch, so the gap is added
    # directly to the epoch value.
    shifted = time.localtime(time.mktime(time_arr) + time_gap)
    return time.strftime('%Y-%m-%d %H:%M:%S', shifted)

def getHtml():
    """Parse the saved schedule page and append its fixtures to a CSV file.

    Reads the locally saved HTML page, walks each per-day section, and for
    every match in it appends one ``team1,team2,time_expire,time_play`` row
    to the CSV file. A header line is appended on every call because the
    file is opened in append mode.

    Raises:
        OSError: If the HTML file cannot be read or the CSV cannot be opened.
        IndexError: If an expected element is missing from the page.
    """
    # TODO: fetch the page from the website directly; the site is paginated.
    with open('F:\\test\\python\\worldcup.html', 'r', encoding='utf-8') as f:
        content = f.read()

    soup = BeautifulSoup(content, 'lxml')
    nodes = soup.select('.b-pull-refresh-content > div')

    filename = "F:\\test\\python\\worldcup.csv"
    # Open the output once; the context manager closes it even if parsing
    # of a later row raises.
    with open(filename, 'a') as out:
        # CSV header line.
        out.write('team1,team2,time_expire,time_play \n')

        for node in nodes:
            day_title = node.select('.wa-match-schedule-list-title')[0].get_text().strip()
            matches = node.select('.sfc-contacts-list .wa-match-schedule-list-item')

            for match in matches:
                names = match.select('.wa-tiyu-schedule-item-name.c-line-clamp1')
                team1 = names[0].get_text().strip()
                team2 = names[1].get_text().strip()
                kickoff = match.select('.status-text')[0].get_text().strip()

                # NOTE(review): the separator between the date slice and the
                # kickoff time is an empty string here; getTimeExpire expects
                # a space between date and time, so this relies on
                # day_title[2:8] ending with one — confirm against the page.
                time_play = '2018-' + day_title[2:8] + '' + kickoff + ':00'
                # Expiry is ten minutes before kickoff.
                time_expire = getTimeExpire(time_play, -10 * 60)

                out.write(team1 + ',' + team2 + ',' + time_expire + ',' + time_play + '\n')

#getHtml()

def getFromAPI():
    """Query the match API for a series of dates and append future matches to a CSV.

    For each requested date the JSON response is decoded and, when its
    ``status`` field equals the string ``'0'``, every match whose
    ``startTime`` is later than the current local time is appended to the
    CSV file as ``left,right,time_expire,startTime``.

    Raises:
        requests.RequestException: If a request fails or times out.
        ValueError: If a response body is not valid JSON.
        KeyError: If an expected field is missing from a response.
    """
    filename = "F:\\test\\python\\worldcupFromAPI.csv"
    month = 6
    day = 11

    # The original note says "from 2018-06-14 to 07-15", but the dates this
    # loop actually requests are 2018-6-13, 6-15, ..., 6-29, then 7-1, 7-3,
    # ..., 7-11 (month and day are not zero-padded in the URL).
    for _ in range(0, 15):
        day += 2
        if day > 30:
            month += 1
            day = 1

        url = ("http://tiyu.baidu.com/api/match/%E4%B8%96%E7%95%8C%E6%9D%AF/live/date/2018-"
               + str(month) + '-' + str(day) + "/direction/after?from=self")
        # Pause between requests.
        time.sleep(1)
        data = json.loads(requests.get(url, timeout=3).text)

        # NOTE(review): the source lost its indentation, so it is unclear
        # whether the match loop belonged inside this branch; it is kept
        # inside so a non-'0' response is skipped — confirm.
        if data['status'] == '0':
            print('为0')

            # Compared as strings; presumably startTime uses the same
            # zero-padded "%Y-%m-%d %H:%M:%S" layout — verify against the API.
            now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

            # One open per response; the context manager closes the file
            # even if a malformed match entry raises.
            with open(filename, 'a') as out:
                for matches in data['data']:
                    for m in matches['list']:
                        if m['startTime'] > now:
                            out.write(m['leftLogo']['name'] + ',' + m['rightLogo']['name'] + ','
                                      + getTimeExpire(m['startTime'], -10 * 60) + ','
                                      + m['startTime'] + '\n')

# Module-level call: the API export runs whenever this file is executed or imported.
getFromAPI()

原文地址：https://www.cnblogs.com/cao-zhen/p/9215222.html

weixin_39616222

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。