Python 递归爬取今日头条指定用户一个月内发表的所有文章和视频

res = requests.get(first_url, headers=headers_a, cookies=cookies)

data = json.loads(res.text)

print(data)

max_behot_time = data[‘next’][‘max_behot_time’]

if max_behot_time:

video_list = data[‘data’]

for i in video_list:

try:

start_time = i[‘behot_time’]

video_title = i[‘title’]

video_source = i[‘source’]

detail_url = ‘https://www.ixigua.com/i’ + i[‘item_id’]

resp = requests.get(detail_url, headers=headers())

r = str(random.random())[2:]

url_part = “/video/urls/v/1/toutiao/mp4/{}?r={}”.format(

re.findall(‘“video_id”:“(.*?)”’, resp.text)[0], r)

s = crc32(url_part.encode())

api_url = “https://ib.365yg.com{}&s={}”.format(url_part, s)

resp = requests.get(api_url, headers=headers())

j_resp = resp.json()

video_url = j_resp[‘data’][‘video_list’][‘video_1’][‘main_url’]

video_url = b64decode(video_url.encode()).decode()

print((int(str(time.time()).split(‘.’)[0])-start_time)/86400)

if 30 < (int(str(time.time()).split(‘.’)[0]) - start_time) / 86400 <= 32:

print(‘完成’)

break_flag_video.append(1)

continue

if (int(str(time.time()).split(‘.’)[0]) - start_time) / 86400 > 32:

print(‘完成’)

break_flag_video.append(1)

break

row = {‘视频发表时间’: time.strftime(‘%Y-%m-%d %H:%M:%S’, time.localtime(start_time)),

‘标题’: video_title, ‘来源’: video_source,

‘视频链接’: video_url}

with open(‘/toutiao/’ + str(csv_name) + ‘视频.csv’, ‘a’, newline=‘’, encoding=‘gb18030’)as f:

f_csv = csv.DictWriter(f, headers2)

f_csv.writeheader()

f_csv.writerow(row)

print(‘正在爬取视频:’, video_title, detail_url, video_url)

time.sleep(3)

except Exception as e:

print(e, ‘https://www.ixigua.com/i’ + i[‘item_id’])

shipin(url=url, max_behot_time=max_behot_time, csv_name=csv_name, n=n)

except KeyError:

n += 1

print(‘第’ + str(n) + ‘次请求’, first_url)

time.sleep(3)

if n == max_qingqiu:

print(‘请求超过最大次数’)

break_flag_video.append(1)

except Exception as e:

print(e)

  • 15
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值