# coding:utf-8
# 电视猫网址:https://www.tvmao.com/program
import requests
from lxml import etree
from selenium import webdriver
# HTTP headers sent with each request: a desktop Chrome User-Agent string
# and a Referer pointing at the site's home page.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.100 Safari/537.36'
    ),
    'Referer': 'https://www.tvmao.com/',
}

# Schedule page URL templates (CCTV listing and satellite listing); the "{}"
# placeholder is filled in with str.format by the crawl loop.
url_list = [
    'https://www.tvmao.com/program/duration/cctv/w{}.html',
    'https://www.tvmao.com/program/duration/satellite/w{}.html',
]
# Crawl both listing families: CCTV channels and satellite channels.
for url_program in url_list:
    # Pages are numbered 1 through 7: Monday to Sunday.
    for i in range(1, 8):
        url = url_program.format(i)
        # A timeout keeps one stalled connection from hanging the whole crawl.
        html = requests.get(url, headers=headers, timeout=10).text
        html = etree.HTML(html)
        td = html.xpath('//td[@class="tdchn"]')
        # TV channels: one matched table cell per channel entry.
        for j in td:
            links = j.xpath('./a/@href')
            # Skip cells that carry no link instead of failing with IndexError.
            if not links:
                continue
            # The href values are joined onto the site root to form a full URL.
            href = 'https://www.tvmao.com' + links[0]
            # NOTE(review): the source is cut off here at a bare "name_tv"
            # token. Presumably the next statement assigned the channel name;
            # it and the rest of the loop body are missing and must be
            # restored from the original script.
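# Web-page residue captured with the snippet (not part of the script):
# "python爬虫获取js动态资源" (Python crawler: fetching JS-rendered dynamic resources)
# "最新推荐文章于 2024-06-25 13:36:02 发布" (latest recommended article, published 2024-06-25 13:36:02)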