
# 1. 案例实现
import requests
from lxml import etree
import re
import asyncio
import aiohttp
import time
import random
# Request headers carrying a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/73.0.3683.86 Safari/537.36'
    ),
}
# URLs of all videos.
all_video_url = list()
def request_html(url, timeout=10):
    """Fetch a page and return its body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` can block indefinitely on a host that
            never answers.

    Returns:
        The response body as a ``str``, decoded from UTF-8.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The request is sent with the module-level ``headers`` dict.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Decode the raw bytes explicitly as UTF-8.
    return response.content.decode('utf-8')
def parse_html(response):
tree = etree.HTML(response)
# 第一个大视频名字
video_big_name = tree.xpath('//*[@id="vervideoTlist"]/div/div/div/a/div[2]/div[2]/text()')[0]
# 视频详情页url
video_big_detail_url = 'https://www.pearvideo.com/'+tree.xpath('//*[@id="vervideoTlist"]/div/div/div/a/@href')[0]
# print(video_bi