- 以 XPath 方式爬取腾讯视频中电影的链接、海报链接、演员、简介等信息
- 对于 `href="..."` 这类属性数据, 可以在 XPath 表达式中用 `@href` 来提取属性值
![在这里插入图片描述](https://img-blog.csdnimg.cn/20201027095444177.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQ2NjU5OTEy,size_16,color_FFFFFF,t_70#pic_center)
![在这里插入图片描述](https://img-blog.csdnimg.cn/20201027095500358.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQ2NjU5OTEy,size_16,color_FFFFFF,t_70#pic_center)
- 代码如下
import requests
from fake_useragent import UserAgent
from lxml import etree
import numpy as np
from pandas import DataFrame
class Tencent(object):
def __init__(self):
    """Prepare the HTTP headers and the listing-URL template for the crawler."""
    # Firefox User-Agent string supplied by fake_useragent.
    self.headers = {'User-Agent': UserAgent().firefox}
    # ``offset={}`` is left as a placeholder because the offset changes from
    # page to page; the actual value is filled in later.
    # The template must not end in whitespace, otherwise the stray space
    # becomes part of the ``pagesize`` query value.
    self.onePage_url = (
        'https://v.qq.com/x/bu/pagesheet/list?append=1&channel=movie'
        '&itype=100062&listpage=2&offset={}&pagesize=30'
    )
def get_page(self, url, timeout=10):
    """Download ``url`` with the instance headers and return the body as text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout, requests may wait indefinitely
            on an unresponsive server.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(url=url, headers=self.headers, timeout=timeout)
    # .content holds the raw bytes, so decode them explicitly as UTF-8.
    return response.content.decode('utf-8')
def parse_page(self, html):
a = 'http:{}'</