简介
Python上有一个非常著名的HTTP库——requests,想必大家都听说过,用过的人都说好!现在requests库的作者又发布了一个新库,叫做requests-html,看名字也能猜出来,这是一个解析HTML的库,而且用起来和requests一样爽,下面就来介绍一下它。
安装
pip3 install requests_html
使用
代理使用
from requests_html import HTMLSession
session = HTMLSession()
# Verify that requests go through the proxy.
# NOTE: 用户名 (user name) is a placeholder variable that must be defined before
# this runs; 密码 (password), 主机 (host) and 端口 (port) inside the string are
# placeholders to replace with the real proxy credentials and address.
proxie = {
    "http": "http://{}:密码@主机:端口".format(用户名)
}
# Plain ASCII quotes are required here; typographic quotes are a syntax error.
url = "http://httpbin.org/ip"
# httpbin echoes the caller's IP, which shows whether the proxy was applied.
response = session.get(url, proxies=proxie)
获取js渲染数据
response = session.get(url,proxies=proxie)
# render() fetches the JavaScript-rendered content. The code automatically
# downloads Chromium (a big pitfall on a poor network: the download is very slow).
response.html.render()
[官网说明](https://cncert.github.io/requests-html-doc-cn/#/?id=%E6%94%AF%E6%8C%81javascript)
js渲染浏览器设置代理(使用response.html.render()方法只能通过下面方式设置代理)
from requests_html import HTMLSession
import asyncio
import pyppeteer
import random
class HTMLSession2(HTMLSession):
    """HTMLSession variant that launches its headless Chromium behind a proxy.

    Overrides the ``browser`` property so that the browser is started with a
    ``--proxy-server`` argument. Intended for ``response.html.render()``,
    which cannot be given a proxy through the ``proxies=`` request argument.
    """

    @property
    def browser(self):
        """Return the proxied Chromium instance, launching it on first access.

        The browser is cached on ``self._browser``, so the proxy port is
        chosen only once per session.
        """
        if not hasattr(self, "_browser"):
            # Pick a random port in 20000-20999 on the proxy host.
            port = random.randint(20000, 20999)
            proxy_server = "www.ttt.cm:{}".format(port)
            self.loop = asyncio.get_event_loop()
            # pyppeteer.launch() is a coroutine; run it to completion here.
            self._browser = self.loop.run_until_complete(
                pyppeteer.launch(
                    headless=True,
                    args=[
                        "--no-sandbox",
                        "--proxy-server={}".format(proxy_server),
                    ],
                )
            )
        return self._browser
session = HTMLSession2()
# Access the property up front so the proxied browser is launched now;
# per the original author's note this is mandatory when using threads.
session.browser
url = "https://httpbin.org/ip"
response = session.get(url=url)
response.html.render()
print(response.html.html) # the IP has changed