首先需要安装 Requests-HTML,可使用以下命令:
pip3 install Requests-HTML
或(pip 的包名不区分大小写,两条命令等价):
pip3 install requests-html
注意:在 Linux 系统上安装时可能需要最高权限(例如切换到 root 用户或使用 sudo)。
下面上代码:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from requests_html import HTMLSession
def parse_data(url):
    """Fetch one list page and extract the title and link of each entry.

    Args:
        url: Address of the list page to request.

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts, one per element
        matching the ``.article-item-box`` CSS selector.

    Raises:
        Exception: With the message ``"End Load!"`` when the page contains
            no matching element.
    """
    # Use the session as a context manager so it is closed after the request
    # instead of leaking one open session per page fetched.
    with HTMLSession() as session:
        items = session.post(url).html.find(".article-item-box")
        if not items:
            raise Exception("End Load!")
        # Build the result while the session is still open.
        return [
            {
                # First line of the element text with its two leading
                # characters dropped (presumably a label prefix -- confirm).
                "title": item.text.split("\n")[0][2:],
                # Takes one link from the element; which one is unspecified
                # if the element carries several.
                "link": item.links.pop(),
            }
            for item in items
        ]
def get_all_data(url):
    """Collect the entries from every paginated list page under *url*.

    Pages are requested as ``url + "/article/list/<n>"`` for n = 1, 2, ...
    and each page number is printed after it has been parsed. The loop stops
    as soon as ``parse_data`` raises; the exception is printed, not re-raised.

    Args:
        url: Base address to which the page suffix is appended.

    Returns:
        A list with all entries gathered before the loop stopped (possibly
        empty if the very first page failed).
    """
    suffix = "/article/list/{}"
    all_data = []
    index = 1
    try:
        while True:
            this_data = parse_data(url + suffix.format(index))
            print(index)
            all_data.extend(this_data)
            index += 1
    except Exception as e:
        # Any exception from parse_data ends the crawl; whatever was gathered
        # so far is kept.
        print(e)
    # Hand the accumulated entries back to the caller; previously the
    # function returned None and the collected data was lost.
    return all_data