#requests etree lxml xpath
某人的课上作业,就成了我的作业;
备注:本文只记录方法,重要信息已xxx;
import requests
from lxml import etree
def get_sdyw(page_url=None):
    """Fetch one listing page and print its news titles, one per line.

    Args:
        page_url: Address of the page to fetch. When omitted, the
            module-level name ``url`` is used, so existing zero-argument
            callers that bind ``url`` in an enclosing loop keep working.

    Returns:
        None. The titles are printed joined by newlines. If the request
        fails, the server answers with an HTTP error status, or the body
        cannot be parsed, the string ``error`` is printed instead.
    """
    # Backward compatibility: the function used to read the global `url`.
    target = url if page_url is None else page_url
    headers = {
        "User-Agent": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
    try:
        # Without a timeout a stalled server would block the script forever.
        res = requests.get(target, headers=headers, timeout=10)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        res.raise_for_status()
        # Raw bytes are passed so the parser sees the document as served.
        html = etree.HTML(res.content)
    except (requests.RequestException, etree.LxmlError):
        # Only network/HTTP and parser failures are expected here; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        print('error')
        return
    if html is None:
        # NOTE(review): etree.HTML appears to return None for bodies with no
        # parsable markup; reported as an error, as the original did.
        print('error')
        return
    content_list = html.xpath('//div[@id="l-container"]//div[@id="wp_news_w6"]//span[@class="news_title"]//text()')
    content_str = '\n'.join(content_list)
    print(content_str)
def get_page(start=1, stop=11):
    """Build the list-page URLs for page numbers ``start`` .. ``stop - 1``.

    Args:
        start: First page number to include (default 1).
        stop: Page number to stop before, following ``range`` semantics
            (default 11, so the defaults yield pages 1 through 10).

    Returns:
        A list of URL strings, one per page number, in ascending order.
        The list is empty when ``start >= stop``.
    """
    template = "https://www.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/list{}.htm"
    # str.format converts the integer itself; no explicit str() is needed.
    return [template.format(page) for page in range(start, stop)]
if __name__ == '__main__':
    # get_page() is called once; the earlier extra call discarded its result.
    # The loop variable must stay named `url` at module level: get_sdyw()
    # is called without arguments and picks the address up from that name.
    for url in get_page():
        get_sdyw()
效果: