关于 requests-html 的第一次使用

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_36607551/article/details/80516957
from requests_html import HTMLSession
import requests
import os
import time

# Counter updated by save_img() and reported when the script finishes.
img_num = 0
# Shared requests-html session used for every page fetch in this script.
session = HTMLSession()

def li_lists_url(url):
    """Crawl the index page at *url* and process every gallery it links to.

    The page is fetched through the module-level ``session``. Inside the
    ``div.Left_bar`` container, the ``href`` of the first ``<a>`` of each
    ``<li>`` is handed to ``get_girls``.

    Args:
        url: Address of the index page.
    """
    # session.get() returns a response object; the parsed DOM lives on .html.
    response = session.get(url)
    sidebar = response.html.find('div.Left_bar', first=True)
    if sidebar is None:
        # find(..., first=True) yields None when nothing matches; bail out
        # instead of failing with AttributeError on the chained call.
        print('未找到栏目列表: %s' % url)
        return

    for item in sidebar.find('li'):
        anchor = item.find('a', first=True)
        href = anchor.attrs.get('href') if anchor is not None else None
        if not href:
            # An <li> without a usable link has nothing to crawl.
            continue
        get_girls(href)

def get_girls(url):
    """Download the image behind every thumbnail on the gallery page *url*.

    Each ``<li>`` inside ``div.scroll-img-cont`` is expected to hold an
    ``<a><img data-original="..."></a>``. The thumbnail URL is cut at its
    first underscore and given a ``.jpg`` extension before being passed to
    ``save_img`` (presumably this strips a size suffix such as ``_130_170``
    to reach the full-size file -- confirm against the site).

    Args:
        url: Address of the gallery page.
    """
    response = session.get(url)
    strip = response.html.find('div.scroll-img-cont', first=True)
    if strip is None:
        # No thumbnail strip on this page: skip it rather than abort the crawl.
        print('未找到图片列表: %s' % url)
        return

    for item in strip.find('li'):
        img = item.find('a img', first=True)
        thumb_url = img.attrs.get('data-original') if img is not None else None
        if not thumb_url:
            continue

        cut = thumb_url.find('_')
        # str.find returns -1 when there is no underscore; slicing with -1
        # would silently drop the last character, so keep the URL as-is then.
        img_url = thumb_url[:cut] + '.jpg' if cut != -1 else thumb_url

        try:
            save_img(img_url)
        except Exception as exc:
            # Best effort: one failed image must not stop the remaining ones.
            # Exception (not a bare except) lets Ctrl-C still interrupt the run.
            print('报错了', img_url, exc)

def save_img(img_url, save_dir=r'E:\python项目\python\requests_html\img'):
    """Download *img_url* and store it as ``<millisecond timestamp>.jpg``.

    The module-level ``img_num`` counter is incremented only after the file
    has been written, so it reflects images actually saved.

    Args:
        img_url: Address of the image to download.
        save_dir: Directory the file is written to; created if missing.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an error status.
        OSError: The target directory or file could not be written.
    """
    global img_num

    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(img_url, timeout=30)
    # Do not write an HTTP error page to disk under a .jpg name.
    response.raise_for_status()

    os.makedirs(save_dir, exist_ok=True)
    stamp = int(round(time.time() * 1000))
    path = os.path.join(save_dir, '%d.jpg' % stamp)
    # Two saves within the same millisecond would otherwise overwrite each other.
    while os.path.exists(path):
        stamp += 1
        path = os.path.join(save_dir, '%d.jpg' % stamp)

    with open(path, 'wb') as file:
        file.write(response.content)

    img_num += 1

def main(url):
    """Run the crawl, starting from the index page at *url*."""
    li_lists_url(url)

if __name__ == '__main__':
    # Script entry point: crawl the index page, then report the image counter.
    url = 'http://www.win4000.com/zt/xinggan.html'
    main(url)
    print('爬取完毕,总爬取%d张小姐姐' % (img_num,))
阅读更多
想对作者说点什么?

博主推荐

换一批

没有更多推荐了,返回首页