from requests_html
import HTMLSession
import requests
import os
import time
# One HTML session shared by every page fetch in this module.
session = HTMLSession()
# Module-level image counter: updated by save_img, reported when the run ends.
img_num = 0
def li_lists_url(url):
    """Visit every gallery linked from the index page at *url*.

    Fetches *url* with the module-level ``session``, takes the first
    ``div.Left_bar`` element and calls ``get_girls`` with the ``href`` of
    the first ``<a>`` found inside each of its ``<li>`` items.

    A page without the sidebar, an ``<li>`` without a link, or a link
    without an ``href`` is skipped rather than aborting the whole crawl
    with AttributeError/KeyError.
    """
    # session.get returns a response object; .html exposes the parsed page.
    response = session.get(url)
    sidebar = response.html.find('div.Left_bar', first=True)
    if sidebar is None:
        # find(..., first=True) gives None when the selector matches nothing.
        return
    for item in sidebar.find('li'):
        anchor = item.find('a', first=True)
        href = anchor.attrs.get('href') if anchor is not None else None
        if not href:
            continue
        get_girls(href)
def get_girls(url):
    """Download every image listed on the gallery page at *url*.

    Fetches *url* with the module-level ``session``, takes the first
    ``div.scroll-img-cont`` element and, for each ``<li>`` in it, reads the
    ``data-original`` attribute of the first ``a img`` element. The part of
    that URL from the first underscore onwards is replaced by ``.jpg`` and
    the result is passed to ``save_img``.

    Downloading is best effort: a failure for one image is printed and the
    loop moves on to the next one. Items without an image or without a
    ``data-original`` attribute are skipped.
    """
    response = session.get(url)
    strip = response.html.find('div.scroll-img-cont', first=True)
    if strip is None:
        # find(..., first=True) gives None when the selector matches nothing.
        return
    for item in strip.find('li'):
        image = item.find('a img', first=True)
        thumb_url = image.attrs.get('data-original') if image is not None else None
        if not thumb_url:
            continue
        # NOTE(review): presumably thumbnails are named "<name>_<size>.jpg"
        # and the full image is "<name>.jpg" -- confirm against the site.
        cut = thumb_url.find('_')
        # str.find returns -1 when there is no underscore; slicing with -1
        # would chop the last character, so keep such URLs unchanged.
        img_url = thumb_url[:cut] + '.jpg' if cut != -1 else thumb_url
        try:
            save_img(img_url)
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C can still stop the crawl.
            print('报错了', img_url, exc)
def save_img(
    img_url,
    save_dir=r'E:\python项目\python\requests_html\img',
    timeout=10,
):
    """Download *img_url* and write it to ``<save_dir>/<timestamp_ms>.jpg``.

    Args:
        img_url: URL of the image to fetch.
        save_dir: Directory the file is written to; created if missing.
            Defaults to the directory the script originally hard-coded.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        OSError: the directory or file could not be created/written.

    The module-level ``img_num`` counter is incremented only after the file
    has been written, so failed downloads are not counted.
    """
    global img_num
    response = requests.get(img_url, timeout=timeout)
    # Do not store an HTML error page under a .jpg name.
    response.raise_for_status()
    os.makedirs(save_dir, exist_ok=True)
    # File name is the current time in milliseconds.
    title = int(round(time.time() * 1000))
    path = os.path.join(save_dir, '%d.jpg' % title)
    with open(path, 'wb') as file:
        file.write(response.content)
    img_num += 1
def main(url):
    """Run the crawl, starting from the index page at *url*."""
    li_lists_url(url)
if __name__ == '__main__':
    # Index page handed to main() as the starting point of the crawl.
    url = 'http://www.win4000.com/zt/xinggan.html'
    main(url)
    # Report the final value of the module-level image counter.
    print('爬取完毕,总爬取%d张小姐姐' % img_num)