今日好热,照样是挖洞挖不到。看了几天的 Python 爬虫,学会了 XPath 解析,
撸一个代码玩玩。
不要说什么优化之类的,刚学完,跑了一阵,还可以,挺稳定。
# -*- coding:utf-8 -*-
#Xm17
import os
import urllib
import requests
from lxml import etree
import random
# Site root; hrefs scraped from listing pages are appended to this prefix.
base_url = "http://www.ye1001.com/"

# Listing-page URL template; "{}" is replaced by the page number via
# str.format().
url = base_url + "p06/list_{}.html"

# HTTP headers passed to requests.get() for page fetches: a desktop Chrome
# User-Agent string.
headers = dict([
    (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/67.0.3396.62 Safari/537.36",
    ),
])
def auto_down(url, filename, max_retries=5):
    """Download ``url`` to the local path ``filename``.

    A transfer that ends early (``ContentTooShortError``) is retried, but
    only up to ``max_retries`` attempts in total. The previous version
    retried by calling itself with no limit, so a server that always
    truncated the response ended in a recursion-depth error.

    Args:
        url: Address of the resource to fetch.
        filename: Local path the payload is written to.
        max_retries: Maximum number of download attempts; values below 1
            are treated as 1.

    Returns:
        True when the download completed, False when every attempt was
        truncated. Any other error raised by ``urlretrieve`` propagates
        unchanged, exactly as before.
    """
    # Resolved here so the function runs on both Python 2 (urllib) and
    # Python 3 (urllib.request / urllib.error) without touching the
    # file-level imports.
    try:
        from urllib import urlretrieve, ContentTooShortError
    except ImportError:
        from urllib.request import urlretrieve
        from urllib.error import ContentTooShortError

    attempts = max(1, max_retries)
    for _attempt in range(attempts):
        try:
            urlretrieve(url, filename)
        except ContentTooShortError:
            print('Network conditions is not good.Reloading.')
        else:
            return True
    # Every attempt was truncated: give up on this file instead of looping
    # forever, and let the caller carry on with the next download.
    return False
def _fetch_tree(page_url):
    """Fetch ``page_url`` and return its parsed lxml tree, or None on failure."""
    try:
        # The timeout keeps one stalled connection from hanging the crawl.
        reply = requests.get(page_url, headers=headers, timeout=30)
    except requests.RequestException as err:
        print("Skipping %s: %s" % (page_url, err))
        return None
    # NOTE(review): etree.HTML may return None when the body holds no
    # parseable markup; callers check for that before calling xpath().
    return etree.HTML(reply.text)


# Crawl listing pages 1..39. Every gallery page linked from a listing gets
# its own directory, named after six characters taken from the page URL,
# and each image found on that gallery page is downloaded into it.
visited_pages = set()
for page_no in range(1, 40):
    listing = _fetch_tree(url.format(page_no))
    if listing is None:
        continue
    hrefs = listing.xpath("//div[@class='content bord mtop']//a/@href")
    for href in hrefs:
        page_url = base_url + href
        # Only "...html" links are treated as gallery pages. A link that was
        # already handled is skipped, so a gallery linked twice is neither
        # re-downloaded nor allowed to trip the directory creation below.
        if not page_url.endswith("html") or page_url in visited_pages:
            continue
        visited_pages.add(page_url)
        # Characters -11..-6 of the URL (the six just before the final five,
        # e.g. before ".html") serve as directory name and file prefix.
        title = str(page_url[-11:-5])
        gallery = _fetch_tree(page_url)
        if gallery is None:
            continue
        image_urls = gallery.xpath("//div[@class='mtop']//img/@src")
        # os.mkdir raises when the directory already exists (e.g. on a
        # second run), so create it only when missing.
        if not os.path.isdir(title):
            os.mkdir(title)
        # Number files sequentially: the previous random 1..100 suffix could
        # repeat within one gallery and silently overwrite an earlier image.
        for index, image_url in enumerate(image_urls, 1):
            print(image_url)
            auto_down(image_url, "%s/%s_%d.jpg" % (title, title, index))
今日就到这里,洗澡去了。