Python 爬虫

最新推荐文章于 2024-08-03 14:18:54 发布

记忆的残缺

最新推荐文章于 2024-08-03 14:18:54 发布

阅读量318

点赞数

分类专栏： python 文章标签： python

本文链接：https://blog.csdn.net/u014017121/article/details/48468837

版权

python 专栏收录该内容

0 篇文章 0 订阅

订阅专栏

import re
import urllib2
import os

# Seed page URLs to crawl; each entry is passed to getImg() in turn.
start_urls = ["url地址"]
# Download the images referenced by a page and save them to a local directory
def getImg(url, save_root="C:\\Python27\\imgdemo\\", img_host='URL地址'):
    """Download every ``/resources/`` image referenced by the page at *url*.

    The page is fetched, scanned for ``<img src="/resources/...">`` tags, and
    each referenced image is downloaded from ``img_host + src`` and written
    to ``save_root + <5th "/"-separated piece of url>`` under its original
    file name.

    Args:
        url: Address of the HTML page to scan. It must contain at least five
            "/"-separated pieces, because the piece at index 4 names the
            target sub-directory.
        save_root: Directory prefix the sub-directory name is appended to by
            plain string concatenation, so it should end with a path
            separator.
        img_host: Prefix prepended to each image's ``src`` value to build the
            download URL.

    Raises:
        IndexError: If *url* has fewer than five "/"-separated pieces.
        IOError: If the page itself cannot be fetched. Failures on individual
            images are reported on stdout and skipped instead.
    """
    page = urllib2.urlopen(url)
    try:
        data = page.read()
    finally:
        # Release the connection even when read() fails.
        page.close()

    # A single capturing group yields the src values directly.
    img_srcs = re.findall(r'<img src="(/resources/.*?)"', data)

    # One stripped path is used both for creating the directory and for
    # writing files, so the two can never disagree.
    target_dir = (save_root + url.split("/")[4]).strip()

    # Create the directory once, and only when there is something to save.
    if img_srcs and not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for src in img_srcs:
        print('==========222==========')
        print(src)
        print('<br/>')

        filename = src.split("/")[-1]
        img = img_host + src
        try:
            response = urllib2.urlopen(img)
            try:
                content = response.read()
            finally:
                response.close()
            with open(os.path.join(target_dir, filename), 'wb') as out:
                out.write(content)
        except IOError:
            # Best effort: report the failed image and continue with the
            # rest. urllib2's URLError is a subclass of IOError.
            print('download error:' + img)
# Walk the seed URLs: echo each one, then download the images it references.
for url in start_urls:
    print(url)
    getImg(url)