python获取网页图片

最新推荐文章于 2023-06-15 00:00:00 发布

子杣

最新推荐文章于 2023-06-15 00:00:00 发布

阅读量145

点赞数

本文链接：https://blog.csdn.net/qq_41749069/article/details/120079891

版权

Python 爬虫 正则表达式 图片下载 编码解码

关键词由CSDN通过智能技术生成

import os
import re
import urllib.request
from urllib.request import quote, unquote
#import chardet

# Request headers that make the scraper look like a desktop browser.
# This has to be a dict ("name": "value"); written with a comma it becomes a
# set, which cannot be passed to urllib as request headers.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}

url = "http://www.4399.com/"  # another page to try: http://findicons.com/pack/2787/beautiful_flat_icons
# Percent-encode the URL (non-ASCII characters are encoded as GB2312); the
# characters listed in `safe` are left unescaped.
url = quote(url, safe=";/?:@&=+$,", encoding="GB2312")

# Directory the downloaded images are written to.
SAVE_DIR = 'd:/image/'

# Captures the quoted value of a src="..." attribute. Because it is a search,
# it also finds the value inside 4399's lazy-load attribute lz_src="...".
IMAGE_URL_PATTERN = re.compile(r'src="(.*?)"')


def open_url(target):
    """Open *target*, sending the browser-like ``header`` with the request.

    Raises ``ValueError`` when *target* has no URL scheme and
    ``urllib.error.URLError`` (an ``OSError``) when the request fails.
    """
    return urllib.request.urlopen(urllib.request.Request(target, headers=header))


def fetch_page(page_url):
    """Download *page_url* and return its body as text.

    The body is decoded as UTF-8 and undecodable bytes are dropped, so pages
    in other encodings lose their non-ASCII text but keep their ASCII URLs.
    """
    with open_url(page_url) as response:
        raw = response.read()
    return raw.decode('UTF-8', "ignore")


def find_image_tokens(tokens):
    """Return the ``src``/``lz_src`` tokens that reference PNG or JPG images.

    *tokens* is the page text split on whitespace. All PNG tokens come first,
    followed by all JPG tokens, each group in page order.
    """
    candidates = [token for token in tokens if token.startswith(('src', 'lz_src'))]
    png_tokens = [token for token in candidates if 'png"' in token]
    jpg_tokens = [token for token in candidates if 'jpg"' in token]
    return png_tokens + jpg_tokens


def extract_image_url(token):
    """Return the URL inside the token's ``src="..."``, or ``None`` if absent or empty."""
    found = IMAGE_URL_PATTERN.search(token)
    if found is None:
        return None
    return found.group(1) or None


def image_extension(image_url):
    """Return ``'.jpg'`` for URLs whose path ends in ``.jpg``, otherwise ``'.png'``."""
    # Ignore any query string or fragment when looking at the file name.
    path = re.split(r'[?#]', image_url, maxsplit=1)[0]
    return '.jpg' if path.endswith('.jpg') else '.png'


def download_images(tokens, directory=SAVE_DIR):
    """Download the image referenced by each token into *directory*.

    Files are named ``1``, ``2``, ... in download order, with an extension
    chosen by :func:`image_extension`. Tokens without a usable URL, URLs that
    start with ``/`` and images that cannot be fetched are skipped.

    Returns the number of images saved.
    """
    os.makedirs(directory, exist_ok=True)
    saved = 0
    for token in tokens:
        image_url = extract_image_url(token)
        if image_url is None:
            continue
        print(image_url)
        # Root-relative and protocol-relative URLs are not resolved; skip them.
        if image_url.startswith('/'):
            continue
        try:
            with open_url(image_url) as response:
                payload = response.read()
        except (OSError, ValueError) as err:
            # One unreachable or malformed URL should not abort the whole run.
            print('skipped ' + image_url + ': ' + str(err))
            continue
        saved += 1
        target = os.path.join(directory, str(saved) + image_extension(image_url))
        with open(target, "wb") as out:
            out.write(payload)
        print(token)
    return saved


def main():
    """Fetch the configured page and download the PNG/JPG images it references."""
    html = fetch_page(url)
    tokens = re.split(r'\s+', html)
    print(tokens)
    # List every src/href/lz_src attribute that points at a png, ico or jpg.
    for token in tokens:
        if token.startswith(('src', 'href', 'lz_src')) and any(
            marker in token for marker in ('png"', 'ico"', 'jpg"')
        ):
            print(token)
    download_images(find_image_tokens(tokens))
    # To dump the page text for inspection:
    #f = open('1.txt',mode='w',encoding='UTF-8')
    #f.write(html)
    #f.close()


if __name__ == "__main__":
    main()

子杣

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
python获取网页图片

import urllib.requestimport refrom urllib.request import quote, unquote#import chardetheader = {'User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}url = "http://www.4
复制链接

扫一扫