[Python]简单的爬取图片

最新推荐文章于 2024-04-20 15:12:20 发布

csdn产品小助手

最新推荐文章于 2024-04-20 15:12:20 发布

阅读量92

点赞数

文章标签： python 操作系统 xhtml

原文链接：http://www.cnblogs.com/Start12/p/9342092.html

版权

 1 import os
 2 import io
 3 import sys
 4 import re
 5 import urllib.request
 6 
 7 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb2312')
 8 
 9 """
10 headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
11            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
12            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
13            'Accept-Encoding': 'none',
14            'Accept-Language': 'en-US,en;q=0.8',
15            'Connection': 'keep-alive'}
16 """
17 
# HTTP request headers that imitate a desktop Chrome browser.
# Built from an ordered sequence of (name, value) pairs; the resulting dict
# has the same keys, values and insertion order as a plain literal would.
headers = dict((
    ('User-Agent',
     'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'),
    ('Accept',
     'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4,en-GB;q=0.2'),
))
24 """
25 def get_image(url):
26     request = urllib.request.Request(url, headers=headers)
27     # params = urllib.urlencode(post_params)
28     responseurl = urllib.request.urlopen(request)
29     get_img = responseurl.read()
30     with open('001.jpg', 'wb') as fp:
31         fp.write(get_img)
32         print('图片下载完成')
33     return
34 
35 url = 'http://image.tianjimedia.com/uploadImages/2016/009/27/FW632S21L801.jpg'
36 get_image(url)
37 
38 """
39 # headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
40 
41 
def download_page(url):
    """Fetch *url* and return the response body as raw bytes.

    The request is sent with the module-level ``headers`` dict. Because those
    headers may advertise ``gzip``/``deflate`` support, a compressed response
    body is transparently decompressed before it is returned.

    Args:
        url: Address of the page or image to download.

    Returns:
        The (decompressed) response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` for HTTP error statuses).
    """
    import gzip
    import zlib

    # Pass the Request object (not the bare URL) so the headers are really sent.
    request = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(request) as response:
        data = response.read()
        content_encoding = (response.headers.get('Content-Encoding') or '').strip().lower()

    if content_encoding in ('gzip', 'x-gzip'):
        data = gzip.decompress(data)
    elif content_encoding == 'deflate':
        try:
            data = zlib.decompress(data)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            data = zlib.decompress(data, -zlib.MAX_WBITS)
    return data
49 
50 
def get_image(html):
    """Download every ``http://...jpg`` image referenced in *html*.

    Each image is fetched with ``download_page`` and written to the current
    working directory as ``1.jpg``, ``2.jpg``, ... in order of appearance.

    Args:
        html: Page content, either raw ``bytes`` or ``str``.

    Returns:
        None.
    """
    # Scan the decoded text rather than repr(html): repr() adds quotes and
    # escape sequences that can end up inside a matched URL. latin-1 maps
    # every byte to one character, so decoding never fails and the ASCII
    # URLs are preserved whatever the page's real encoding is.
    if isinstance(html, (bytes, bytearray)):
        text = bytes(html).decode('latin-1')
    else:
        text = str(html)

    # Exclude quotes and angle brackets so that one match cannot run through
    # an attribute delimiter into the next URL.
    pattern = re.compile(r'http://[^\s"\'<>]*\.jpg')

    for num, img in enumerate(pattern.findall(text), start=1):
        # Report the index of the image actually being downloaded.
        print(u'正在下载第%s张图片' % num)
        image = download_page(img)
        with open('%s.jpg' % num, 'wb') as fp:
            fp.write(image)
    return
64 
# Gallery page whose JPEG images are downloaded when the script is run.
url = 'http://pic.yesky.com/180/99839180_2.shtml'

# Only hit the network and write files when executed as a script, so that
# importing this module has no such side effects.
if __name__ == '__main__':
    html = download_page(url)
    get_image(html)

转载于:https://www.cnblogs.com/Start12/p/9342092.html

csdn产品小助手

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
[Python]简单的爬取图片

1 import os 2 import io 3 import sys 4 import re 5 import urllib.request 6 7 sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb2312') 8 9 """10 headers = {'User-Agent...
复制链接

扫一扫