学了两天爬虫，在各种参考资料的帮助下小试牛刀，成功爬取了 placekitten 的猫咪图，开心。
贴上代码，有需要的自取。这是我第一次自己写爬虫代码，虽然有点 low，但会继续努力。
import urllib.request as ur
import re
import os
# Running total of images saved; get_img() increments it and uses it to
# number the output files.
count = 0
def open_url(url):
    """Fetch ``url`` and return the raw response body.

    The request is sent with a desktop-browser ``User-Agent`` header rather
    than urllib's default one.

    Args:
        url: URL string handed to ``urllib.request.Request``.

    Returns:
        bytes: The undecoded response body.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    req = ur.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299')
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left for the garbage collector.
    with ur.urlopen(req) as response:
        return response.read()
def get_pix(url):
    """Return the ``width/height``-looking fragments found on a page.

    The page at ``url`` is downloaded with ``open_url``, decoded as UTF-8 and
    scanned for substrings made of two or three digits, a slash and three
    digits, optionally preceded by a slash (for example ``/200/300`` or
    ``96/139``).

    Args:
        url: Address of the page to scan.

    Returns:
        list[str]: Every non-overlapping match in order of appearance;
        duplicates are kept.

    Raises:
        UnicodeDecodeError: If the page body is not valid UTF-8.
    """
    html = open_url(url).decode('utf-8')
    return re.findall(r'/?\d?\d\d/\d\d\d', html)
def get_img(url, folder='cat'):
    """Download every image referenced on the page at ``url`` into ``folder``.

    The fragments returned by ``get_pix(url)`` are appended to ``url`` to
    build each image address. Files are named ``cat_img<N>.jpg`` where ``N``
    is the module-level ``count`` after it has been incremented, and a line
    is printed for each file written.

    Args:
        url: Base address; it is both the page that is scanned and the prefix
            of every image URL.
        folder: Directory that receives the files. It is created if missing;
            the process working directory is left unchanged.
    """
    global count
    # exist_ok=True: running the script a second time must not crash just
    # because the target folder is already there.
    os.makedirs(folder, exist_ok=True)
    base = url.rstrip('/')
    for each in get_pix(url):
        # get_pix may return a fragment with or without a leading slash;
        # join with exactly one so the URL never contains '//' in its path.
        # Going through open_url also sends the browser User-Agent header
        # and closes the response.
        cat_img = open_url(base + '/' + each.lstrip('/'))
        count += 1
        name = 'cat_img' + str(count) + '.jpg'
        # Write via an explicit path instead of os.chdir so repeated calls
        # do not nest folders or change global process state.
        with open(os.path.join(folder, name), 'wb') as f:
            f.write(cat_img)
        print(name + '下载完成')
def main():
    """Download the images listed on the placekitten home page.

    Uses the default target folder of ``get_img``.
    """
    get_img('http://placekitten.com/')
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
2652

被折叠的 条评论
为什么被折叠?



