# Python爬取知乎上的图片 (Scraping the images of a Zhihu answer with Python)
from urllib import request as rr
from bs4 import BeautifulSoup
import os
import re
# Address of the Zhihu answer page that the script fetches and scans for <img> tags.
url = "https://www.zhihu.com/question/281282523/answer/473126030"
def download(_url, file_name):
    """Fetch ``_url`` and store the raw response body in ``file_name``.

    The function is best-effort: when ``_url`` is ``None`` or empty, or the
    response status is not 200, it returns without creating or touching
    ``file_name``.

    Args:
        _url: Address of the resource to fetch; ``None``/empty means "skip".
        file_name: Path of the file that is (over)written with the bytes.

    Returns:
        None.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            itself fails.
        OSError: If ``file_name`` cannot be opened for writing.
    """
    # Return early: passing None (or "") on to urlopen raises instead of
    # skipping, which is what the guard is meant to do.
    if not _url:
        return

    # The context manager closes the HTTP response even if reading fails.
    with rr.urlopen(_url) as result:
        if result.getcode() != 200:
            return
        data = result.read()

    # `with` closes the file on exit; no explicit close() is needed.
    with open(file_name, "wb") as f:
        f.write(data)
if __name__ == '__main__':
    # Script entry point: fetch the answer page, collect the distinct
    # 'data-original' image addresses and save them as 1.jpg, 2.jpg, ...

    # Directory that receives the numbered image files.
    image_dir = '/Users/jiangnan/Desktop/zhihu/images/'
    # Make sure the target directory exists before any file is written.
    os.makedirs(image_dir, exist_ok=True)

    # The context manager closes the HTTP response once the page is read.
    with rr.urlopen(url) as res:
        content = res.read()

    # Name the parser explicitly so the parse result does not depend on
    # which optional parsers happen to be installed.
    soup = BeautifulSoup(content, 'html.parser')

    # De-duplicate while keeping document order, so the numbering of the
    # saved files is the same on every run (a plain set has no stable order).
    seen = set()
    link_list = []
    for link in soup.find_all('img'):
        addr = link.get('data-original')
        # Skip <img> tags without a usable 'data-original' attribute.
        if not addr or addr in seen:
            continue
        seen.add(addr)
        link_list.append(addr)

    for cnt, addr in enumerate(link_list, 1):
        pathName = os.path.join(image_dir, str(cnt) + '.jpg')
        print("Downloading the " + str(cnt) + "th picture")
        download(addr, pathName)