import sys
import re
import urllib.request
import os
import socket
# Baidu image-search results page that the scraper downloads and scans.
# `word` carries the percent-encoded search term; `ie=gbk` suggests the term
# is GBK-encoded.
page_url = (
    "http://image.baidu.com/search/index"
    "?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk"
    "&word=%B7%E7%BE%B0%CD%BC%C6%AC"
    "&fr=ala&ala=1&alatpl=others&pos=0"
)
def getHtml(url):
    """Download *url* and return the response body as text.

    The body is decoded with the charset declared in the response's
    Content-Type header. UTF-8 is used when no charset is declared or when
    the declared name is not a codec Python knows.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The decoded response body as ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid in the chosen charset.
        Any exception raised by ``urllib.request.urlopen`` propagates.
    """
    req = urllib.request.Request(url)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        the_page = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        return the_page.decode(charset)
    except LookupError:
        # The server declared a charset name with no matching codec.
        return the_page.decode("utf-8")
def getImag(html):
    """Fetch a page and return every image URL found in its text.

    Args:
        html: URL of the page to scan. Despite its name this is an address,
            not markup; the name is kept so existing callers keep working.

    Returns:
        A list of ``(url, extension)`` tuples, one per match, where
        ``extension`` is ``"jpg"``, ``"png"`` or ``"gif"``.
    """
    # `[^\s]` (any non-whitespace character) replaces `[^s]`, which excluded
    # the letter "s" and so missed every URL containing one.
    reg = r'(http:[^\s]*?(jpg|png|gif))'
    # Scan the page named by the argument rather than the module-level
    # `page_url`, which made the parameter dead.
    return re.findall(reg, getHtml(html))
# Runs when the module is executed: print the list of matches that getImag
# returns for the search page.
print(getImag(page_url))
import sys
import re
import urllib.request
import os
import socket
# Local directory that the download loop hands to destFile().
targeDir = "d:\\PythonLoadPlace"

# Baidu image-search results page that the scraper downloads and scans.
# `word` carries the percent-encoded search term; `ie=gbk` suggests the term
# is GBK-encoded.
page_url = (
    "http://image.baidu.com/search/index"
    "?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk"
    "&word=%B7%E7%BE%B0%CD%BC%C6%AC"
    "&fr=ala&ala=1&alatpl=others&pos=0"
)
def destFile(path):
    """Ensure directory *path* exists and return a file path inside it.

    The file name is the last component of *path* itself: the text after the
    final ``/``, or after the final ``\\`` when there is no ``/``, or the
    whole string when it contains neither.

    NOTE(review): the name depends only on *path*, so repeated calls with the
    same directory return the same destination.

    Args:
        path: Directory to create (missing parents included) and to derive
            the file name from.

    Returns:
        ``os.path.join(path, <last component of path>)``.

    Raises:
        OSError: If the directory cannot be created, for example when *path*
            already exists as a regular file.
    """
    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: there is no
    # check-then-create race and missing parent directories are created.
    os.makedirs(path, exist_ok=True)
    pos = path.rfind('/')
    if pos == -1:
        # Windows-style paths such as "d:\\dir" contain no "/"; rindex()
        # raised ValueError for them. rfind() returns -1 when there is no
        # separator at all, which makes the slice below keep the whole string.
        pos = path.rfind('\\')
    return os.path.join(path, path[pos + 1:])
def getHtml(url):
    """Fetch *url* and return its body decoded to ``str``.

    The charset named in the response's Content-Type header is used for
    decoding. When the header names no charset, or names one Python has no
    codec for, the body is decoded as UTF-8.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text.

    Raises:
        UnicodeDecodeError: If the bytes are invalid for the chosen charset.
        Errors raised by ``urllib.request.urlopen`` are not caught here.
    """
    req = urllib.request.Request(url)
    # Use the response as a context manager so it is always closed.
    with urllib.request.urlopen(req) as response:
        the_page = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        return the_page.decode(charset)
    except LookupError:
        # Unknown charset label in the header: fall back to UTF-8.
        return the_page.decode("utf-8")
def getImag(html):
    """Fetch a page and return its image URLs and anchor tags.

    Args:
        html: URL of the page to scan. Despite its name this is an address,
            not markup; the name is kept so existing callers keep working.

    Returns:
        A list of ``(image_url, extension, anchor_tag)`` tuples, one per
        match. For an image match ``anchor_tag`` is ``""``; for an ``<a ...>``
        match ``image_url`` and ``extension`` are ``""``.
    """
    # `[^\s]` (any non-whitespace character) replaces `[^s]`, which excluded
    # the letter "s" and so missed every URL containing one.
    reg = r'(http:[^\s]*?(jpg|png|gif))|(<a.*?href=.*?>)'
    return re.findall(reg, getHtml(html))
# Download every image that getImag finds on the search page into targeDir.
for link in getImag(page_url):
    print(link)
    image_url = link[0]
    if not image_url:
        # This match has no image URL in its first group, so there is
        # nothing to download; it is not a failed download.
        continue
    try:
        os.makedirs(targeDir, exist_ok=True)
        # Name each file after the last path segment of its URL. The earlier
        # destFile(targeDir) call built the name from the directory alone, so
        # every image shared one destination, and it raised ValueError for a
        # directory path without "/".
        destination = os.path.join(targeDir, image_url.rsplit('/', 1)[-1])
        urllib.request.urlretrieve(image_url, destination)
    except Exception as exc:
        # Best effort: report the failure ('失败' means "failed") with its
        # cause and carry on. Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit stop the script.
        print('失败', exc)