今天用 Python 抓取网页上的图片,发现抓取下来的图片全都打不开。代码如下:
#!/usr/bin/env python
from pyquery import PyQuery as pq
import urllib
import os
from os.path import split, splitext
from sys import argv
def get_images(url):
    """Load the page at *url* and describe every <img> element on it.

    Returns a list containing one dict per <img> element, built from the
    (name, value) pairs reported by that element's ``items()``.
    """
    document = pq(url=url)
    return [dict(element.items()) for element in document('img')]
class WebImage(object):
    """A remote image, identified by URL, that can be saved to local disk."""

    def __init__(self, url=None):
        """Remember *url* (may be None and set later via rseturl())."""
        self.url = url
        # Path of the most recent download target. Initialised here so that
        # filename() is safe to call before retrieve() has ever run.
        self.file = None

    def rseturl(self, url):
        """Point this object at a different image URL."""
        self.url = url

    def _remote_basename(self):
        """Return the last path segment of the URL, minus query/fragment."""
        # Anything after '#' or '?' is not part of the resource's file name;
        # leaving it in yields names such as 'cat.jpg?size=1'.
        path = self.url.split('#', 1)[0].split('?', 1)[0]
        return split(path)[1]

    def retrieve(self, dir=None, name=None):
        """Download the current URL into *dir* and record the target path.

        dir  -- destination directory; defaults to the current working
                directory.
        name -- file name without extension; the extension is taken from
                the URL. When omitted, the URL's own file name is used.

        Does nothing (returns None) when no URL has been set. Any error
        raised by ``urllib.urlretrieve`` propagates to the caller.
        """
        if self.url is None:
            return
        if dir is None:
            dir = os.getcwd()

        remote_name = self._remote_basename()
        if name is None:
            # A URL ending in '/' has no file name; fall back to a fixed
            # one so the target is never the directory itself.
            name = remote_name or 'image'
        else:
            name = name + splitext(remote_name)[1]

        self.file = os.path.join(dir, name)
        # Single-argument form: same output as the original print statement
        # and valid under both print-statement and print-function syntax.
        print('download %s to  %s' % (self.url, self.file))
        # NOTE(review): on Python 2, urlretrieve appears to save whatever
        # body the server sends, including HTML error pages for 403/404
        # responses (e.g. hotlink protection). If saved "images" will not
        # open, inspect their contents -- confirm against the urllib docs.
        urllib.urlretrieve(self.url, self.file)

    def filename(self):
        """Return the path used by the last retrieve(), or None if none."""
        return self.file
def main():
    """Download every image referenced by a web page.

    The page URL is taken from the first command-line argument, or read
    from standard input when no argument is given. An empty URL, Ctrl-C
    or end-of-input at the prompt ends the program without downloading.
    """
    # Local import keeps this block self-contained (Python 2 stdlib).
    from urlparse import urljoin

    if len(argv) > 1:
        url = argv[1]
    else:
        try:
            url = raw_input('Enter URL:')
        except (KeyboardInterrupt, EOFError):
            url = ''
    if not url:
        return

    images = get_images(url)
    i = WebImage()
    for img in images:
        # <img> tags without a src attribute have nothing to download;
        # indexing img['src'] directly would raise KeyError on them.
        src = img.get('src')
        if not src:
            continue
        # Pages commonly use relative or protocol-relative src values;
        # resolve them against the page URL so the right host is fetched.
        # Absolute URLs pass through urljoin unchanged.
        i.rseturl(urljoin(url, src))
        i.retrieve()
# Start the downloader only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == '__main__':
    main()
请问是什么问题?