今天学习了 Python 爬虫,刚好老婆想下载一些菜谱图片,于是改了改网上的代码,试着抓了一下:把菜谱的步骤打印出来,并把图片保存到桌面上。
# -*- coding: utf-8 -*-
import re
import urllib
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` on the opened response yields: a byte string
        (``str`` on Python 2, ``bytes`` on Python 3).
    """
    # Resolve urlopen locally so the function works on both Python 3
    # (urllib.request.urlopen) and Python 2 (urllib.urlopen).
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the underlying connection even when read() raises.
        page.close()
def getImg(html, x, dest_dir=r'C:\Users\Administrator\Desktop\Recipe'):
    """Print each recipe step found in *html* and download its picture.

    A step is matched as the text between ``</em>`` and ``</p>`` followed by
    the next ``src="http://images....jpg"`` attribute. The picture of every
    matched step is saved as ``<dest_dir>/<x>.jpg`` and *x* is incremented
    after each download.

    Args:
        html: Page source, as text or as a raw byte string.
        x: Number used for the first saved image file.
        dest_dir: Folder the images are written to; created if missing.

    Returns:
        The next unused image number, i.e. *x* plus the number of steps
        found, so the caller can continue numbering.
    """
    import os
    # Resolve urlretrieve locally for Python 3 / Python 2 compatibility.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # On Python 3 an HTTP body is bytes, which a str pattern cannot search.
    # On Python 2 bytes is str, so this branch is never taken there.
    # NOTE(review): assumes the page is UTF-8 encoded - confirm for the site.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    reg = r'</em>(.*?)</p>.*?src="(http://images.*?\.jpg)"'
    items = re.findall(reg, html)

    # Only touch the file system when there is something to save.
    if items and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    for step_text, img_url in items:
        print(step_text)
        urlretrieve(img_url, os.path.join(dest_dir, '%s.jpg' % x))
        x = x + 1

    # The caller rebinds its counter to this value.
    return x
# Script entry: fetch one recipe page and hand it to getImg, which prints
# the steps and saves the step pictures numbered from 1.
# x ends up bound to whatever getImg returns.
url = "http://www.meishij.net/zuofa/songhuadanchang.html"
html = getHtml(url)
x = getImg(html, 1)