环境:Python 2.7 + BeautifulSoup 库。如果已经安装了 pip,直接执行 pip install BeautifulSoup 即可。
直接上代码吧。
# -*- coding: utf-8 -*-
import urllib2
from BeautifulSoup import BeautifulSoup
# Fetch pages 1-20 of a Zhihu collection and save the text of every element
# whose class is 'zm-item-title' or 'zh-summary summary clearfix' to
# Answer.txt (UTF-8 encoded), echoing each extracted text to stdout.
# NOTE(review): the <h2> matches are presumably question titles and the other
# matches answer summaries -- confirm against the live page markup.
#
# The `with` block guarantees the output file is flushed and closed even if a
# request or a parse step raises part-way through the run.
with open('Answer.txt', 'w') as f:
    for pagenum in range(1, 21):
        strpagenum = str(pagenum)
        print("Getting data for Page " + strpagenum)
        url = "http://www.zhihu.com/collection/27109279?page=" + strpagenum
        page = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(page)
        finally:
            # Release the HTTP connection as soon as the page has been parsed.
            page.close()
        ALL = soup.findAll(attrs={'class': ['zm-item-title', 'zh-summary summary clearfix']})
        for each in ALL:
            if each.name == 'h2':
                # The text lives in the heading's first <a>; guard against a
                # heading without a link instead of raising AttributeError.
                link = each.a
                text = link.string if link is not None else None
            else:
                text = each.string
            print(text)
            if text:
                f.write(text.encode('utf-8'))
            else:
                # `.string` was None or empty, so there is no text to save.
                f.write("No Answer")
            # Separate entries so consecutive texts do not run together.
            f.write("\n")