# Requires Beautiful Soup 4: http://www.crummy.com/software/BeautifulSoup/
from html.parser import HTMLParser
from bs4 import BeautifulSoup as bs
import urllib.request
import re
def parsechapter(url, out):
    """Download one chapter page and write its paragraph text to *out*.

    The page at *url* is fetched and decoded as UTF-8. For every ``<p>``
    inside each element whose id is ``chapterContent``, the node that
    immediately precedes the paragraph's first ``<span>`` is printed to
    *out*, one node per line.

    Args:
        url: Address of the chapter page to fetch.
        out: Writable text file object that receives the output.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # The context manager closes the response even if read()/decode() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')

    # Name the parser explicitly so the resulting tree does not depend on
    # which optional parsers happen to be installed.
    soup = bs(html, "html.parser")

    for content in soup.find_all(id="chapterContent"):
        for paragraph in content.find_all("p"):
            span = paragraph.span
            if span is None:
                # No <span> in this paragraph: nothing to anchor on, and
                # dereferencing it would raise AttributeError.
                continue
            text = span.previous_sibling
            if text is None:
                # The span is the first child; avoid writing the literal
                # string "None" into the output.
                continue
            print(text, file=out)
# Script body: fetch the chapter index page, then for every link found in
# a <div class="booklist"> append the link text to test.txt and let
# parsechapter() append the content of the linked page after it.
showchapter_url = 'http://book.zongheng.com/showchapter/189169.html'
try:
    # "a" mode: repeated runs append to test.txt rather than overwrite it.
    # The with-blocks close the file and the HTTP response on every path.
    with open("test.txt", mode="a", encoding="utf-8") as a_file:
        with urllib.request.urlopen(showchapter_url) as chapterData:
            chapterDataDecode = chapterData.read().decode('utf-8')

        # Name the parser explicitly so the resulting tree does not depend
        # on which optional parsers happen to be installed.
        chapterDataSoup = bs(chapterDataDecode, "html.parser")

        for chapters in chapterDataSoup.find_all("div", attrs={'class': "booklist"}):
            for chapter in chapters.find_all("a"):
                href = chapter.get('href')
                if not href:
                    # Anchor without a target: there is no page to fetch.
                    continue
                print(chapter.get_text(), file=a_file)
                parsechapter(href, a_file)
except IOError:
    # In Python 3 IOError is an alias of OSError, and urllib.error.URLError
    # derives from OSError, so network failures are reported here as well
    # as failures opening or writing the file.
    print('file error!')
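# Blog page footer captured with the listing (not part of the script):
# 分享到: (Share to:)
# 2012-11-07 14:29
# 浏览 1470 (Views: 1470)
# 评论 (Comments)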