Python 抓取小说

BeautifulSoup 主页:http://www.crummy.com/software/BeautifulSoup/

from html.parser import HTMLParser

from bs4 import BeautifulSoup as bs

import urllib.request

import re

def parsechapter(url, out):
    """Download one chapter page and write its paragraph text to *out*.

    The page is fetched from *url*, decoded as UTF-8 and parsed. For every
    ``<p>`` inside an element whose id is ``chapterContent``, the node that
    immediately precedes the paragraph's first ``<span>`` is printed to
    *out*, one line per paragraph.

    Args:
        url: Address of the chapter page to fetch.
        out: Writable text stream (anything accepted by ``print(file=...)``).

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # The context manager closes the HTTP response even when read/decode
    # raises, which the manual close() call could not guarantee.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode('utf-8')

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed.
    soup = bs(page, 'html.parser')

    for content in soup.find_all(id="chapterContent"):
        for paragraph in content.find_all("p"):
            span = paragraph.span
            if span is None:
                # No <span> in this paragraph: nothing to anchor on, so skip
                # it instead of failing with AttributeError.
                continue
            print(span.previous_sibling, file=out)

# Script body: fetch the book's chapter index page, then append each chapter's
# title followed by its text to test.txt.
showchapter_url = 'http://book.zongheng.com/showchapter/189169.html'

try:
    # `with` closes the output file on every exit path, replacing the
    # finally / "'a_file' in locals()" cleanup.
    with open("test.txt", mode="a", encoding="utf-8") as a_file:
        # Close the index-page response even if read/decode fails.
        with urllib.request.urlopen(showchapter_url) as chapterData:
            chapterDataDecode = chapterData.read().decode('utf-8')

        # Explicit parser keeps the result independent of installed parsers.
        chapterDataSoup = bs(chapterDataDecode, 'html.parser')

        for chapters in chapterDataSoup.find_all("div", attrs={'class': "booklist"}):
            for chapter in chapters.find_all("a"):
                href = chapter.get('href')
                if not href:
                    # Anchors without a target cannot be downloaded; skip
                    # them rather than raising KeyError.
                    continue
                print(chapter.get_text(), file=a_file)
                parsechapter(href, a_file)
except IOError:
    # IOError is an alias of OSError, which also covers urllib's URLError,
    # so both file and network failures end up here.
    print('file error!')

分享到:

18e900b8666ce6f233d25ec02f95ee59.png

72dd548719f0ace4d5f9bca64e1d7715.png

2012-11-07 14:29

浏览 1470

评论

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值