import urllib.request
import os

# Browser-like request headers so the site serves pages to the scraper
# instead of rejecting the default urllib User-Agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163"
                  "Safari/535.1",
}
# Main driver: fetch the table-of-contents page, then download each chapter.
def main():
    """Download every chapter of the novel listed on the index page.

    Fetches the index, extracts each chapter's relative link and title,
    and hands them to get_content() one by one, printing progress.
    """
    import re  # local import: only needed here, keeps top-of-file untouched

    print('开始下载')
    url = 'http://www.eywedu.com/honglou/01/index.htm'
    # Base URL that relative chapter hrefs are resolved against.
    net = url.rsplit('/', 1)[0] + '/'
    req = urllib.request.Request(url, data=None, headers=headers)
    # Site is encoded in GB18030, not UTF-8.
    html = urllib.request.urlopen(req).read().decode('gb18030')
    # NOTE(review): the original split('...') separator literals were lost
    # to file corruption; recover the (href, title) pairs from the anchor
    # tags with a regex instead — TODO confirm against the live page markup.
    chapters = re.findall(r'<a\s+href="([^"]+\.htm)"[^>]*>([^<]+)</a>', html)
    page = 1
    for href, title in chapters:
        get_content(net + href, title)
        print('第', page, '章已下完')
        page = page + 1
# Fetch one chapter page and save its text content to disk.
def get_content(url, name):
    """Download the chapter at *url* and save its paragraphs under *name*.

    :param url: absolute URL of a single chapter page
    :param name: chapter title, used as the output file name
    """
    import re  # local import: only needed here, keeps top-of-file untouched

    req = urllib.request.Request(url, data=None, headers=headers)
    # Site is encoded in GB18030, not UTF-8.
    html = urllib.request.urlopen(req).read().decode('gb18030')
    # NOTE(review): the original split('...') separators and the while-loop
    # body were lost to file corruption; approximate them by stripping all
    # HTML tags and keeping non-empty text lines — TODO confirm output
    # matches the original extraction against a real chapter page.
    text = re.sub(r'<[^>]+>', '\n', html)
    paragraphs = [line.strip() + '\n' for line in text.splitlines() if line.strip()]
    save_txt(paragraphs, name)
# Append the collected paragraphs to 红楼梦/<name>.txt.
def save_txt(content, name):
    """Append every string in *content* to the chapter file for *name*.

    Fixes over the original:
    - creates the output directory first (the original open() crashed with
      FileNotFoundError when the 红楼梦 folder did not exist),
    - uses a context manager so the file is closed even if a write fails
      (the original leaked the handle on error),
    - builds the path with os.path.join instead of a hard-coded Windows
      '\\' separator, so it also works on POSIX systems.

    :param content: iterable of strings to append
    :param name: chapter title used as the file stem
    """
    os.makedirs('红楼梦', exist_ok=True)
    with open(os.path.join('红楼梦', name + '.txt'), "a", encoding='utf-8') as f:
        f.writelines(content)
# Script entry point: only start the download when run directly,
# not when this module is imported.
if __name__ == '__main__':
    main()