# -*- coding: UTF-8 -*-
"""Scrape the novel 《一念永恒》 from biqukan.com into a single text file.

The script reads the book's index page, collects the chapter links that are
listed after the main-volume heading, downloads each chapter page, and writes
the cleaned chapter text to ``一念永恒.txt`` (UTF-8) in the current directory.
"""
from urllib import request

from bs4 import BeautifulSoup

SERVER_URL = "http://www.biqukan.com"
INDEX_URL = SERVER_URL + "/1_1094/"
OUTPUT_PATH = '一念永恒.txt'
# Index entries that appear before this heading are skipped; chapter links are
# only collected once it has been seen.
MAIN_VOLUME_HEADING = "《一念永恒》正文卷"
REQUEST_HEADERS = {
    'User-Agent': "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19",
}


def _fetch_html(url):
    """Return the page at *url* decoded as GBK, ignoring undecodable bytes."""
    req = request.Request(url=url, headers=REQUEST_HEADERS)
    # The context manager closes the HTTP response even if read() fails.
    with request.urlopen(req) as response:
        return response.read().decode('gbk', 'ignore')


def _collect_chapters(index_html):
    """Return ``(title, url)`` pairs for every chapter after the main heading.

    Raises:
        RuntimeError: if the index page has no ``div.listmain`` containing a
            ``<dl>`` chapter list.
    """
    soup = BeautifulSoup(index_html, 'lxml')
    listing = soup.find('div', class_='listmain')
    if listing is None or listing.dl is None:
        raise RuntimeError("chapter list (div.listmain > dl) not found in index page")

    chapters = []
    in_main_volume = False
    # recursive=False with True yields only the direct child *tags* of <dl>,
    # so whitespace text nodes between entries never reach the `.a` lookup.
    for entry in listing.dl.find_all(True, recursive=False):
        if entry.string == MAIN_VOLUME_HEADING:
            in_main_volume = True
        link = entry.a
        if not in_main_volume or link is None:
            continue
        href = link.get('href')
        if not href:
            continue
        # `.string` is None when the entry holds more than one child node;
        # fall back to the link text so the title is always a str.
        title = entry.string or link.get_text()
        chapters.append((title, SERVER_URL + href))
    return chapters


def _clean_chapter_text(raw_text):
    """Normalise raw chapter text for writing to the output file.

    Non-breaking spaces and ordinary spaces are removed, everything from the
    first ``http`` onward (the link appended after the chapter body) is
    dropped, and each carriage return is followed by a newline.
    """
    text = raw_text.replace('\xa0', '')
    # Cut at the appended URL rather than at the first letter "h", which would
    # truncate any chapter that happens to contain that letter.
    cut = text.find('http')
    if cut != -1:
        text = text[:cut]
    return text.replace(' ', '').replace('\r', '\r\n')


def _extract_chapter_text(chapter_html):
    """Return the cleaned chapter body, or ``None`` if the page has none."""
    soup = BeautifulSoup(chapter_html, 'lxml')
    content = soup.find('div', id='content', class_='showtxt')
    if content is None:
        return None
    return _clean_chapter_text(content.text)


def main():
    """Download all chapters and write them to ``OUTPUT_PATH``."""
    chapters = _collect_chapters(_fetch_html(INDEX_URL))
    total = len(chapters)

    # `with` guarantees the file is flushed and closed even when a download
    # raises part-way through.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as output:
        for index, (title, url) in enumerate(chapters, start=1):
            body = _extract_chapter_text(_fetch_html(url))
            if body is None:
                print("warning: no chapter text found at " + url)
                body = ''
            output.write(title + '\n\n')
            output.write(body)
            output.write('\n\n')
            # Report download progress as a real percentage of all chapters.
            print("已下载:%.3f%%" % (index / total * 100) + '\r')


if __name__ == '__main__':
    main()
用 Python 爬取笔趣阁小说
最新推荐文章于 2024-05-01 21:57:08 发布