#coding:utf-8 from requests import get import re from bs4 import BeautifulSoup from time import sleep,ctime from threading import Thread import sys reload(sys) sys.setdefaultencoding('utf-8') print sys.getdefaultencoding() headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36'} def Usage(): print "\n[Usage]python 笔趣阁.py" def List_Fiction(name): Search_Url = "http://www.biquge5200.com/modules/article/search.php?searchkey="+name print Search_Url result = get(Search_Url.decode('gbk')) return result.text def Make_Form(html): Fiction_urls = [] Fiction_names = [] num = 0 result = re.findall(r'<td class="odd"><a href="(.*?)">(.*?)</a></td>',html) if result[0] == "": Usage() exit() for i in result: Fiction_urls.append(i[0]) Fiction_names.append(i[1]) A = re.findall(r'<td class="odd">(.*?)</td>',html) Authors = [] for i in A: if "<a" not in i: Authors.append(i) for i in Fiction_urls: print "["+str(num)+"]"+i+"\t[name]"+Fiction_names[num]+"\t[Author]"+Authors[num] num += 1 number = raw_input("选择想下载的小说(填写编号):") content = get(Fiction_urls[int(number)]).text Chapter = re.findall(r'<dd><a href="(.*?)">(.*?)</a></dd>',content) return Chapter def Write_into_TXT(cont): f = open(Fiction_Name+'.txt','ab+') pattern = '<(.*?)>' CH_cont = re.sub(pattern,'\n',cont) f.write(CH_cont) f.close() def Download_all(Chapter_urls): Number = 0 for i in Chapter_urls: Number += 1 try: Code = get(i,headers=headers).content #Status_code = str(get(i,headers=headers).status_code) #Content = re.search(r'<div id="content">(.*)',Code)#这里的正则不理解 soup = BeautifulSoup(Code,"html.parser",from_encoding="utf8") Content = soup.find('div',id="content") H1 = re.search(r'<h1>(.*)</h1>',Code).group(1) cont = H1+"\n"+Content.encode('gbk') if cont == "": Download_Part(Chapter_urls,Number-1,Number) continue Write_into_TXT(cont) sleep(0.3) print "[Successful]%s" %H1 except: print "[%s]Error Happenning!" %ctime() sleep(2) Download_Part(Chapter_urls,Number-1,Number) # Code = get(Chapter_urls[Number-1],headers=headers).content # H1 = re.search(r'<h1>(.*)</h1>',Code).group(1) # print "[Download_Again]%s" %H1 # soup = BeautifulSoup(Code,"html.parser",from_encoding="utf8") # Content = soup.find('div',id="content") # cont = H1+"\n"+Content.encode('gbk') # Write_into_TXT(cont) def Download_Part(Chapter_urls,num1,num2): Part_Urls = Chapter_urls[num1:num2] Download_all(Part_Urls) def Select_Chapter(html): Chapter_urls = [] Chapter_names = [] num = 0 for i in html: Chapter_urls.append(i[0]) Chapter_names.append(i[1]) print "[%s]%s[Chapter_name]%s" %(num,i[0],i[1]) num += 1 print '''[选择要下载的章节数]\n\n===>[Default][all]所有章节\n===>[0-100]0到99章\n''' Chapter_Numbres = raw_input("[章节数]:") if Chapter_Numbres == "" or Chapter_Numbres == "all": #多线程下载 # for i in range(num): # t = Thread(target=Download_all,args=(Chapter_urls,)) # sleep(0.5) # t.start() Download_all(Chapter_urls) else: result = re.search(r'(.*?)-(.*)',Chapter_Numbres) number1 = int(result.group(1)) number2 = int(result.group(2)) Download_Part(Chapter_urls,number1,number2) if __name__ == "__main__": try: Fiction_Name = raw_input("[小说名称]:") while Fiction_Name == "": Fiction_Name = raw_input("[小说名称]:") print Fiction_Name html = List_Fiction(Fiction_Name) Chapter_Html = Make_Form(html) Select_Chapter(Chapter_Html) except: exit()