#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
try:
    import urllib2
except ImportError:
    # Python 3 moved Request/urlopen into urllib.request. The alias only keeps
    # the module importable there; the script is written against Python 2.
    import urllib.request as urllib2
import string
import re

# --------------- Configuration -----------------
# All data lives under "<current working directory>/data".
__BaseDir__ = os.getcwd()
__DataDir__ = __BaseDir__ + "/data"
__NovelList__ = __DataDir__ + "/novelList.txt"

# --------------- Tianyi Wenxue (tianyibook.com) -----------------
# Regex applied to a novel's index page; each match is a 3-tuple whose first
# group is the chapter link.
__TYWXDir__ = r'<a href="(.*)" mce_href="(.*)"(.*)</a>'
# Regex applied to a chapter page; each match is one line of text.
# NOTE(review): this pattern (and the ' ' replacement in parseFileForTianyi)
# may originally have contained "&nbsp;" entities that were lost when the
# file was copied -- confirm against a real chapter page.
__TYWXFile__ = r' (.*)<br />'


def parseDirForTianyi(chapters):
    """Keep only the index-page matches whose link ends in 'html'.

    chapters -- sequence of regex match tuples; element 0 is the link.
    Returns a new list with the matching tuples in their original order.
    """
    return [chapter for chapter in chapters if chapter[0].endswith('html')]


def parseFileForTianyi(content):
    """Join the matched text lines of a chapter into one string.

    content -- sequence of strings (matches of __TYWXFile__).
    Lines are joined with a real newline (the previous code joined with the
    two-character string '/n') and every space character is removed.
    """
    return '\n'.join(content).replace(' ', '')


def constructUrlForTianyi(novelName, url):
    """Build a chapter URL from the novel's registered base URL.

    novelName -- name under which the novel is stored in the novel list.
    url       -- chapter link relative to the novel's base URL.
    """
    novelUrl = getNovelUrl(novelName)
    return novelUrl + '/' + url


# --------------- Configuration self-tests -------------
def docTestConfig():
    """Print the three configured paths, one per line.

    The expected output uses ELLIPSIS so the doctest does not depend on the
    directory the script happens to be started from.

    >>> docTestConfig()  # doctest: +ELLIPSIS
    /...
    /.../data
    /.../data/novelList.txt
    """
    print(__BaseDir__)
    print(__DataDir__)
    print(__NovelList__)


def testConfig():
    """Create the base directory, data directory and novel list if missing.

    >>> testConfig()
    开始检查目录结构...
    检查目录结构结束
    """
    print("开始检查目录结构...")
    if not os.path.isdir(__BaseDir__):
        os.mkdir(__BaseDir__)
    if not os.path.isdir(__DataDir__):
        os.mkdir(__DataDir__)
    if not os.path.isfile(__NovelList__):
        # Create an empty list file; the handle is closed immediately.
        with open(__NovelList__, 'w'):
            pass
    print("检查目录结构结束")


# Per-novel directory layout:
#   basedir/data/novelName/
#       config.txt
#       chapters.txt
#       ...
def testNovelDir(novelName): novelDir = __DataDir__ + '/' + novelName novelConfig = novelDir + '/config.txt' novelChapters = novelDir + '/chapters.txt' if not os.path.isdir(novelDir): os.mkdir(novelDir) if not os.path.isfile(novelConfig): url = getNovelUrl(novelName) open(novelConfig, 'w').write(url) if not os.path.isfile(novelChapters): open(novelChapters, 'w').write('') #---------------公共函数部分------------- #---------------------------------------- #小说列表部分 #小说列表结构 #id novelName novelUrl #使用分号间隔 def getNovelList(): """ 获得小说列表 """ if not os.path.isfile(__NovelList__): open(__NovelList__, 'w').write("") f = open(__NovelList__) novelList = f.readlines() f.close() novelList = [string.strip(novel) for novel in novelList if string.strip(novel) != "" ] novels = [] for novel in novelList: aNovel = string.split(novel, ";") novels.append(aNovel) return(novels) def getNovelUrl(name=None): novelList = getNovelList() for novel in novelList: if novel[1] == name: return(novel[2]) return(None) def addNovel(name, url): """ 添加小说 """ novelList = getNovelList() maxNovelID = 0 for novel in novelList: if maxNovelID < novel[0]: maxNovelID = novel[0] maxNovelID += 1 f = open(__NovelList__, 'a') aNovel = str(maxNovelID) + ";" + name + ";" + url + "/n" f.write(aNovel) f.close() def deleteNovel(name=None): """ 删除小说 """ novelList = getNovelList() for novel in novelList: if novel[1] == name: pass else: addNovel(novel[1], novel[2]) #--------------------------------------------------------- #小说章节部分 #id title url path #;分隔 def getChapterList(novelName, dirReg, funcBack): novelUrl = getNovelUrl(novelName) d = downloader() d.seturl(novelUrl) page = d.get() p = parser() p.setReg(dirReg) p.setContent(page) l = p.parse() chapterList = funcBack(l) return(chapterList) def getLocalChapterList(novelName): novelPath = __DataDir__ + '/' + novelName + '/chapters.txt' testNovelDir(novelName) f = open(novelPath) chapterList = f.readlines() chapterList = [string.split(string.strip(chapter),';') for chapter in 
chapterList if string.strip(chapter) != '' ] f.close() return(chapterList) def updateChapterList(novelName, dirReg, funcBack): remoteList = getChapterList(novelName, dirReg, funcBack) localList = getLocalChapterList(novelName) l = localList if len(localList) == 0: begin =1 for chapter in remoteList: l.append([str(begin), chapter[1], chapter[0], str(0)]) begin += 1 else: urlList = "" for chapter in localList: urlList += chapter[2] for chapter in remoteList: url = chapter[0] if url not in urlList: l.append([str(getNextMaxID(localList)), chapter[1], chapter[0], str(0)]) novelPath = __DataDir__ + '/' + novelName + '/chapters.txt' f = open(novelPath ,'w') for chapter in l: line = string.join(chapter, ';') + '/n' f.write(line) f.close() def updateLocalList(novelName, localList): testNovelDir(novelName) novelPath = __DataDir__ + '/' + novelName + '/chapters.txt' f = open(novelPath, 'w') for chapter in localList: line = string.join(chapter, ';') + '/n' f.write(line) f.close() def getNextMaxID(llist): return(int(llist[-1][0])+1) #------------------------------------------------------- #小说内容部分 def updateContents(novelName, reg, constructUrlFunc, parseContentFunc): localList = getLocalChapterList(novelName) d = downloader() p = parser() for index,chapter in enumerate(localList): if chapter[3] == '0': #下载该章节 url = chapter[2] pageUrl = constructUrlFunc(novelName, url) d.seturl(pageUrl) page = d.get() p.setReg(reg) p.setContent(page) r = p.parse() result = parseContentFunc(r) #写入文件 fileid = chapter[0] testNovelDir(novelName) fileName = __DataDir__ + '/' + novelName + '/' + fileid + '.txt' f = open(fileName, 'w') f.write(result) f.close() localList[index][3] = fileid + '.txt' updateLocalList(novelName, localList) print localList[index] #书签 class bookMark(object): def __init__(self): self.novelName = None self.bookMarkPath = None self.lastView = None def setPath(self): self.bookMarkPath = __DataDir__ + '/' + novelName + '/bookmark.txt' def setNovelName(self,novelName): 
self.novelName = novelName self.setPath() def setBookMark(self,lastid): f = open(self.bookMarkPath, 'w') f.write(lastid) f.close() #下载器 class downloader(object): def __init__(self): self.url = None self.content = None def seturl(self, url): self.url = url def get(self): self.download() return(self.content) def download(self): try: req = urllib2.Request(self.url) response = urllib2.urlopen(req) self.content = response.read() except: print("Error when getting the url:",self.url) self.content = None #页面解析器 class parser(object): def __init__(self): self.reg = None self.content = None self.result = None def setReg(self, reg): self.reg = reg def setContent(self, content): self.content = content def _parse(self): self.result = re.findall(self.reg, self.content, re.M) def parse(self): self._parse() return(self.result) def testTianyi(): """ 测试天翼文学 """ d = downloader() d.seturl("http://www.tianyibook.com/tianyibook/18/18086") page = d.get() p = parser() p.setReg(__TYWXDir__) p.setContent(page) r = p.parse() d.seturl("http://www.tianyibook.com/tianyibook/18/18086/2463582.html") page = d.get() p.setReg(__TYWXFile__) p.setContent(page) r = p.parse() print(r) #---------------测试公共函数部分--------- #---------------私有函数部分------------- #---------------界面部分----------------- #---------------主体部分----------------- def main(): pass if __name__ == "__main__": #测试部分 发布时需要屏蔽掉 import doctest doctest.testmod() #执行主体 发布时为执行主体 #main() 命令行版本,缺少界面,没有优化,目前只支持天翼文学,用于Linux,如果你在Windows下的话,建议使用小说下载阅读器。