# - Absolute layout
# - No chapter preloading
# - Known bug: searching again without closing first misbehaves
import sys
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
import random
import requests
from lxml import etree
from bs4 import BeautifulSoup
from urllib.parse import quote
def dataGet(url):
    """Download *url* and return its body decoded as GBK.

    A User-Agent string is chosen at random from a fixed pool on every call.
    The request is attempted at most four times, each with a 4-second
    timeout; only ``requests.exceptions.RequestException`` triggers a retry.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text decoded with the ``gbk`` codec, or ``None`` when
        all four attempts raised ``requests.exceptions.RequestException``.
    """
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    ]
    headers = {'User-Agent': random.choice(user_agent_list)}

    for _ in range(4):  # 4 s timeout per attempt, at most 4 attempts
        try:
            # Pass headers by keyword: the second positional parameter of
            # requests.get is ``params``, which would put the User-Agent
            # into the query string instead of the request headers.
            response = requests.get(url, headers=headers, timeout=4)
        except requests.exceptions.RequestException:
            continue
        # The body is decoded as GBK regardless of what the server declares.
        response.encoding = 'gbk'
        return response.text

    # Every attempt failed; callers receive None, as before.
    return None
def novelSearch(data):
    """Extract result rows from a search-result HTML page.

    Each ``<li>`` element in *data* is examined for ``<span>`` elements with
    classes ``s1``, ``s2``, ``s3``, ``s4``, ``s5`` and ``s7``. A row is kept
    only when every one of the seven looked-up values is present, so header
    rows or unrelated ``<li>`` elements are skipped instead of raising
    ``IndexError``.

    Args:
        data: HTML text of the search-result page.

    Returns:
        A three-item list ``[novelList, novelInfoList, linkList]``.
        ``novelInfoList`` holds one seven-item list per kept row, in this
        order: ``s1`` text, ``s2`` link text, ``s2`` link href, ``s3`` link
        text, ``s4`` text, ``s5`` text, ``s7`` text.
    """
    soup = BeautifulSoup(data, features='lxml')
    # NOTE(review): novelList and linkList are never filled in this function
    # and are returned empty; kept so the return shape stays unchanged.
    novelList = []
    novelInfoList = []
    linkList = []

    # XPath queries in the same order as the fields of each result row.
    field_paths = (
        '//span[@class="s1"]/text()',
        '//span[@class="s2"]/a/text()',
        '//span[@class="s2"]/a/@href',
        '//span[@class="s3"]/a/text()',
        '//span[@class="s4"]/text()',
        '//span[@class="s5"]/text()',
        '//span[@class="s7"]/text()',
    )

    for li in soup.find_all('li'):
        # Re-parse the single <li> so the absolute XPaths only see this row.
        html = etree.HTML(str(li))
        fields = [html.xpath(path) for path in field_paths]
        # Require all seven fields; checking only some of them would let
        # the [0] lookups below fail on a partially filled row.
        if all(fields):
            novelInfoList.append([values[0] for values in fields])

    return [novelList, novelInfoList, linkList]
def chapterGet(data, link):
    """Build the chapter list from a novel's index page.

    Args:
        data: HTML text of the index page.
        link: String prepended to every chapter href found in the page.

    Returns:
        A list of ``[chapter_text, link + href]`` pairs, one per
        ``<dl>/<dd>/<a>`` text/href pair, in document order.
    """
    tree = etree.HTML(data)
    titles = tree.xpath('//dl/dd/a/text()')
    hrefs = tree.xpath('//dl/dd/a/@href')
    # zip stops at the shorter of the two result lists.
    return [[title, link + href] for title, href in zip(titles, hrefs)]
def contentGet(data):
string = data.replace('<br />', '').replace<