# 爬取流程 (crawl workflow):
# 第一步 (step 1):
# 第二步 (step 2):
# 第三步 (step 3):
# 第四步 (step 4):
# 第五步 (step 5):
# 代码流程 (code flow):
import requests
import re
import json
from lxml import etree
import urllib.parse
import urllib
# Shared HTTP request headers: a desktop Chrome User-Agent so the site serves
# normal pages instead of rejecting the default python-requests UA.
header = {
"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}
# Parse a category-1 list page: detail link, title, author and intro per novel.
def qingqiu(url):
    """Fetch one list page of category 1 and crawl every novel found on it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq(jiexi) — passed the possibly-relative href, which
        # requests.get cannot fetch; pass the joined absolute URL instead.
        yuedxq(pinjie)
# Follow a novel's detail page and locate the "start reading" link.
def yuedxq(url):
    """Open the novel detail page, resolve the reader link, and hand it on."""
    resp = requests.get(url, headers=header)
    resp.encoding = 'gbk'  # site pages are GBK-encoded
    print(resp.status_code)
    tree = etree.HTML(resp.text)
    for info in tree.xpath('//div[@class="detail"]/div[@class="b-info"]'):
        href = info.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        reader_url = urllib.parse.urljoin(resp.url, href)
        print(reader_url)
        hqzangj(reader_url)
# From the reader link, list every chapter's title and absolute URL.
def hqzangj(url):
    """Iterate the chapter index page and fetch each chapter in turn."""
    resp = requests.get(url, headers=header)
    resp.encoding = 'gbk'  # site pages are GBK-encoded
    print(resp.status_code)
    tree = etree.HTML(resp.text)
    for item in tree.xpath('//div[@class="clearfix dirconone"]/li'):
        chapter_title = item.xpath('./a/@title')
        href = item.xpath('./a/@href')[0]
        chapter_url = urllib.parse.urljoin(resp.url, href)
        print(chapter_title, chapter_url)
        readxq(chapter_url)
# Pull a chapter's body text and title from the reading page.
def readxq(url):
    """Scrape one chapter page and persist every text fragment via writes()."""
    resp = requests.get(url, headers=header)
    resp.encoding = 'gbk'  # site pages are GBK-encoded
    print(resp.status_code)
    tree = etree.HTML(resp.text)
    fragments = tree.xpath('//div[@class="mainContenr"]/text()')
    chapter_title = tree.xpath('//strong[@class="l jieqi_title"]/text()')
    print(chapter_title, fragments)
    # Each text node is appended separately to the chapter's file.
    for fragment in fragments:
        writes(fragment, chapter_title)
# Append one fragment of chapter text to a per-title .txt file.
def writes(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    # Explicit encoding so the GBK-decoded text round-trips on any platform.
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 2 (武侠修真): list -> detail -> chapter index -> content.
def qingqiu2(url_q):
    """Fetch one list page of category 2 and crawl every novel found on it."""
    response = requests.get(url_q, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq2(jiexi) — the possibly-relative href cannot be
        # fetched by requests; pass the joined absolute URL instead.
        yuedxq2(pinjie)


def yuedxq2(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj2(pinjie)


def hqzangj2(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq2(pinjie)


def readxq2(url):
    """Scrape one chapter page; persist every text fragment via writes2()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes2(i, c)


def writes2(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 3: list -> detail -> chapter index -> content.
def qingqiu3(url_w):
    """Fetch one list page of category 3 and crawl every novel found on it."""
    response = requests.get(url_w, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq3(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq3(pinjie)


def yuedxq3(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj3(pinjie)


def hqzangj3(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq3(pinjie)


def readxq3(url):
    """Scrape one chapter page; persist every text fragment via writes3()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes3(i, c)


def writes3(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 4: list -> detail -> chapter index -> content.
def qingqiu4(url_e):
    """Fetch one list page of category 4 and crawl every novel found on it."""
    response = requests.get(url_e, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq4(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq4(pinjie)


def yuedxq4(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj4(pinjie)


def hqzangj4(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq4(pinjie)


def readxq4(url):
    """Scrape one chapter page; persist every text fragment via writes4()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes4(i, c)


def writes4(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 5: list -> detail -> chapter index -> content.
def qingqiu5(url_e):
    """Fetch one list page of category 5 and crawl every novel found on it."""
    response = requests.get(url_e, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq5(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq5(pinjie)


def yuedxq5(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj5(pinjie)


def hqzangj5(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq5(pinjie)


def readxq5(url):
    """Scrape one chapter page; persist every text fragment via writes5()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes5(i, c)


def writes5(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 6: list -> detail -> chapter index -> content.
def qingqiu6(url_r):
    """Fetch one list page of category 6 and crawl every novel found on it."""
    response = requests.get(url_r, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq6(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq6(pinjie)


def yuedxq6(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj6(pinjie)


def hqzangj6(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq6(pinjie)


def readxq6(url):
    """Scrape one chapter page; persist every text fragment via writes6()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes6(i, c)


def writes6(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 7: list -> detail -> chapter index -> content.
def qingqiu7(url_t):
    """Fetch one list page of category 7 and crawl every novel found on it."""
    response = requests.get(url_t, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq7(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq7(pinjie)


def yuedxq7(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj7(pinjie)


def hqzangj7(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq7(pinjie)


def readxq7(url):
    """Scrape one chapter page; persist every text fragment via writes7()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes7(i, c)


def writes7(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 8: list -> detail -> chapter index -> content.
def qingqiu8(url_y):
    """Fetch one list page of category 8 and crawl every novel found on it."""
    response = requests.get(url_y, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq8(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq8(pinjie)


def yuedxq8(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj8(pinjie)


def hqzangj8(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq8(pinjie)


def readxq8(url):
    """Scrape one chapter page; persist every text fragment via writes8()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes8(i, c)


def writes8(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 9: list -> detail -> chapter index -> content.
def qingqiu9(url_u):
    """Fetch one list page of category 9 and crawl every novel found on it."""
    response = requests.get(url_u, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq9(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq9(pinjie)


def yuedxq9(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj9(pinjie)


def hqzangj9(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq9(pinjie)


def readxq9(url):
    """Scrape one chapter page; persist every text fragment via writes9()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes9(i, c)


def writes9(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 10: list -> detail -> chapter index -> content.
def qingqiu10(url_i):
    """Fetch one list page of category 10 and crawl every novel found on it."""
    response = requests.get(url_i, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq10(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq10(pinjie)


def yuedxq10(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj10(pinjie)


def hqzangj10(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq10(pinjie)


def readxq10(url):
    """Scrape one chapter page; persist every text fragment via writes10()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes10(i, c)


def writes10(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 11: list -> detail -> chapter index -> content.
def qingqiu11(url_o):
    """Fetch one list page of category 11 and crawl every novel found on it."""
    response = requests.get(url_o, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq11(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq11(pinjie)


def yuedxq11(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        hqzangj11(pinjie)


def hqzangj11(url):
    """Iterate the chapter index: title and absolute link of every chapter."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="clearfix dirconone"]/li')
    for i in b:
        title = i.xpath('./a/@title')
        jiexi = i.xpath('./a/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(title, pinjie)
        readxq11(pinjie)


def readxq11(url):
    """Scrape one chapter page; persist every text fragment via writes11()."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="mainContenr"]/text()')
    c = a.xpath('//strong[@class="l jieqi_title"]/text()')
    print(c, b)
    for i in b:
        writes11(i, c)


def writes11(t, c):
    """Append text fragment t to '<title>.txt'; c is the xpath title list."""
    # BUG FIX: c is an xpath result *list*; formatting the list itself produced
    # filenames like "['title'].txt". Use its first element, with a fallback.
    title = c[0] if c else 'untitled'
    with open('{}.txt'.format(title), 'a+', encoding='utf-8') as f:
        f.write(t)
    print("-------------------------------------------------------------------------------")
# Spider chain for category 12: list -> detail -> reader-page item links.
def qingqiu12(url_p):
    """Fetch one list page of category 12 and crawl every novel found on it."""
    response = requests.get(url_p, headers=header)
    response.encoding = 'gbk'  # site pages are GBK-encoded
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//ul[@class="seeWell cf"]/li')
    for i in b:
        # Novel detail link; join against the page URL since href may be relative.
        jiexi = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        jiexi_title = i.xpath('.//span[@class="l"]/a[@class="clearfix stitle"]/@title')
        jiexi_zz = i.xpath('.//span[@class="l"]/a[2]/text()')
        jiexi_nr = i.xpath('.//span[@class="l"]/em[@class="c999 clearfix"]/text()')
        print(pinjie, jiexi_title, jiexi_zz, jiexi_nr)
        # BUG FIX: was yuedxq12(jiexi) — pass the joined absolute URL instead of
        # the possibly-relative href.
        yuedxq12(pinjie)


def yuedxq12(url):
    """Resolve the 'start reading' link on a novel detail page and follow it."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="detail"]/div[@class="b-info"]')
    for i in b:
        jiexi = i.xpath('.//div[@class="b-oper"]/a[@class="reader"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
        jinru(pinjie)


def jinru(url):
    """Print the absolute link of the first 'orange' item in each list block."""
    response = requests.get(url, headers=header)
    response.encoding = 'gbk'
    print(response.status_code)
    a = etree.HTML(response.text)
    b = a.xpath('//div[@class="item"]/ul')
    for i in b:
        jiexi = i.xpath('.//li/a[@class="orange"]/@href')[0]
        pinjie = urllib.parse.urljoin(response.url, jiexi)
        print(pinjie)
if __name__ == '__main__':
    # Crawl category 1; the site exposes 982 list pages in total (共982页),
    # but only the first page is fetched here.
    for page in range(1, 2):
        qingqiu("http://www.quanshuwang.com/list/1_%s.html" % str(page))
    # Remaining categories, kept disabled for manual runs:
    # qingqiu2("http://www.quanshuwang.com/list/2_1.html")
    # qingqiu3("http://www.quanshuwang.com/list/3_1.html")
    # qingqiu4("http://www.quanshuwang.com/list/4_1.html")
    # qingqiu5("http://www.quanshuwang.com/list/5_1.html")
    # qingqiu6("http://www.quanshuwang.com/list/6_1.html")
    # qingqiu7("http://www.quanshuwang.com/list/7_1.html")
    # qingqiu8("http://www.quanshuwang.com/list/8_1.html")
    # qingqiu9("http://www.quanshuwang.com/list/9_1.html")
    # qingqiu10("http://www.quanshuwang.com/list/10_1.html")
    # qingqiu11("http://www.quanshuwang.com/list/11_1.html")
    # qingqiu12("http://www.quanshuwang.com/list/12_1.html")