python 爬虫 - 静态

泛华·子曦

于 2021-11-12 17:02:13 发布

阅读量1.3k

点赞数

分类专栏： python基础全栈专栏 文章标签： python jquery 前端

泛华

本文链接：https://blog.csdn.net/qq_49697349/article/details/121291266

版权

python基础全栈专栏专栏收录该内容

2 篇文章 0 订阅

订阅专栏

#-*-coding:utf-8-*-
# coding:utf-8

# Import modules
import os
import requests
from pyquery import PyQuery as body
# Base URL of the site to crawl; page paths are appended to it.
url = "https://www.xbiquge.la"
# Crawl novels from the Biquge site.
def _fetch_page(page_url):
    """Fetch ``page_url`` and return its body parsed as a PyQuery document."""
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(page_url, timeout=30)
    return body(response.content, parser='html')


def _safe_name(text):
    """Return ``text`` with path separators replaced so it is one path component."""
    for separator in (os.sep, os.altsep):
        if separator:
            text = text.replace(separator, '_')
    return text


def xiaosuo():
    """Download every novel linked from the site's index page into ./xiaosuo.

    The index page is ``url + '/xiaoshuodaquan/'``. Each ``li`` under
    ``.novellist ul`` is treated as one book: a directory named after the
    link text is created under ``<cwd>/xiaosuo``. The book page is then
    fetched, and every ``#list dd`` entry on it is treated as a chapter.
    Each chapter page's ``#content`` text (with ``p`` elements removed) is
    written to ``<book dir>/<chapter title>.txt`` as UTF-8.

    Entries without a link target are skipped. Network and filesystem
    errors are not caught here and propagate to the caller.
    """
    root_dir = os.path.join(os.getcwd(), 'xiaosuo')
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(root_dir, exist_ok=True)

    index_page = _fetch_page(url + '/xiaoshuodaquan/')
    for book_item in index_page('.novellist').find('ul').find('li').items():
        book_link = book_item.find('a')
        book_href = book_link.attr('href')
        if not book_href:
            # No link target: there is nothing to fetch for this entry.
            continue

        book_dir = os.path.join(root_dir, _safe_name(book_link.text()))
        os.makedirs(book_dir, exist_ok=True)

        # The book link is requested as-is (not prefixed with the base URL).
        book_page = _fetch_page(book_href)
        for chapter_item in book_page('#list dd').items():
            chapter_link = chapter_item.find('a')
            chapter_href = chapter_link.attr('href')
            if not chapter_href:
                continue

            # Chapter links are appended to the base URL.
            chapter_page = _fetch_page(url + chapter_href)
            chapter_text = chapter_page('#content').remove('p').text()

            chapter_path = os.path.join(
                book_dir, _safe_name(chapter_link.text()) + '.txt')
            # Explicit UTF-8 so the output does not depend on the platform's
            # default encoding; the context manager closes the file promptly.
            with open(chapter_path, 'w', encoding='utf-8') as chapter_file:
                chapter_file.write(chapter_text)
# Invoke the crawler.
xiaosuo()

泛华·子曦

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python 爬虫 - 静态

#-*-coding:utf-8-*-# coding:utf-8#导入模块import osimport requestsfrom pyquery import PyQuery as body#抓取地址url = "https://www.xbiquge.la"# 抓取小说 [笔趣阁] 函数def xiaosuo(): #判断目录xiaosuo是否存在 if not os.path.isdir(os.getcwd() + '/xiaosuo'): #不存在则创建
复制链接

扫一扫