分享56个Python爬虫源码，总有一个是您想要的

亚丁号

已于 2024-07-26 08:04:31 修改

阅读量1.4k

点赞数 18

文章标签：信息可视化

于 2024-07-25 11:21:38 首次发布

本文链接：https://blog.csdn.net/zy0412326/article/details/140684753

版权

分享56个Python爬虫源码，总有一个是您想要的

学习知识费力气，收集整理更不易。

知识付费甚欢喜，为咱码农谋福利。

源码下载链接：https://pan.baidu.com/s/1YtCYI_6VMF0AAwgnFKOJ-w?pwd=8888
提取码：8888

项目名称：

Python JavaScript 逆向爬虫

Python 爬虫案例

python 编写，采用广度优先策略，线程池实现的爬虫

Python3爬虫课程代码

python、pysimplegui、GUI、爬虫、可视化、天气查询系统

Python实现基于协程的异步爬虫

import os
import shutil

def void_folder(path):
    """Delete every empty directory found beneath ``path``.

    The tree is processed bottom-up: each subdirectory is cleaned before it
    is checked for emptiness, so a directory that contains nothing but empty
    directories is removed in the same pass. ``path`` itself is never
    removed, and regular files are left untouched.

    Args:
        path: Directory whose subtree should be pruned.

    Raises:
        OSError: If ``path`` cannot be listed or a directory cannot be
            removed.
    """
    for name in os.listdir(path):
        real_path = os.path.join(path, name)
        if not os.path.isdir(real_path):
            # Plain files are never touched; only directories are pruned.
            continue
        # Prune the children first so that emptiness is judged on the
        # already-cleaned directory rather than on its original contents.
        void_folder(real_path)
        if not os.listdir(real_path):
            print("void_folder()：" + name)
            # os.rmdir only ever removes an empty directory, so nothing
            # that appears in the meantime can be deleted by accident.
            os.rmdir(real_path)


def void_file(dirPath):
    """Delete archive files located directly inside ``dirPath``.

    An entry is deleted when its final extension is exactly ``.rar``,
    ``.zip``, ``.gz`` or ``.tgz`` (case-sensitive). The scan is not
    recursive. Real directories are skipped even if their name carries one
    of these extensions, because ``os.remove`` cannot delete a directory.

    Args:
        dirPath: Directory whose immediate entries are inspected.

    Raises:
        OSError: If ``dirPath`` cannot be listed or a file cannot be removed.
    """
    archive_exts = {".rar", ".zip", ".gz", ".tgz"}
    for file in os.listdir(dirPath):
        # Entries without an extension yield "" and never match.
        if os.path.splitext(file)[-1] not in archive_exts:
            continue
        file_full_name = os.path.join(dirPath, file)
        # A directory called e.g. "backup.zip" must not reach os.remove;
        # symlinks are still removable, so they are not skipped.
        if os.path.isdir(file_full_name) and not os.path.islink(file_full_name):
            continue
        os.remove(file_full_name)

def search_file(dirPath, fileName):
    """Recursively delete every file named ``fileName`` under ``dirPath``.

    Directories are descended into (never deleted, even when their own name
    equals ``fileName``). Each matching file has its path printed and is
    then removed.

    Args:
        dirPath: Root directory of the search.
        fileName: Exact entry name to match.
    """
    for entry in os.listdir(dirPath):
        # Paths are always joined with "/" so the printed form is stable.
        candidate = "/".join((dirPath, entry))
        if os.path.isdir(candidate):
            # Descend into the subdirectory and keep looking there.
            search_file(candidate, fileName)
            continue
        if entry != fileName:
            continue
        print(candidate)
        os.remove(candidate)

if __name__ == "__main__":
    # Raw string: the path contains backslashes followed by letters, which
    # are invalid escape sequences in a normal literal and trigger a
    # SyntaxWarning on recent Python versions. The value is unchanged.
    dirPath = r'D:\Spider\Html\DIV+CSS模板\98个DIV+CSS模板\DIV+CSS模板'

    # Advertising / readme files to delete from the whole tree.
    junk_names = (
        "ReadMe.txt",
        "下载网页模板.url",
        "下载网页特效.url",
        "下载字体.url",
        "轻松设计漂亮的网页-mobanwang.com.url",
        "松设计漂亮的网页-mobanwang.com.url",
    )
    for junk_name in junk_names:
        search_file(dirPath, junk_name)

    # Finally drop archive files sitting directly in the root directory.
    void_file(dirPath)

    # Junk file names seen in other download sources; currently disabled.
    # Move an entry into junk_names above to enable it.
    #     "php中文网下载站.url",
    #     "php中文网免费下载站.txt",
    #     "访问懒人之家.url",
    #     "lanrenzhijia.com下载说明.txt",
    #     "服务器软件.url",
    #     "downcode.com.txt",
    #     "中国源码下载站.url",
    #     "脚本之家.url",
    #     "jb51.net.txt",
    #     "说明.htm",
    #     "Readme-说明.htm",
    #     "cnzzz.com.txt",
    #     "源码之家说明.txt",
    #     "服务器常用软件.html",
    #     "访问脚本之家.html",
    #     "chinaz.com.txt",
    #     "访问查看.url",
    #
    # Disabled: prune empty directories after the junk files are removed.
    # void_folder(dirPath)

Python爬虫

Python爬虫, 豆瓣, 逆水寒藏宝阁

python爬虫-旅游景点

python爬虫——抢课原理

Python爬虫基础，爬取王者荣耀、英雄联盟的英雄皮肤

Python爬虫工具库(异步爬虫类、线程池爬虫类、爬虫实用函数)

python爬虫爬取全国高校新闻

Python爬虫爬取公众号所有文章信息，包括标题、url、md链接

python爬虫爬取某度搜索内容

python爬虫练手项目，或许不止爬虫

python爬虫逆向项目合集，每个文件夹都是一个成品项目

Python编写的爬虫合集，欢迎Star(豆瓣，某度翻译，DY，优酷，B站，今日头疼，笔趣阁，Unsplash，起点中文网，一点资讯，空气质量，酷酷漫画)