小说获取器

最新推荐文章于 2024-04-18 13:54:36 发布

北晨lpl

最新推荐文章于 2024-04-18 13:54:36 发布

阅读量297

点赞数

分类专栏：笔记　文章标签：python

本文链接：https://blog.csdn.net/weixin_45014413/article/details/115804078

版权

笔记专栏收录该内容

23 篇文章 0 订阅

订阅专栏

这是本人第一次尝试写 Python 的包，难免有不足之处，望见谅。

一，文件结构
在这里插入图片描述
二，代码实现
1，get.py

import requests
from bs4 import BeautifulSoup

#用于获取文本数据
class GetText(object):
    """Download a web page and extract its ``<p>`` elements.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.
            Defaults to 10; without a timeout ``requests.get`` may block
            indefinitely on an unresponsive host.
    """

    def __init__(self, url, timeout=10):
        self.__url = url
        self.__timeout = timeout

    def request(self):
        """Send a GET request to the configured URL.

        Returns:
            The response object, with its encoding set to the encoding
            detected from the body, or ``False`` when the request failed
            or the server answered with an HTTP error status.
        """
        try:
            response = requests.get(self.__url, timeout=self.__timeout)
            response.raise_for_status()
            # Prefer the encoding detected from the content over the one
            # declared in the headers before the text is decoded.
            response.encoding = response.apparent_encoding
        except requests.RequestException as e:
            # Failures are reported on stdout and signalled with False
            # rather than raised.
            print(e)
            return False
        return response

    def halding_data(self):
        """Fetch the page and collect every ``<p>`` tag in it.

        The misspelled method name is kept on purpose: existing callers
        invoke it under this name.

        Returns:
            The result of ``find_all('p')`` on the parsed page, or
            ``False`` when the page could not be fetched.
        """
        res_data = self.request()
        if not res_data:
            return False
        soup = BeautifulSoup(res_data.text, 'html.parser')
        return soup.find_all('p')

2,save.py

#该模块用于存储数据
import os
from random import choice

class SaveData(object):
    """Write fetched data to a randomly named file under ``./data``.

    Args:
        file_format: ``'html'`` stores ``str(data)`` in a ``.html`` file;
            any other value stores the ``.text`` of every item, one per
            line, in a ``.txt`` file.
        data: The items to store, or ``False``/``None`` when fetching
            failed.
    """

    # Alphabets for the random file stem: two letters followed by a digit.
    _LETTERS = 'qwertyuiopasdfghjklzxcvbnm'
    _DIGITS = '0123456789'
    _DATA_DIR = './data'
    # Upper bound on how many random names are tried to dodge a collision.
    _NAME_ATTEMPTS = 100

    def __init__(self, file_format, data):
        self.__fileFormat = file_format
        self.__data = data

    def _pick_path(self, suffix):
        """Return a path in the data directory that avoids existing files.

        If every attempt collides, the last candidate is returned and the
        existing file will be overwritten.
        """
        for _ in range(self._NAME_ATTEMPTS):
            stem = (choice(self._LETTERS) + choice(self._LETTERS)
                    + choice(self._DIGITS))
            path = os.path.join(self._DATA_DIR, stem + suffix)
            if not os.path.exists(path):
                break
        return path

    def save(self):
        """Store the data in a new file and report the outcome on stdout.

        Nothing is written when the data is ``False`` or ``None`` (the
        fetch failed).  An empty collection is still saved.

        Returns:
            None.
        """
        if self.__data is False or self.__data is None:
            print('获取失败，未保存数据!')
            return

        # Build the full content before touching the disk so a bad item
        # cannot leave a half-written file behind.
        if self.__fileFormat == 'html':
            suffix = '.html'
            content = str(self.__data)
        else:
            suffix = '.txt'
            content = '\n'.join(item.text for item in self.__data)

        os.makedirs(self._DATA_DIR, exist_ok=True)
        # An explicit UTF-8 encoding keeps the write from failing on
        # platforms whose default codec cannot represent the text.
        with open(self._pick_path(suffix), 'w', encoding='utf-8') as wfile:
            wfile.write(content)
        print('获取完成!')
        return

3,main.py

import get
import save
import os

# Usage banner shown once at start-up.
tip = '''
本软件对某些平台可能不支持，后续补全测试时可以使用test.py
===========text_get=================
命令格式（command format）:
            Tget <format> <url>
format : html or text
================================
'''


def _read_command():
    """Prompt until the user enters a ``Tget <format> <url>`` command.

    Returns:
        A ``(fileformat, url)`` tuple taken from the accepted line.
    """
    while True:
        parts = input().strip().split()
        # Require exactly three tokens so a short or long line re-prompts
        # instead of raising ValueError during unpacking.
        if len(parts) == 3 and parts[0] == 'Tget':
            return parts[1], parts[2]
        print('命令错误，请重新输入:')


def main():
    """Read one command, fetch the page and save its paragraphs."""
    print(tip)
    fileformat, url = _read_command()

    GT = get.GetText(url)
    data = GT.halding_data()
    if data is False:
        # The fetch failed (the fetcher already printed the error), so
        # there is nothing to hand to the saver.
        print('获取失败!')
    else:
        SD = save.SaveData(fileformat, data)
        SD.save()

    # ``pause`` is a Windows shell command; skip it on other platforms.
    if os.name == 'nt':
        os.system('pause')


if __name__ == '__main__':
    main()

4,test.py

#该模块用于测试
import get

# Placeholder address; replace it with a real page before running.
url = '<url>'

# Fetch the page and print whatever the fetcher extracts from it
# (or False when the request fails).
fetcher = get.GetText(url)
paragraphs = fetcher.halding_data()
print(paragraphs)

北晨lpl

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
2
评论
小说获取器

这是本人第一次尝试写python的包，未免有不足之处，望见谅。一，文件结构二，代码实现1，get.pyimport requestsfrom bs4 import BeautifulSoup#用于获取文本数据class GetText(object): def __init__(self,url): self.__url = url #向网页发送请求 def request(self): try: respo
复制链接

扫一扫