Python 常用模块

# -*- coding: utf-8 -*-
"""
1.常用模块学习记录
"""
import docx
import xlrd
from io import StringIO
from io import open
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
import re
import xlwt
import random


class Study(object):
    """常用模块学习记录"""

    def __init__(self):
        """初始化"""
        self.user_agent = [
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36"
        ]
        num = random.randint(0, 3)  # 生产0-3的随机数
        self.headers = {
   
            "User-Agent": self.user_agent[num]
        }

    def read_from_docx(self):
        """读取docx: https://zhuanlan.zhihu.com/p/38251812文件"""
        # 1.打开word文件,读取word文本
        file = docx.opendocx("./XXX.docx")
        text_list = docx.getdocumenttext(file)  # type: list
        # print(type(text_list), text_list)
        # 2.将列表中的内容按空格拼接为字符串
        text = " "
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值