# -*- coding: utf-8 -*-
"""
1.常用模块学习记录
"""
import docx
import xlrd
from io import StringIO
from io import open
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
import re
import xlwt
import random
class Study(object):
"""常用模块学习记录"""
def __init__(self):
"""初始化"""
self.user_agent = [
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36"
]
num = random.randint(0, 3) # 生产0-3的随机数
self.headers = {
"User-Agent": self.user_agent[num]
}
def read_from_docx(self):
"""读取docx: https://zhuanlan.zhihu.com/p/38251812文件"""
# 1.打开word文件,读取word文本
file = docx.opendocx("./XXX.docx")
text_list = docx.getdocumenttext(file) # type: list
# print(type(text_list), text_list)
# 2.将列表中的内容按空格拼接为字符串
text = " "
# Web-page residue (not code): article title "Common Python modules";
# latest recommended article published on 2024-05-29 23:46:42.