有没有懂的大佬帮忙看看代码,爬出来的 PDF 文件是损坏的。代码粘贴于 @小吴不吃香菜
import requests, time, random
import json
import pandas as pd
def req(stock, year, org_dict):
# post请求地址(巨潮资讯网的那个查询框实质为该地址)
url = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
# 表单数据,需要在浏览器开发者模式中查看具体格式
data = {
"pageNum": "1",
"pageSize": "30",
"tabName": "fulltext",
"stock": stock + "," + org_dict[stock], # 按照浏览器开发者模式中显示的参数格式构造参数
"seDate": f"{str(int(year) + 1)}-01-01~{str(int(year) + 1)}-12-31",
"column": "szse",
"category": "category_ndbg_szsh",
"isHLtitle": "true",
"sortName": "time",
"sortType": "desc"
}
# 请求头
headers = {"Content-Length": "201", "Con