需求
给定关键字,爬取财政部PPP项目库中对应项目的数据,并保存到SQLite数据库中。
使用的库:requests、json、sqlite3
代码写于2020-4-22,日后如果网站改版,代码可能会失效。
先上代码:
注意:下面的代码删掉了一部分,完整的代码会在这学期大作业提交之后再发布。
import requests
import json
import sqlite3
# Open (creating it if absent) the SQLite file in the current working
# directory, and keep one module-level cursor for the rest of the script.
db = sqlite3.connect('./caizhengbu.db')
cursor = db.cursor()

# Ensure the destination table exists before anything is written to it.
# The connection used as a context manager commits when the block exits
# without an exception.
with db:
    cursor.execute('create table if not exists ppp (项目名 TEXT)')
def ppp():
headers = {
# POST /api/pub/project/search-store HTTP/1.1
'Host': 'www.cpppc.org:8082',
'Connection': 'keep-alive',
'Content-Length': '167',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36',
'Content-Type': 'application/json',
'Accept': '*/*',
'Origin': 'https://www.cpppc.org:8082',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://www.cpppc.org:8082/inforpublic/homepage.html',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-CN,zh;q=0.9'
}
total_num = None
cur_num = 0
pageNum = 1
while True:
# payloadData = {"name":"医疗","industry":"",
# "min":0,"max":10000000000000000,
# "pageNumber":pageNum,"size":5,"level":"","start":"","end":"","dist_province":"","dist_city":"","dist_code":""}
# 选择领域中,医疗卫生是016,准备阶段的代码是status,1是准备,2是采购,3是执行
payloadData = {
"name":"","industry":["016"],"min":0,"max":10000000000000000,
"pageNumber":pageNum,"size":5,"status":["1", "2", "3"