requests爬取IT橘子

requests爬取IT橘子

import requests
import json

def get_html(url):
    """Log in to the ITjuzi authorization API and return the decoded JSON reply.

    Posts the account/password form to *url* through a ``requests`` session,
    together with a browser User-Agent header and the cookies captured from a
    browser session, then decodes the response body as JSON.

    Args:
        url: Address of the login (authorization) endpoint to POST to.

    Returns:
        The parsed JSON body of the login response.

    Raises:
        ValueError: If the response body is not valid JSON
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
        Exceptions raised by ``requests`` for network failures propagate
        unchanged.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
    }
    # Simulated login form.
    login_form_data = {
        "account":"xxxxxxx",
        "password":"xxxxxxxx"
    }
    # Raw cookie header as copied from the browser's developer tools.
    raw_cookies = "Cookie: Hm_lvt_1c587ad486cdb6b962e94fc2002edf89=1575336964; juzi_user=790974; juzi_token=bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJodHRwczpcL1wvd3d3Lml0anV6aS5jb21cL2FwaVwvYXV0aG9yaXphdGlvbnMiLCJpYXQiOjE1NzUzMzcwMzAsImV4cCI6MTU3NTM0MDYzMCwibmJmIjoxNTc1MzM3MDMwLCJqdGkiOiJxU3ZBSHlyWjQ0UGhId3JLIiwic3ViIjo3OTA5NzQsInBydiI6IjIzYmQ1Yzg5NDlmNjAwYWRiMzllNzAxYzQwMDg3MmRiN2E1OTc2ZjciLCJ1dWlkIjoicDlXTUVjIn0.H2hyoZ6at7VMv7-mspnjQWt8TfMfySPRxv39yFhGFrc; _ga=GA1.2.874127990.1575337042; _gid=GA1.2.111308867.1575337042; gr_user_id=c671585e-d0a5-413f-85fb-cbc890a0e29c; gr_session_id_eee5a46c52000d401f969f4535bdaa78=098951e0-d9ac-4c35-844f-e34141cb4daf; gr_session_id_eee5a46c52000d401f969f4535bdaa78_098951e0-d9ac-4c35-844f-e34141cb4daf=true; MEIQIA_TRACK_ID=1US5EUXX7pvM4mpap7cpZpW0gZ2; MEIQIA_VISIT_ID=1US5EOIFQsSpi5gGG9N3o34dTsd; Hm_lpvt_1c587ad486cdb6b962e94fc2002edf89=1575338724"

    # Drop the literal "Cookie:" header name so it does not end up inside the
    # first cookie's name.
    cookie_header = raw_cookies
    if cookie_header.startswith("Cookie:"):
        cookie_header = cookie_header[len("Cookie:"):]

    cook_dict = {}
    for pair in cookie_header.split(";"):
        # Strip the space that follows each ';' and split on the FIRST '='
        # only, so a value that itself contains '=' is kept whole.
        name, sep, value = pair.strip().partition("=")
        if sep and name:
            cook_dict[name] = value

    # The context manager closes the session's connections when done.
    with requests.session() as session:
        login_response = session.post(url=url, data=login_form_data, headers=headers, cookies=cook_dict)

    # json.loads() no longer accepts an ``encoding`` keyword (removed in
    # Python 3.9), so decode the bytes explicitly instead.
    content = json.loads(login_response.content.decode("utf-8"))
    return content

def get_info(content):
    """Fetch every page of the ``investevents`` API and append the records to a file.

    Reads the token from the login reply, queries the first page to learn the
    total number of records, then requests each page in turn. A fixed set of
    fields is copied from every record; each resulting dict is appended to
    ``F://数据//IT橘子.json`` as an indented JSON object and printed.

    Args:
        content: Decoded login reply; must contain ``content['data']['token']``.

    Returns:
        None.

    Raises:
        KeyError: If the login reply or an API reply lacks an expected key.
        ValueError: If an API reply is not valid JSON.
        Exceptions raised by ``requests`` or by opening the output file
        propagate unchanged.
    """
    # Token issued by the login call, needed to view the data.
    token = content['data']['token']
    url = "https://www.itjuzi.com/api/investevents"
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Content-Type": "application/json",
        "Authorization": token,    # sent in the request headers
    }
    per_page = 20
    # Fields copied from each record, in output order.
    fields = (
        "id", "com_id", "name", "com_scope", "com_sub_scope", "round",
        "money", "money_num", "valuation", "city", "agg_time", "slogan",
        "com_registered_name", "com_des", "invse_title", "invse_des",
        "currency",
    )

    def build_payload(page, pagetotal):
        # pagetotal: total record count, per_page: records per page,
        # page: page number to fetch.
        return {
            "pagetotal": pagetotal, "total": 0, "per_page": per_page, "page": page, "type": 1, "scope": "", "sub_scope": "",
            "round": [], "valuation": [], "valuations": "", "ipo_platform": "", "equity_ratio": [""],
            "status": "", "prov": "", "city": [], "time": [], "selected": "", "location": "", "currency": [],
            "keyword": ""
        }

    response = requests.post(url=url, headers=headers, data=json.dumps(build_payload(1, 0)))
    # json.loads() no longer accepts ``encoding`` (removed in Python 3.9);
    # response.text is already a str.
    info = json.loads(response.text)

    # Total number of records reported by the API.
    count = int(info['data']['page']['total'])
    # Ceiling division: in Python 3 ``count / 20`` is always a float, so the
    # old type check added an extra page whenever count was a multiple of 20.
    page_count = (count + per_page - 1) // per_page
    if page_count == 0:
        # Nothing to fetch; avoid creating an empty output file.
        return

    # Open the output file once instead of once per record.
    with open("F://数据//IT橘子.json", 'a', encoding="utf-8") as f:
        for page in range(1, page_count + 1):
            response = requests.post(url, headers=headers, data=json.dumps(build_payload(page, count)))
            info = json.loads(response.text)
            # Iterate over what the page actually holds; the last page may
            # contain fewer than ``per_page`` records.
            for item in info['data']['data']:
                json_dict = {key: item[key] for key in fields}
                json.dump(json_dict, f, ensure_ascii=False, indent=4)
                print(json_dict)

if __name__ == '__main__':
    # Script entry point: log in first, then pass the login reply to the crawler.
    login_reply = get_html(url="https://www.itjuzi.com/api/authorizations")
    get_info(content=login_reply)


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值