Python 爬虫 -> 北京新发地所有菜价源代码

小旷爷

已于 2024-09-18 21:06:09 修改

阅读量604

点赞数 10

分类专栏：爬虫　文章标签：python、爬虫、开发语言

于 2024-08-30 00:58:42 首次发布

本文链接：https://blog.csdn.net/2301_80851925/article/details/141691595

版权

爬虫专栏收录该内容

1 篇文章 0 订阅

订阅专栏

import requests
from concurrent.futures import ThreadPoolExecutor
import time
import csv


def get_data(page, timeout=10):
    """Fetch one page of price records and append them to the CSV file.

    Sends a POST to the ``getPriceData.html`` endpoint requesting 20 records
    of page ``page`` with every filter field left empty, then appends one CSV
    row per entry of the response's ``list`` array. Columns are written in
    the order: prodName, prodCat, highPrice, lowPrice, avgPrice, pubDate.

    Args:
        page: Value sent as the ``current`` form field (the page to fetch).
        timeout: Seconds handed to ``requests.post`` so that a stalled
            connection cannot block the calling thread indefinitely.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If a record lacks one of the expected fields.
        OSError: If the output file cannot be opened for appending.
    """
    url = 'http://www.xinfadi.com.cn/getPriceData.html'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0'
    }
    data = {
        'limit': 20,
        'current': page,
        'pubDateStartTime': '',
        'pubDateEndTime': '',
        'prodPcatid': '',
        'prodCatid': '',
        'prodName': ''
    }

    # Fetch first: a failed request must not leave an open file handle
    # behind, and the context manager releases the connection on any error.
    with requests.post(url, headers=headers, data=data, timeout=timeout) as resp:
        resp.raise_for_status()
        # A missing or null 'list' is treated as "no records on this page".
        lists = resp.json().get('list') or []

    # Build every row before opening the file so that a malformed record
    # aborts the page without leaving a partially written batch.
    rows = [
        [
            item['prodName'],
            item['prodCat'],
            item['highPrice'],
            item['lowPrice'],
            item['avgPrice'],
            item['pubDate'],
        ]
        for item in lists
    ]

    # Explicit UTF-8 keeps the Chinese product names writable regardless of
    # the platform's default locale encoding.
    # NOTE(review): if an existing output file was produced with a different
    # encoding, start a fresh file to avoid mixing encodings.
    with open('../../get_bug/csv/北京新发地all_vegetable_price', mode='a',
              newline='', encoding='utf-8') as f:
        csvwriter = csv.writer(f)
        for row in rows:
            csvwriter.writerow(row)
            print('完成')


if __name__ == '__main__':
    # Each entry is (page number, future) so failures can be reported per page.
    futures = []
    with ThreadPoolExecutor(80) as t:
        # Pages are numbered from 1; 21651 is the hard-coded number of pages.
        for i in range(1, 1 + 21651):
            futures.append((i, t.submit(get_data, i)))
            # One-second pause between submissions; presumably throttles the
            # request rate against the remote site -- confirm before tuning.
            time.sleep(1)
    # Leaving the ``with`` block shuts the pool down and waits for every
    # queued task, so no explicit shutdown() call is needed and everything
    # below runs only after all pages have been processed.
    for page, future in futures:
        error = future.exception()
        if error is not None:
            # Surface worker failures that would otherwise be dropped silently.
            print('page', page, 'failed:', repr(error))
    print('over!')