#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape wholesale vegetable prices from xinfadi.com.cn, page by page."""

import json
from concurrent.futures import ThreadPoolExecutor

import requests


class Vegetable_Price(object):
    """Fetches and prints one page of price rows from the Xinfadi price API."""

    def __init__(self):
        # POST endpoint that returns a JSON payload with a "list" of price rows.
        self.url = "http://www.xinfadi.com.cn/getPriceData.html"
        self.headers = {"User-Agent": "Mozilla/5.0"}

    def get_data_index(self, num):
        """Fetch page *num* (20 rows per page) and print each row's fields.

        :param num: 1-based page number sent as the ``current`` form field.
        """
        payload = {
            "limit": "20",
            "current": num,
            "pubDateStartTime": "",
            "pubDateEndTime": "",
            "prodPcatid": "",
            "prodCatid": "",
            "prodName": "",
        }
        # timeout prevents a stalled server from hanging the worker forever.
        resp = requests.post(self.url, data=payload, headers=self.headers, timeout=10)
        # Fail loudly on HTTP errors instead of parsing an error page as JSON.
        resp.raise_for_status()
        # resp.json() replaces json.loads(resp.text); same result, one call.
        rows = resp.json()["list"]
        for row in rows:  # renamed from `data` — original shadowed the payload dict
            print(
                row["prodCat"],
                row["prodName"],
                row["lowPrice"],
                row["avgPrice"],
                row["highPrice"],
                row["place"],
                row["unitInfo"],
            )


if __name__ == '__main__':
    spider = Vegetable_Price()
    # HTTP fetching is I/O-bound: threads overlap the network waits and avoid
    # the process-spawn + pickling cost of ProcessPoolExecutor.
    with ThreadPoolExecutor(20) as pool:
        for i in range(1, 50):
            pool.submit(spider.get_data_index, i)
努力学习python爬虫ing,多进程多线程爬取数据,数据快多了,期待scrapy框架的学习,听说是神一样存在的。