# Python: scraping Xiaomi crowdfunding data
# Scrape the Xiaomi crowdfunding listing
import pymongo
import requests
import time
# Connect to the MongoDB server on localhost:27017 and bind the ``zc_items``
# collection of the ``MI`` database, which receives the scraped records.
client = pymongo.MongoClient(host='localhost', port=27017)
MI = client.MI
zc_items = MI.zc_items
def spider(the_url, timeout=10):
    """Fetch the Xiaomi crowdfunding listing and return it as decoded JSON.

    Within one session this requests the crowdfunding list page and the
    timestamp endpoint (their responses are not used), waits one second, and
    then POSTs the ``Homepage``/``BuildHome`` request body to ``the_url``.

    Args:
        the_url: URL of the endpoint that receives the POST request.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The JSON-decoded body of the POST response.

    Raises:
        requests.RequestException: If a request cannot be completed in time,
            or the POST response carries an HTTP error status.
        ValueError: If the POST response body is not valid JSON.
    """
    list_page_url = "https://home.mi.com/crowdfundinglist"
    timestamp_url = "https://tp.hd.mi.com/gettimestamp"
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
        'Referer': 'https://home.mi.com/crowdfundinglist?id=78&title=%E4%BC%97%E7%AD%B9&trace=m1z1-6033-29',
        'Host': 'home.mi.com',
        'Origin': 'https://home.mi.com',
        # NOTE(review): this advertises "br"; decoding a Brotli-compressed
        # response presumably needs a Brotli package installed next to
        # requests -- confirm in the deployment environment.
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cookie': 'youpindistinct_id=164dc64d48c395-01dba0ddc4fe75-d35346d; mijiatrace=m1z1-6033-29; Hm_lvt_3d0145da4163eae34eb5e5c70dc07d97=1532705970; Hm_lpvt_3d0145da4163eae34eb5e5c70dc07d97=1532705970; youpin_sessionid=164dc6557ac-0dc2302fd1eeb3-207f; mjclient=m',
    }
    data = '{"request":{"model":"Homepage","action":"BuildHome","parameters":{"id":"78"}}}'

    # The context manager closes the session's pooled connections on exit,
    # including when one of the requests raises.
    with requests.Session() as session:
        # Warm-up requests: only their effect on the session matters
        # (presumably cookies the API expects -- TODO confirm), so the
        # responses are discarded.
        session.get(list_page_url, headers=header, timeout=timeout)
        session.get(timestamp_url, timeout=timeout)
        time.sleep(1)

        res = session.post(the_url, headers=header, data=data, timeout=timeout)
        # Report an HTTP error status directly instead of letting it show up
        # later as a confusing JSON decoding failure.
        res.raise_for_status()
        return res.json()
def format_timestamp(epoch_seconds):
    """Render a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    Args:
        epoch_seconds: Seconds since the epoch, as an int or a numeric string.

    Returns:
        The formatted local-time string, e.g. ``2016-05-05 20:28:54``.
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(epoch_seconds)))


def extract_items(payload):
    """Return the value stored at ``result -> request -> data`` in *payload*.

    Args:
        payload: The decoded JSON response.

    Returns:
        The ``data`` value (the script iterates over it as a list of items).

    Raises:
        ValueError: If any level of the path is missing, null, or its parent
            is not a dict.
    """
    node = payload
    for key in ('result', 'request', 'data'):
        if not isinstance(node, dict) or node.get(key) is None:
            raise ValueError(
                "unexpected response: no {!r} under result/request/data".format(key))
        node = node[key]
    return node


def build_record(item):
    """Convert one raw crowdfunding item into the document stored in MongoDB.

    Args:
        item: One element of the response's ``data`` list.

    Returns:
        A dict with the keys ``name``, ``summary``, ``url``, ``saled``,
        ``saled_count``, ``saled_fee``, ``price``, ``status``, ``progress``,
        ``start``, ``end``, ``ctime`` and ``utime``.

    Raises:
        KeyError: If *item* lacks one of the fields read here.
    """
    return {
        'name': item['name'],
        'summary': item['summary'],
        'url': item['url'],
        'saled': item['saled'],
        'saled_count': item['saled_count'],
        # Amounts are divided by 100; presumably the API reports them in
        # hundredths of a yuan -- TODO confirm.
        'saled_fee': int(item['saled_fee']) / 100,
        'price': int(item['price_min']) / 100,
        'status': "已成功" if item['status'] else "众筹中",
        'progress': item['progress'],
        'start': format_timestamp(item['start']),
        'end': format_timestamp(item['end']),
        'ctime': format_timestamp(item['ctime']),
        'utime': format_timestamp(item['utime']),
    }


def main():
    """Fetch the crowdfunding listing, print each item and store it."""
    api_url = "https://home.mi.com/app/shopv3/pipe"
    for item in extract_items(spider(api_url)):
        record = build_record(item)
        print(record['name'], record['summary'], record['url'], record['saled'],
              record['saled_count'], record['saled_fee'], record['price'],
              record['status'],
              # progress is divided by 100 before percent formatting,
              # so a value of 150 prints as "150%".
              '{:.0%}'.format(record['progress'] / 100),
              record['start'], record['end'], record['ctime'], record['utime'])
        zc_items.insert_one(record)


if __name__ == "__main__":
    main()