python数据库抓取并保存_Pyspider实例之抓取数据并保存到MySQL数据库

[Python] 纯文本查看 复制代码

#!/usr/bin/env python

# -*- encoding: utf-8 -*-

# Created on 2017-07-14 10:36:36

# Project: xiaomi

from pyspider.libs.base_handler import *

from pyspider.database.mysql.mysqldb import SQL

import urllib

import time

import json

class Handler(BaseHandler):
    """pyspider handler that stores Xiaomi crowdfunding products in MySQL.

    Flow:
      1. ``on_start`` requests the product list from home.mi.com.
      2. ``index_page`` issues one detail request (POST) per listed product.
      3. ``detail_page`` dumps the raw JSON response to ``dump_dir`` and
         returns a row dict.
      4. ``on_result`` inserts that row into ``t_dream_xm_project``.

    NOTE: the code targets Python 2 (``urllib.quote`` and ``.encode('utf-8')``
    on text values); keep that in mind before running it on Python 3.
    """

    # Request attributes shared by every crawl issued from this handler.
    # NOTE(review): the Referer value has no URL scheme -- confirm intended.
    crawl_config = {
        'headers': {
            'Connection': 'keep-alive',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'content-type': 'application/x-www-form-urlencoded',
            'Referer': '//home.mi.com/crowdfundinglist?id=78&title=%E4%BC%97%E7%AD%B9',
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/59.0.3071.115 Safari/537.36'
            ),
        },
    }

    # Directory that receives one "<gid>.txt" raw JSON dump per product.
    dump_dir = "/tmp/xiaomi"

    # JSON request body for the detail endpoint; the three %s slots all take
    # the product gid.
    _DETAIL_TEMPLATE = (
        '{"detail":{"model":"Shopv2","action":"getDetail",'
        '"parameters":{"gid":"%s"}},'
        '"comment":{"model":"Comment","action":"getList",'
        '"parameters":{"goods_id":"%s","orderby":"1","pageindex":"0","pagesize":3}},'
        '"activity":{"model":"Activity","action":"getAct",'
        '"parameters":{"gid":"%s"}}}'
    )

    @every(minutes=24 * 60)
    def on_start(self):
        """Request the product list; each entry leads to a detail request."""
        # Already URL-encoded query string, passed through as-is.
        param = (
            'data=%7B%22HomeList%22%3A%7B%22model%22%3A%22Homepage%22'
            '%2C%22action%22%3A%22BuildHome%22'
            '%2C%22parameters%22%3A%7B%22id%22%3A12%7D%7D%7D'
        )
        self.crawl(
            'https://home.mi.com/app/shopv3/pipe',
            method="GET",
            params=param,
            callback=self.index_page,
        )

    @config(age=60 * 60)
    def index_page(self, response):
        """Queue one detail request for every product in the list response.

        Args:
            response: pyspider response whose JSON body contains the products
                under ``result -> HomeList -> data``; each entry needs a
                ``gid`` key.
        """
        for each in response.json['result']['HomeList']['data']:
            gid = each['gid']
            detailparm = self._DETAIL_TEMPLATE % (gid, gid, gid)
            # The endpoint takes the JSON as a form field named "data".
            detailreq = "data=" + urllib.quote(detailparm)
            detailurl = "https://home.mi.com/app/shop/pipe?gid=%s" % gid
            self.crawl(
                detailurl,
                method='POST',
                data=detailreq,
                callback=self.detail_page,
            )

    @config(priority=2)
    def detail_page(self, response):
        """Dump the raw detail JSON to disk and return the row to store.

        Args:
            response: pyspider response whose JSON body holds the product
                under ``result -> detail -> data -> good``.

        Returns:
            dict with the keys ``original_id``, ``project_name``,
            ``project_desc``, ``curr_money`` and ``begin_date``
            (``begin_date`` formatted as ``%Y-%m-%d %H:%M:%S`` local time).
        """
        payload = response.json
        good = payload['result']['detail']['data']['good']
        gid = good['gid'].encode('utf-8')

        # Keep a copy of the complete response next to the database row.
        self._ensure_dump_dir()
        with open("%s/%s.txt" % (self.dump_dir, gid), 'w') as dump_file:
            dump_file.write(json.dumps(payload))

        # ``ctime`` arrives as text holding epoch seconds.
        created = time.localtime(float(good['ctime'].encode('utf-8')))
        return {
            "original_id": gid,
            "project_name": good['name'].encode('utf-8'),
            "project_desc": good['summary'].encode('utf-8'),
            "curr_money": good['saled'].encode('utf-8'),
            "begin_date": time.strftime("%Y-%m-%d %H:%M:%S", created),
        }

    def on_result(self, result):
        """Insert a row returned by ``detail_page`` into MySQL.

        Empty results and results without a truthy ``original_id`` are
        skipped.
        """
        if not result or not result.get('original_id'):
            return
        SQL().insert('t_dream_xm_project', **result)

    def _ensure_dump_dir(self):
        """Create ``dump_dir`` if it does not exist yet."""
        # Local import: the file's import block does not provide ``os``.
        import os

        try:
            os.makedirs(self.dump_dir)
        except OSError:
            # An already existing directory is fine; anything else is a
            # real error and must surface.
            if not os.path.isdir(self.dump_dir):
                raise

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值