【Python 爬虫】Linux 下 Selenium + PhantomJS 自动模拟登录

#!/usr/bin/python
#-*-coding:utf-8-*-
import urllib
import re
import sys
# Python 2 idiom: re-load sys so that setdefaultencoding is callable, then make
# UTF-8 the default codec for implicit str <-> unicode conversions in this script.
reload(sys)
sys.setdefaultencoding('utf-8')
import urllib2
import json
import pandas as pd
import time
# Wall-clock start of the run; compared with a second reading at the end of the
# script to report the total elapsed time.
time1 = time.time()

# Reporting window used by the data request: from yesterday up to today.
# To re-run for a fixed day, override by hand, e.g. yesterday = "2014-09-25".
import datetime
today = datetime.date.today()
yesterday = today + datetime.timedelta(days=-1)

############## Automated simulated login ###################
from selenium import webdriver

url = "https://account.xiaomi.com/pass/serviceLogin?"

# Drive the login page with webdriver.PhantomJS; SSL errors are ignored and the
# TLSv1 protocol is requested through the service arguments.
driver = webdriver.PhantomJS(
    executable_path='/home/hadoop/soft/phantomjs-1.9.7-linux-x86_64/bin/phantomjs',
    service_args=['--ignore-ssl-errors=true', '--ssl-protocol=tlsv1'],
)
driver.get(url)
print(driver.current_url)

# Look up the three form controls (in this order) before typing anything.
username, password, sbtn = map(
    driver.find_element_by_id,
    ('username', 'pwd', 'login-button'),
)

username.send_keys('XXX')  # send username (placeholder value)
password.send_keys('XXX')  # send password (placeholder value)
sbtn.submit()              # submit the form

time.sleep(4)              # fixed pause to let the next page load
print(driver.current_url)
page = driver.page_source


# The result columns are created BEFORE the try block so that the code further
# down the script still finds them (as empty lists) when fetching or parsing
# fails, instead of dying with a NameError.
gmt_created1 = []
exposeNum1 = []
startDownloadNum1 = []
cost1 = []
costPerDownload1 = []

try:
    new_url = (
        "http://e.mi.com/effect/dataV2?begin=" + str(yesterday)
        + "&end=" + str(today)
        + "&subAccountType=2&subAccountId=2425&userId=576115055"
    )
    driver.get(new_url)
    page2 = driver.page_source

    # The JSON body is expected between 'pre-wrap;">' and '</pre>' in the
    # rendered page source; extract it with a regular expression.
    html = re.findall('pre-wrap;">(.*?)</pre>', page2, re.S)
    if not html:
        # Fail with a readable message rather than an IndexError on html[0].
        raise ValueError("no JSON payload found in the report page "
                         "(the login may have failed)")
    html = html[0]

    json_data = json.loads(html)
    result = json_data["result"]
    data1 = result["data"]
    data2 = data1["data"]

    for each in data2:
        # Read and convert every field first, then append: a row with a
        # missing key must not leave the lists with different lengths.
        gmt_created = each["recordDate"]
        exposeNum = each["exposeNum"]
        startDownloadNum = each["startDownloadNum"]
        cost = round(each["cost"], 2)
        costPerDownload = round(each["costPerDownload"], 2)

        gmt_created1.append(gmt_created)
        exposeNum1.append(exposeNum)
        startDownloadNum1.append(startDownloadNum)
        cost1.append(cost)
        costPerDownload1.append(costPerDownload)

except Exception as ex:
    # Best-effort scrape: report the problem and carry on with whatever rows
    # were collected so far.
    print("%s : %s" % (Exception, ex))

finally:
    # The browser is not used after this point; quit it so the PhantomJS
    # process does not outlive the script.
    driver.quit()


# Build the result table. The column order is pinned explicitly because the
# database section of this script reads the frame by POSITION
# (0 = downloads, 2 = promotion cost, 3 = date). That layout used to come from
# pandas sorting the dict keys; recent pandas keeps insertion order instead,
# which would silently shift the positions.
data = pd.DataFrame(
    {
        "日期": gmt_created1,
        "下载量": startDownloadNum1,
        "推广费": cost1,
        "平台名称": "小米营销",
    },
    columns=["下载量", "平台名称", "推广费", "日期"],
)
Page = len(data)
print(data)


#### Write the scraped figures to the MySQL database
## Import the database module
import MySQLdb as mdb

if len(data) == 0:
    # Nothing was scraped: stop before the DELETE below removes the existing
    # row for yesterday without anything to replace it.
    print("Error: no scraped rows to store; marketing_data left unchanged")
    sys.exit(1)

# Bound before the try so the finally clause can test it even when connect()
# itself raises (otherwise a NameError would mask the real error).
con = None
try:
    con = mdb.connect(host="172.16.34.48", user="spider", passwd="spider", db="spider", port=3306)
    cursor = con.cursor()

    # Parameterised INSERT statement
    query = """INSERT INTO  marketing_data(date,download_num,promotion_money,platform) VALUES (%s, %s, %s, %s)"""

    # Remove any previous row for the same day so a re-run does not duplicate
    # it; the date is passed as a bound parameter, not formatted into the SQL.
    cursor.execute(
        "DELETE from marketing_data where platform='xiaomi' and date=%s",
        (str(yesterday),),
    )

    # Only the first scraped row is stored (presumably the row for
    # `yesterday` -- confirm against the API's ordering).
    for i in range(0, 1):
        # Columns are read by label so the values do not depend on the
        # DataFrame's column order.
        values = (
            data["日期"].iloc[i],
            data["下载量"].iloc[i],
            data["推广费"].iloc[i],
            "xiaomi",
        )
        # Execute the SQL statement
        cursor.execute(query, values)

    # Commit explicitly instead of relying on `with con:`, whose meaning
    # differs between MySQLdb / mysqlclient releases.
    con.commit()

except mdb.Error as e:
    if con:
        # Undo the DELETE / partial INSERT of the failed transaction.
        con.rollback()
    print("Error %d: %s" % (e.args[0], e.args[1]))
    sys.exit(1)

finally:
    if con:
        con.close()

# Final report: completion banner, total run time in seconds, and the local
# timestamp at which the script finished.
time2 = time.time()
elapsed = time2 - time1
print(u'OK!,爬虫结束!')
print('爬虫总共耗时:' + str(elapsed) + 's')
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

东华果汁哥

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值