一.selenium爬取好友QQ空间日志说说

#coding:utf-8
from selenium import webdriver
import time,os
import xlrd,xlwt
from xlutils.copy import copy
# Browser setup for the scraper (uses selenium).
# The script logs in through a browser session and then reads the page content;
# a headless PhantomJS browser can be used instead of Chrome (see below).
# Note: the target elements live inside frame/iframe containers, so the driver
# must switch frame context before locating them.
#driver = webdriver.PhantomJS(executable_path="E:\\mac\\id\\phantomjs-2.1.1-windows\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe")
driver=webdriver.Chrome()
# Optional HTTP proxy settings (disabled):
#driver.set_preference('network.proxy.type', 1)
#driver.set_preference('network.proxy.http', '127.0.0.1')
#driver.set_preference('network.proxy.http_port', 17890)
driver.maximize_window()

def get_shuoshuo(qq,path,username='#####',password='#####'):
    """Log in to Qzone and save a friend's status posts to a workbook.

    Drives the module-level ``driver``: opens the friend's feed page, logs in
    through the login iframe, then walks the paginated feed inside
    ``app_canvas_frame`` and appends one (time, text) row per post through
    ``write_data``. The browser session is quit on every exit path.

    Args:
        qq: QQ number of the friend; it is formatted into the page URL.
        path: workbook path; ``testexist`` recreates it empty before scraping.
        username: account typed into the login form's ``u`` field.
        password: password typed into the login form's ``p`` field.

    Returns:
        None. Progress and failure messages are printed.
    """
    testexist(path)
    try:
        try:
            driver.set_page_load_timeout(10)
            driver.get('http://user.qzone.qq.com/{}/311'.format(qq))
            time.sleep(3)
        except Exception:
            print(u'网页启动异常,请重新打开')
            time.sleep(2)
            # Stop here: nothing below can work without a loaded page.
            return

        try:
            driver.find_element_by_id('login_div')
        except Exception:
            print(u"非好友无法进入空间无权限抓取内容")
            return

        # Log in to Qzone through the embedded login frame.
        driver.switch_to.frame('login_frame')
        driver.find_element_by_id('switcher_plogin').click()
        driver.find_element_by_id('u').clear()  # account field
        driver.find_element_by_id('u').send_keys(username)
        driver.find_element_by_id('p').clear()  # password field
        driver.find_element_by_id('p').send_keys(password)
        driver.find_element_by_id('login_button').click()
        time.sleep(3)

        # Leave the login frame so the following lookups run against the
        # top-level document.
        # NOTE(review): assumes the space page elements are in the top
        # document after login -- confirm against the live page.
        driver.switch_to.default_content()
        # Set once; the implicit wait stays in effect for the session.
        driver.implicitly_wait(3)

        try:
            driver.find_element_by_id('QM_OwnerInfo_Icon')
        except Exception:
            print(u'空间加载异常,请重新打开')
            time.sleep(2)
            return

        driver.switch_to.frame('app_canvas_frame')
        page=1
        try:
            # The loop ends when looking up the "next page" link raises
            # (no further page) or when any other step fails.
            while True:
                content = driver.find_elements_by_css_selector('.content')
                stime = driver.find_elements_by_css_selector('.c_tx.c_tx3.goDetail')
                for con,sti in zip(content,stime):
                    write_data(sti.text,con.text,path)
                next_page=driver.find_element_by_link_text(u'下一页')
                print(u'正在抓取第%d页面内容······'%(page+1))
                next_page.click()
                # Count the page only after the click succeeded so the
                # final message reports the pages actually visited.
                page=page+1
                time.sleep(3)
        except Exception:
            print(u'抓取到%d页面结束'%page)
    finally:
        # Single cleanup point: always release the browser session.
        driver.quit()

def testexist(path):
    """Ensure *path* holds a fresh workbook with a single sheet 'Sheet1'.

    Any file already present at *path* is deleted first, then a new workbook
    is created and saved there.
    """
    if os.path.exists(path):
        os.remove(path)
    workbook = xlwt.Workbook()
    workbook.add_sheet('Sheet1')
    workbook.save(path)

def write_data(data1,data2,path):
    """Append one row (data1, data2) to the first sheet of the workbook at *path*.

    The workbook is re-read, copied into a writable form, extended by one row
    placed after the rows currently in 'Sheet1', and saved back to *path*.
    """
    book = xlrd.open_workbook(path)
    next_row = book.sheet_by_name('Sheet1').nrows
    writable = copy(book)
    target = writable.get_sheet(0)
    target.write(next_row, 0, data1)  # column 0: first value
    target.write(next_row, 1, data2)  # column 1: second value
    writable.save(path)

if __name__ == '__main__':
    # To ask for the output path interactively, read it with raw_input() here
    # instead of using the fixed path below.
    # The workbook is written by xlwt in the binary Excel 97-2003 format, so
    # the file carries an .xls extension (a .csv name would mislabel it).
    work_path='E:\\0930\\WWWW.xls'
    get_shuoshuo('######',work_path)  # pass the friend's QQ number here
  • 4
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值