Python 抓取微信搜索结果

#coding:utf-8
import urllib
import re
from urllib import quote
import HTMLParser
import time

def decodeHtml(inhtml):
    """Return *inhtml* with HTML character references replaced by characters.

    For example ``"a&amp;b"`` becomes ``"a&b"``.

    The public ``html.unescape`` function is used when it can be imported.
    ``HTMLParser.unescape`` is an undocumented parser method that was removed
    in Python 3.9, so it is kept only as the fallback for interpreters that
    have no ``html.unescape`` (Python 2).
    """
    try:
        from html import unescape
    except ImportError:
        # Python 2: fall back to the parser's own helper method.
        unescape = HTMLParser.HTMLParser().unescape
    return unescape(inhtml)

def strip_tags(html):
    """Return the text content of *html* with all markup removed.

    Surrounding whitespace is stripped from the input first. The fragment is
    then fed through an ``HTMLParser`` whose data callback collects every text
    run, and the collected runs are concatenated.
    """
    trimmed = html.strip().strip("\n")
    text_runs = []
    extractor = HTMLParser.HTMLParser()
    # Route every text node straight into the collecting list.
    extractor.handle_data = text_runs.append
    extractor.feed(trimmed)
    extractor.close()
    return "".join(text_runs)

def saveImage(count,url):
    """Download *url* and store the bytes as ``Images/<count>.jpg``.

    count -- integer used as the file name (formatted with ``%d``).
    url   -- address of the image to fetch.

    The download happens before the target file is opened, so a failed
    request no longer leaves an empty, truncated file behind. Both the
    response and the file are closed even when an error occurs. The
    ``Images`` directory is created when it does not exist yet.
    """
    import os

    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()

    if not os.path.isdir('Images'):
        os.makedirs('Images')
    with open('Images/%d.jpg'%count,'wb') as f:
        f.write(data)

def timeSwap(timeStamp):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    *timeStamp* may be anything ``int()`` accepts, for example a digit string.
    """
    seconds = int(timeStamp)
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(seconds))

def getWeixinInfo(keyword):
    """Search weixin.sogou.com for *keyword*, print each hit and save its image.

    The result page is scraped with regular expressions for titles,
    summaries, timestamps and the link/image pairs inside the ``img_box2``
    blocks. For every hit the link, title, image URL, summary and formatted
    time are printed, and the image is stored through ``saveImage`` under a
    1-based index.

    The scraped lists are matched up positionally. Iteration stops at the
    shortest list, so a page where one pattern matches fewer times than the
    others no longer raises ``IndexError``.
    """
    query = quote(keyword)
    # Single-argument print(...) produces the same output as the print statement.
    print(query)
    url = "http://weixin.sogou.com/weixin?type=2&query="+query+"&ie=utf8&_ast=1404888960&_asf=null&w=01029901&cid=null"

    response = urllib.urlopen(url)
    try:
        webcontent = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    # Raw strings keep the regex escapes (\w, \d, \() out of Python's own
    # string-escape processing; the pattern text itself is unchanged.
    title = re.findall(r'id="\w+_title_\d+">(.*?)</a>',webcontent)
    summary = re.findall(r'id="\w+_summary_\d+">(.*?)</p>',webcontent)
    timeStamp = re.findall(r"vrTimeHandle552write\('(.*?)'\)",webcontent)
    imagesInfo = re.findall(r'<div class="img_box2">[\w\W]+?</div>',webcontent)

    link,imageSrc = [],[]
    for imgHtml in imagesInfo:
        link += re.findall(r'href="(.*?)"',imgHtml)
        imageSrc += re.findall(r'src="(.*?)"',imgHtml)

    # zip() truncates to the shortest list, which guards every lookup.
    hits = zip(link,title,imageSrc,summary,timeStamp)
    for index,(href,heading,src,digest,stamp) in enumerate(hits,1):
        imageUrl = decodeHtml(src)
        print("save the %s image"%str(index))
        print(decodeHtml(href))
        print(strip_tags(heading))
        print(imageUrl)
        print(strip_tags(digest))
        print(timeSwap(stamp))
        saveImage(index,imageUrl)
    
# Script entry: run one search for the sample keyword.
getWeixinInfo(keyword="世界杯")

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值