Python爬虫实现半自动发微博

最新推荐文章于 2024-09-13 18:11:35 发布

weixin_34099526

最新推荐文章于 2024-09-13 18:11:35 发布

阅读量300

点赞数

文章标签： python 爬虫 操作系统

原文链接：https://yq.aliyun.com/articles/503317

版权

Python实现半自动发微博【用COOKIES代替模拟登录；用WAP版微博代替网页版微博；每次还要改post数据的URL。。。半半自动的感觉】

微博的内容是从糗事百科抓取的最热段子以及某网站的每日晚安。

 
           #!/usr/bin/python 
          
           #coding=utf-8 
          
           import  
           urllib 
          
           import  
           urllib2 
          
           import  
           time 
          
           import  
           requests 
          
           import  
           re 
          
           from  
           bs4  
           import  
           BeautifulSoup 
          
           import  
           sys 
          
           import  
           os 
          
           import  
           string 
          
           import  
           info.qiubai 
          
           import  
           info.wanan 
          
           # Encoding setup: make UTF-8 the interpreter-wide default encoding so that
           # Chinese text is handled correctly wherever it is used (Python 2 idiom).
           default_encoding = "utf-8"
           if sys.getdefaultencoding() != default_encoding:
               # site.py deletes sys.setdefaultencoding during start-up; reloading
               # the sys module makes the function available again.
               reload(sys)
               sys.setdefaultencoding(default_encoding)
          
           # HTTP request headers used by this script. The Cookie entry is a
           # placeholder: replace it with the cookie string of a logged-in weibo.cn
           # browser session (the cookie stands in for a simulated login).
           headers = {
               'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
               'Cookie': "你自己的COOKIES。。。通过浏览器自己找吧",
               'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
               'Referer': "http://weibo.cn/",
           }
          
           def getHtml(url):
               """Fetch *url* and return the raw response body.

               Args:
                   url: Whatever urllib2.urlopen accepts; this script passes
                       urllib2.Request objects.

               Returns:
                   The body exactly as returned by response.read() (not decoded).
               """
               response = urllib2.urlopen(url)
               try:
                   return response.read()
               finally:
                   # Release the connection even when read() raises.
                   response.close()
          
           # The Weibo session cookie and referer stored in `headers` must only go to
           # weibo.cn, so the two scraped sites receive the User-Agent and nothing else.
           scrape_headers = {'User-Agent': headers['User-Agent']}

           # Joke text scraped from the Qiushibaike text listing.
           qiubaiurl = 'http://www.qiushibaike.com/text/'
           request = urllib2.Request(qiubaiurl, headers=scrape_headers)
           html = getHtml(request)
           qiubaicontent = info.qiubai.getQiubai(html)
           # Single-argument print(...) behaves the same on Python 2 and Python 3.
           print(qiubaicontent)

           # Good-night message scraped from gxdxw.cn.
           wananurl = 'http://www.gxdxw.cn/wananxinyu/'
           request = urllib2.Request(wananurl, headers=scrape_headers)
           wanancontent = info.wanan.getWanan(getHtml(request))
           print(wanancontent)

           # NOTE: the `st` query value is hard-coded; per the author's notes the POST
           # URL has to be edited by hand whenever Weibo changes it.
           post_url = 'http://weibo.cn/mblog/sendmblog?st=ef5d80'

           currentime = time.localtime(time.time()).tm_hour
           # tm_hour is always in 0..23, so "later than 21" already means 22:00-23:59:
           # post the good-night text then, the joke at any other hour.
           if currentime > 21:
               content = wanancontent
           else:
               content = qiubaicontent

           post_data = {'rl': '1', 'content': content}
           # requests never times out unless told to; bound the wait so a stalled
           # connection cannot hang the script forever.
           r = requests.post(post_url, data=post_data, headers=headers, timeout=30)
           print(r)

           # NOTE(review): the author marked this check as unreliable -- a 200 status
           # apparently does not prove the post was published (e.g. expired cookie or
           # stale `st` value). Confirm against the response body if possible.
           if r.status_code == 200:
               print("发送微博成功")
           else:
               print("微博发送失败,请检查cookies是否过期")

PS:1、利用Python自带的时间函数控制哪个时间段发哪些内容。。。

2、但是很多时候数据发送不成功。。。我也不知道怎么判断微博是否真的发送成功了，只能一次一次试。。。有点尴尬。。知道的大神指点一下！！！

3、还有微博POST地址里的st参数（比如上面的st=ef5d80），感觉一天换一次，每次都要手动改

4、最后我把抓取糗百和晚安的程序分开了，新建了一个info文件夹，做成了模块（info/qiubai.py和info/wanan.py；Python 2下这个文件夹里还需要放一个空的__init__.py才能被import）。。。貌似是这样

最后再附上糗百和晚安的程序吧。。。。。。。我也不知道格式对不对。。反正最后运行了

 
           糗百 
          
           #coding=utf-8 
          
           import  
           sys 
          
           from  
           bs4  
           import  
           BeautifulSoup 
          
           # Encoding setup: make UTF-8 the interpreter-wide default encoding so that
           # Chinese text is handled correctly wherever it is used (Python 2 idiom).
           default_encoding = "utf-8"
           if sys.getdefaultencoding() != default_encoding:
               # site.py deletes sys.setdefaultencoding during start-up; reloading
               # the sys module makes the function available again.
               reload(sys)
               sys.setdefaultencoding(default_encoding)
          
           def getQiubai(html):
               """Build the Weibo post text from a Qiushibaike listing page.

               Args:
                   html: Page markup, handed to BeautifulSoup as-is.

               Returns:
                   The tag "#每日糗事#" followed by the text of the first
                   <div class="content">, with leading whitespace removed.

               Raises:
                   IndexError: If the page contains no <div class="content">. The
                       exception type matches what the original list lookup raised.
               """
               soup = BeautifulSoup(html, 'html.parser')
               # find() yields the first match or None, so a missing element can be
               # reported clearly instead of as a bare "list index out of range".
               first = soup.find('div', attrs={'class': 'content'})
               if first is None:
                   raise IndexError('no <div class="content"> found in the page')
               return "#每日糗事#" + first.text.lstrip()

 
           晚安 
          
           #coding=utf-8 
          
           import  
           sys 
          
           from  
           bs4  
           import  
           BeautifulSoup 
          
           # Encoding setup: make UTF-8 the interpreter-wide default encoding so that
           # Chinese text is handled correctly wherever it is used (Python 2 idiom).
           default_encoding = "utf-8"
           if sys.getdefaultencoding() != default_encoding:
               # site.py deletes sys.setdefaultencoding during start-up; reloading
               # the sys module makes the function available again.
               reload(sys)
               sys.setdefaultencoding(default_encoding)
          
           def getWanan(html):
               """Extract the good-night message text from the listing page.

               Args:
                   html: Page markup, handed to BeautifulSoup as-is.

               Returns:
                   Text of the second <a> inside the first <div class="listbox">,
                   with leading whitespace removed.

               Raises:
                   IndexError: If the listbox div is missing or holds fewer than two
                       links. The exception type matches what the original list
                       lookups raised.
               """
               soup = BeautifulSoup(html, 'html.parser')
               listbox = soup.find('div', attrs={'class': 'listbox'})
               if listbox is None:
                   raise IndexError('no <div class="listbox"> found in the page')
               # Only the second link is used, so stop searching after two matches.
               links = listbox.find_all('a', limit=2)
               if len(links) < 2:
                   raise IndexError('fewer than two <a> tags inside the listbox div')
               return links[1].text.lstrip()