Python

最新推荐文章于 2017-05-12 11:09:10 发布

AC_Dreameng

最新推荐文章于 2017-05-12 11:09:10 发布

阅读量1k

点赞数

分类专栏： Python

本文链接：https://blog.csdn.net/hurmishine/article/details/52261261

版权

Python 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

废话不多说，上代码：

FL1

#coding=utf-8
import webbrowser
import time
import urllib2
import re
import os
import thread


tabcount=1

def BlogFun(n,url,MaxVisitor):
    visitcount = r'<span class="link_view" title="阅读次数">(\d+)人阅读</span>'
    global tabcount
    while True:
        if tabcount >10:
            os.system('taskkill /F /IM chrome.exe')
            tabcount = 1
        else:
            tabcount = tabcount + 1
        webbrowser.open(url,new=1)
        request=urllib2.Request(url)
        request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
        opener = urllib2.build_opener()
        fblog = opener.open(request)
        htm = fblog.read()
        Ref=re.findall(visitcount,htm);
        print url+": "+str(int(Ref[0]))+"人阅读"
        if int(Ref[0])>MaxVisitor:
            break
        time.sleep(n)


if __name__=="__main__":
    # Entry point: poll one article, pausing FreshSecond seconds between
    # passes, until BlogFun sees more than MaxVisitor views.
    MaxVisitor = 1000
    FreshSecond = 3
    Domain="http://blog.csdn.net"  # defined but not referenced in this block
    Blog_url = "http://blog.csdn.net/hurmishine/article/details/52128090"
    BlogFun(n=FreshSecond,url=Blog_url,MaxVisitor=MaxVisitor)

FL2:

#coding=utf-8
import webbrowser
import time
import urllib2
import re
import os
import thread
import threading
mylock = threading.RLock()

tabcount=1

def BlogFun(n,url,MaxVisitor,threadnumber):
    """Worker loop: re-open ``url`` until its view counter exceeds ``MaxVisitor``.

    Intended to run in several threads at once.  The module-level
    ``tabcount`` is updated under ``mylock``; once it is greater than 10,
    ``taskkill /F /IM chrome.exe`` is run and the counter is reset to 1.
    Each pass then opens ``url`` with ``webbrowser.open(url, new=1)``,
    downloads the page, sleeps ``n`` seconds and finally checks the parsed
    counter.

    n            -- seconds to sleep on every pass
    url          -- article URL to open and poll
    MaxVisitor   -- the loop stops once the parsed counter is greater than this
    threadnumber -- index supplied by the caller; not used inside the loop
    """
    global tabcount
    visitcount = r'<span class="link_view" title="阅读次数">(\d+)人阅读</span>'
    while True:
        # "with" guarantees the lock is released even if os.system raises,
        # so one failing worker cannot block the others forever.
        with mylock:
            if tabcount >10:
                os.system('taskkill /F /IM chrome.exe')
                tabcount = 1
            else:
                tabcount = tabcount + 1
        webbrowser.open(url,new=1)
        request=urllib2.Request(url)
        request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
        opener = urllib2.build_opener()
        fblog = opener.open(request)
        try:
            htm = fblog.read()
        finally:
            fblog.close()
        Ref=re.findall(visitcount,htm)
        time.sleep(n)
        if not Ref:
            # Counter markup missing: try again rather than die on Ref[0].
            continue
        if int(Ref[0])>MaxVisitor:
            break



if __name__=="__main__":

    main_url = "http://blog.csdn.net/hurmishine/article/details/52128090"

    threadSum= 5
    MaxVisitor = 1050
    timedelay=3
    print main_url+" 开启模式... "+"\n"
    for threadnumber in range(threadSum):
        thread.start_new_thread(BlogFun,(timedelay,main_url,MaxVisitor,threadnumber,))
        threadnumber=threadnumber+1

    print "Main Thread Over.............."

FL3:

#coding=utf-8
import webbrowser
import time
import urllib2
import re
import os
import thread
import threading
mylock = threading.RLock()

tabcount=1

def BlogFun(n,url,MaxVisitor):
    """Worker loop: re-open ``url`` until its view counter exceeds ``MaxVisitor``.

    Intended to run in one thread per article.  The module-level ``tabcount``
    is updated under ``mylock``; once it is greater than 10,
    ``taskkill /F /IM chrome.exe`` is run and the counter is reset to 1.
    Each pass then opens ``url`` with ``webbrowser.open(url, new=1)``,
    downloads the page and checks the parsed counter before sleeping.

    n          -- seconds to sleep between passes
    url        -- article URL to open and poll
    MaxVisitor -- the loop stops once the parsed counter is greater than this
    """
    global tabcount
    visitcount = r'<span class="link_view" title="阅读次数">(\d+)人阅读</span>'
    while True:
        # "with" guarantees the lock is released even if os.system raises.
        with mylock:
            if tabcount >10:
                os.system('taskkill /F /IM chrome.exe')
                tabcount = 1
            else:
                tabcount = tabcount + 1
        webbrowser.open(url,new=1)
        request=urllib2.Request(url)
        request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
        opener = urllib2.build_opener()
        fblog = opener.open(request)
        try:
            htm = fblog.read()
        finally:
            fblog.close()
        Ref=re.findall(visitcount,htm)
        # An empty match list means the counter markup was not found; skip the
        # comparison and retry after the delay instead of raising IndexError.
        if Ref and int(Ref[0])>MaxVisitor:
            break
        time.sleep(n)


if __name__=="__main__":
    # Entry point: collect the article links from the blog's front page and
    # run one BlogFun worker thread per article, then wait for all of them.

    Domain="http://blog.csdn.net"
    main_url = "http://blog.csdn.net/hurmishine"
    # [^"]+ stops at the closing quote of the href; the previous greedy (.+)
    # could run on to a later '">' on the same line.
    patt_article = r'<span class="link_title"><a href="([^"]+)">'

    Mainrequest=urllib2.Request(main_url)
    Mainrequest.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
    opener = urllib2.build_opener()
    fMainblog = opener.open(Mainrequest)
    try:
        Mainhtml= fMainblog.read()
    finally:
        fMainblog.close()
    article_urls = re.findall(patt_article,Mainhtml)
    MaxVisitor = 300
    timedelay=3
    workers = []
    for item in article_urls:
        Realurl =  Domain+item
        worker = threading.Thread(target=BlogFun,args=(timedelay,Realurl,MaxVisitor))
        # Daemon threads let Ctrl+C end the whole process.
        worker.daemon = True
        worker.start()
        workers.append(worker)

    # Threads started with thread.start_new_thread are not waited for, so the
    # script used to fall off the end right after launching them.  Join with
    # a timeout so the main thread stays responsive to KeyboardInterrupt.
    for worker in workers:
        while worker.is_alive():
            worker.join(1)

FL4

#coding:utf-8

import requests
import urllib2
import re
import time

def UpFun(Article_Id):
    url = 'http://blog.csdn.net/hurmishine/article/digg?ArticleId=%s' % str(Article_Id)
    ReferUrl = 'http://blog.csdn.net/hurmishine/article/details/%s' % str(Article_Id)
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': """bdshare_firstime=1432804476444; uuid_tt_dd=5874665987725545185_20150528; __gads=ID=6dbe976f1091e0da:T=1432804509:S=ALNI_Ma63fnYTAMrtpIQLGTDWFf-V6ZC3w; __qca=P0-1924203405-1432804512723; CloudGuest=AKLRitQ5PsB15aH5SW7bSBsYURfFqOgge6ORO2QV4EUqDw+gnKXXMYKF78PeMcFwrqF02vQYkMcNxxWDCt6PMn7itnc2JpOZ4vtQmrTAXkZVoZ6odI9hV3SKm26L7oF6ABT7F5Y8sFlqWHpV8Nwmc9Om52vSXdiRWMWM+SmSF7cM/3eqFJWcBzVSQBY4AsCH; UserName=hurmishine; UserInfo=ILoGS%2FFKM8uT98%2F4tRSWZceYS3U6x7sg81CKDaKPgULoWdBwIN0RDSG7kKJ9%2BjvZo8PHr6Q6Vf%2BkmEcn9fk64XshqHUskkKdk%2BLIJ2wHGfF2mfOz%2FzhwkPxW3ny359eJg3MWVn4GGworZ8KOM7LAXw%3D%3D; UserNick=poetliu; AU=20B; UN=hurmishine; UE="1507026255@qq.com"; access-token=c1575c35-1129-414d-a864-6899eb18b274; _JQCMT_ifcookie=1; _JQCMT_browser=20b1c0690840df900086ad8af0cec07b; __message_district_code=510000; lzstat_uv=37708960411757802909|2671462@3016791@2955225@3587820@854@3595736@2675686@2819552@2939462@2942182@3496353@3560230@3429585@3525517; FullCookie=1; uuid=9ac219b6-c952-4127-bab4-1472ceca5c52; route=; __utmt=1; avh=46652285%2c46610115%2c41985309; __utma=17226283.539248632.1435383498.1435481685.1435484556.12; __utmb=17226283.6.10.1435484556; __utmc=17226283; __utmz=17226283.1435481685.11.6.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; dc_tos=nqnf3t; __message_sys_msg_id=0; __message_gu_msg_id=0; __message_cnel_msg_id=0; __message_in_school=0; dc_session_id=1435484556110""",
        'Host': 'blog.csdn.net',
        'Referer': ReferUrl,
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.65 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
       }
    r = requests.get(url = url,headers = headers)
    print "ID为:"+str(Article_Id)+"...已操作!\n"
    time.sleep(2)


def PagePuFun(BlogPageUrl):
    """Download one article-list page and call UpFun for every article id on it.

    BlogPageUrl -- URL of the list page to download
    """
    request=urllib2.Request(BlogPageUrl)
    request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
    opener = urllib2.build_opener()
    fblog = opener.open(request)
    try:
        htm = fblog.read()
    finally:
        # Close the response explicitly; it was previously left open.
        fblog.close()
    # Captures the numeric id at the end of each article link on the page.
    Patt=r'<span class="link_title"><a href="/hurmishine/article/details/(\d+)">'
    for ArticleNum in re.findall(Patt,htm):
        UpFun(ArticleNum)
    

if __name__=="__main__":
    for i in range(3,11):
        BlogPageUrl="http://blog.csdn.net/hurmishine/article/list/%s" % str(i+1);
        print "开始第:"+str(i+1)+"页\n"
        PagePuFun(BlogPageUrl)
        print "第:"+str(i+1)+"页结束\n"
    print "完!!!\n"

FL5:

#coding=utf-8
import time,urllib2,re,os,thread,time
# Download the blog's table-of-contents view once at import time; the raw
# HTML is kept in ``con`` for the link extraction further down.
cnt = 0
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36'}
req = urllib2.Request('http://blog.csdn.net/hurmishine?viewmode=contents', headers=headers)
response = urllib2.urlopen(req)
try:
    con = response.read()
finally:
    # Close the response explicitly; it was previously left open.
    response.close()

def BlogFun(url):
    visitcount = r'<span class="link_view" title="阅读次数">(\d+)人阅读</span>'
    tabcount = 0
    cnt=0
    print url
    while cnt<=10:
        cnt = cnt + 1
        request=urllib2.Request(url)
        request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
        opener = urllib2.build_opener()
        fblog = opener.open(request)
        htm = fblog.read()
        Ref=re.findall(visitcount,htm);
        print Ref[0],
        time.sleep(1)#n
    print "\n********************************************************"

# Visit up to the first 50 article links found in the downloaded page.
# A regex replaces the former find()-plus-fixed-offset slicing, which
# assumed every id had exactly 8 digits and built garbage URLs whenever
# find() returned -1 (fewer than 50 links on the page).
article_paths = re.findall(r' <span class="link_title"><a href="(/hurmishine/article/details/\d+)',con)
for article_path in article_paths[:50]:
    url = "http://blog.csdn.net"+article_path
    BlogFun(url)

AC_Dreameng

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Python

废话不多说，上代码：FL1#coding=utf-8import webbrowserimport timeimport urllib2import reimport osimport threadtabcount=1def BlogFun(n,url,MaxVisitor): visitcount = r'(\d+)人阅读' global t
复制链接

扫一扫