线程池

异步爬虫

  • 基于线程池
  • 基于单线程+多任务的异步爬虫
Flask的基本使用
  • 环境安装:pip install flask

  • 创建一个py源文件

    from flask import Flask, render_template
    from time import sleep
    
    # Instantiate the Flask application.
    app = Flask(__name__)
    
    
    # Define the view functions and the route URLs they are bound to.
    @app.route('/Hz')
    def index_1():
        """Handle GET /Hz: wait two seconds, then return the rendered test.html."""
        # Artificial 2-second delay before the response is produced.
        sleep(2)
        return render_template('test.html')
    
    
    @app.route('/jay')
    def index_2():
        """Handle GET /jay: wait two seconds, then return the rendered test.html."""
        # Artificial 2-second delay before the response is produced.
        sleep(2)
        return render_template('test.html')
    
    
    @app.route('/tom')
    def index_3():
        """Handle GET /tom: wait two seconds, then return the rendered test.html."""
        # Artificial 2-second delay before the response is produced.
        sleep(2)
        return render_template('test.html')
    
    
    if __name__ == "__main__":
        # debug=True turns on debug mode: the server restarts automatically
        # whenever the server code is modified and saved.
        app.run(debug=True)
    
    
  • test.html




<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">



<html xmlns="http://www.w3.org/1999/xhtml" id="html">

<head><meta http-equiv="Cache-Control" content="no-siteapp" /><meta http-equiv="Cache-Control" content="no-transform " /><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title>

	我的收藏_古诗文网

</title>

<script src="/js/jquery-3.2.1.min.js" type="text/javascript"></script>

<style type="text/css">

.mainshoucang{width:705px; height:34px; line-height:34px; border-bottom:1px solid #d7d5bc;overflow:hidden; float:left; clear:left;}

.mainshoucang span{float:right; margin-top:15px; margin-left:5px; color:#65645F;}

.mainshoucang a{float:right; margin-top:15px; margin-left:5px;}

</style>

<script type="text/javascript">

    // Browsers whose user-agent string looks like a phone or tablet are sent to
    // the mobile edition of this page; every other client stays here.
    if (/(phone|pad|pod|iPhone|iPod|ios|iPad|Android|Mobile|BlackBerry|IEMobile|MQQBrowser|JUC|Fennec|wOSBrowser|BrowserNG|WebOS|Symbian|Windows Phone)/i.test(navigator.userAgent)) {
        window.location.href = "https://m.gushiwen.cn/user/collect.aspx?type=s&p=1&id=2070621&sort=t";
    }

</script>

<link href="/css/skinSo20210826.css" rel="stylesheet" type="text/css" />

    <script src="/js/jquery-3.2.1.min.js" type="text/javascript"></script>

<script type="text/javascript">

    // Return the unescaped value of the cookie called `name`, or null when
    // document.cookie contains no such entry.
    function getCookie(name) {
        // The name must appear at the start of the cookie string or right after
        // a space; its value runs up to the next ';' or the end of the string.
        var pattern = new RegExp("(^| )" + name + "=([^;]*)(;|$)");
        var found = document.cookie.match(pattern);
        return found ? unescape(found[2]) : null;
    }



    // Shared implementation behind the four selectLike* functions below.
    //
    // Increments the counter stored in the `name` attribute of the element
    // 'likeImg' + id. On the first call only (counter becomes '1') it looks for
    // `id` in the comma-separated id list held in the cookie `cookieName`; when
    // the id is listed, the image is switched to the "collected" icon.
    function markLikeFromCookie(id, cookieName) {
        var img = document.getElementById('likeImg' + id);
        img.name = parseInt(img.name) + 1;
        if (img.name != '1') {
            return;
        }

        var stored = getCookie(cookieName);
        if (stored == null || stored == '') {
            return;
        }

        var ids = stored.split(',');
        for (var i = 0; i < ids.length; i++) {
            // Loose equality is kept from the original so that a numeric id
            // still matches the string entries read from the cookie.
            if (ids[i] == id) {
                img.src = 'https://song.gushiwen.cn/siteimg/shou-cangok.png';
                img.alt = '已收藏';
                break;
            }
        }
    }

    // Mark a poem as collected when its id is listed in cookie 'idsShiwen2017'.
    function selectLike(id) {
        markLikeFromCookie(id, 'idsShiwen2017');
    }

    // Mark a famous line as collected when its id is listed in cookie 'idsMingju2017'.
    function selectLikeMingju(id) {
        markLikeFromCookie(id, 'idsMingju2017');
    }

    // Mark an author as collected when its id is listed in cookie 'idsAuthor2017'.
    function selectLikeAuthor(id) {
        markLikeFromCookie(id, 'idsAuthor2017');
    }

    // Mark an ancient book as collected when its id is listed in cookie 'idsGuji2017'.
    function selectLikeGuwen(id) {
        markLikeFromCookie(id, 'idsGuji2017');
    }

    </script>



    <script>

        // Reuse an existing global _hmt if one is already defined, otherwise
        // start with an empty array (presumably the queue read by the script
        // loaded below - TODO confirm against the vendor documentation).
        var _hmt = _hmt || [];

        // Inject the script hosted on hm.baidu.com into the page.
        (function () {

            var hm = document.createElement("script");

            // Protocol-relative URL: the script is fetched with the page's own scheme.
            hm.src = "//hm.baidu.com/hm.js?9007fab6814e892d3020a64454da5a55";

            // Insert the new element in front of the first <script> in the document.
            var s = document.getElementsByTagName("script")[0];

            s.parentNode.insertBefore(hm, s);

        })();

</script>



</head>

<body onclick="closeshowBos()">

<div class="main1">

    <div class="cont">

        <div class="left">

            <a href="https://www.gushiwen.cn/">古诗文网</a>

        </div>

        <div class="right">

            <div class="son1">

                <a style="margin-left:1px;" href="https://www.gushiwen.cn/">推荐</a>

                   

                  <a href="/shiwens/">诗文</a>

                   

                  <a href="/mingjus/">名句</a>

                   

                  <a href="/authors/">作者</a>

                   

                  <a href="/guwen/">古籍</a>

                  

                  <a href="/user/collect.aspx" rel="nofollow" style="background-color:#757863;border-bottom:3px solid #F0EFE2;line-height:43px; height:43px;">我的</a>

                  

                <a style="width:65px;" href="/app/DefaultGwd.aspx" target="_blank">手机版</a>

            </div>

            <div class="son2">

                <div class="search">

                <form action="/search.aspx" onsubmit="return selectSearch()" contentType="text/html; charset=utf-8">

                    <input onkeydown="noajaxkeyUp()" onfocus="setInterval('showBos()',1000)" id="txtKey" name="value" type="text" value="" maxlength="40" autocomplete="off" style="height:25px; line-height:25px; float:left; padding-left:5px; width:260px; font-size:14px; clear:left; border:0px;" />

                    <input type="submit" style="float:right; width:24px; height:24px; clear:right; margin-top:2px; margin-right:3px; background-image:url(https://song.gushiwen.cn/siteimg/docSearch.png); background-repeat:no-repeat; background-size:24px 24px; border:0px;cursor:pointer;" value="" />

                    <input id="b" style="display:none;" type="text" />

                </form>

                </div>

                <div id="box"></div>

            </div>

        </div>

    </div>

</div>

        



<div style="width:1000px; clear:both; margin-left:auto; margin-right:auto; margin-top:20px; overflow:hidden;">

<div class="mainshoucang">

<span style=" height:34px; line-height:34px;font-size:18px; font-weight:bold; float:left; margin:0px; color:#0F0F0F;">我的收藏</span>

<span style=" height:34px; line-height:34px;font-size:18px; font-weight:bold; float:left; margin:0px; color:#0F0F0F; margin-left:10px; margin-right:10px;">/</span>

<a style=" height:34px; line-height:34px;font-size:18px; font-weight:bold; float:left; margin:0px;" href="/user/collectbei.aspx?sort=t">我的背诵</a>

 

    <a href="/user/collect.aspx?type=s&id=2070621&sort=z">字母排序</a>

    <span>/</span>

    <span>时间排序</span>

  

</div>

<div style="width:265px;height:34px; line-height:34px; border-bottom:1px solid #C5C5C5;overflow:hidden; clear:right; float:right;font-size:14px;">

账号管理

</div>

</div>

<div id="mainSearch" class="mainSearch">

<div class="searchleft">

    

        <a style="color:#FFFFFF;background-image:url(https://song.gushiwen.cn/siteimg/seachimg.jpg); background-repeat:no-repeat;">诗文</a>

        <a href="/user/collect.aspx?type=m&id=2070621&sort=t">名句</a>

        <a href="/user/collect.aspx?type=a&id=2070621&sort=t">作者</a>

        <a href="/user/collect.aspx?type=d&id=2070621&sort=t">古籍</a>

       



    </div>

<div class="left">

     

            

                <img src="shoucangdemo.jpg" style=" margin-top:20px;" alt="收藏教程" width="620" height="556" />

            

         



        <form id="FromPage" method="get" action="/user/collect.aspx" onsubmit="return PageSubmit()">

            <div class="pagesright">

             

              <a class="amore" style=" color:#808080;background-color:#e7e6d8;width:380px;">下一页</a>

               

                                  <a style=" color:#808080;background-color:#e7e6d8;">上一页</a>

                                  

                      <span style=" background-color:#E1E0C7; border:0px; margin-top:22px; width:auto;">/ 1页</span>

                      <span class="curent"><input id="putpage" name="p" value="1" autocomplete="off" onblur="SubPage()" /></span>

                      <label id="temppage" style="display:none;">1</label>

                      <label id="sumPage" style="display:none;">1</label>

                      <input type="hidden" name="sort" value="t" />

                      <input type="hidden" name="id" value="2070621" />

         </div>

         </form>



</div>



<div class="right">

<div class="shisoncont">



  <div class="line"><a href="/user/modifypwd.aspx?from=http://so.gushiwen.cn/user/collect.aspx">设新密码</a><span>未设置</span></div>

 

    <div class="line"><a href="/user/bandemail.aspx?from=http://so.gushiwen.cn/user/collect.aspx">绑定邮箱</a><span>*****g@protonmail.com</span></div>

     

<div class="line"><a href="/user/bandphone.aspx?from=http://so.gushiwen.cn/user/collect.aspx">绑定手机号</a><span>未绑定</span></div>



    <div class="line"><a id="bwxhao" style="cursor:pointer;">绑定公众号</a><span id="bwxbool"></span></div>

<div class="line"><a href="/user/loginlose.aspx?from=http://so.gushiwen.cn/user/collect.aspx">退出登录</a></div>

<div class="line" style=" border-bottom:0px;"><a href="/user/userDel.aspx">删除账号</a></div>

</div>





<div id="threeWeixin" style="display:none;">

    <div class="hide-center" id="hide-center">

        <div id="formhead">

            <div id="formhead-title">

                绑定公众号(可扫码登录)

            </div>

            <button type="button" id="close" style="color:#bcba9e;">X</button>

        </div>

        <div id="formbody">

            <img id="erweimaCanshu" width="210" height="210" src="" alt="" />

        </div>

    </div>



</div>





<script type="text/javascript">

    // Return the unescaped value of the cookie called `name`, or null when
    // document.cookie contains no such entry.
    function getCookie(name) {
        // The name must appear at the start of the cookie string or right after
        // a space; its value runs up to the next ';' or the end of the string.
        var pattern = new RegExp("(^| )" + name + "=([^;]*)(;|$)");
        var found = document.cookie.match(pattern);
        return found ? unescape(found[2]) : null;
    }



    // Handle of the polling timer started by showErweima(). It is declared at
    // script scope so that the close button and selectErweima() can cancel it;
    // null means no polling is active.
    var intervalErweima = null;

    // Number of polls made since the QR-code dialog was last opened.
    var timesRun = 0;

    // Random scene id sent with both the ticket request and the scan polls.
    var scene_id = Math.floor((Math.random() * 9999999) + 100000000);

    // Cancel the scan polling timer, if one is running.
    function stopErweimaPolling() {
        if (intervalErweima !== null) {
            clearInterval(intervalErweima);
            intervalErweima = null;
        }
    }

    // Build a request object, falling back to ActiveX for IE5/IE6.
    function createXmlHttp() {
        if (window.XMLHttpRequest) {
            return new XMLHttpRequest();
        }
        return new ActiveXObject("Microsoft.XMLHTTP");
    }

    // Close button of the dialog: hide it and stop polling.
    $("#close").click(function () {
        $("#threeWeixin").fadeOut("slow");
        stopErweimaPolling();
    })

    // Show whether the 'wxopenid' cookie is present. The comparison must be a
    // test, not an assignment, otherwise the "bound" branch is always taken.
    var wxopenid = getCookie('wxopenid');
    if (wxopenid == null || wxopenid == '') {
        document.getElementById('bwxbool').innerHTML = '未绑定'
    }
    else {
        document.getElementById('bwxbool').innerHTML = '已绑定'
    }

    var threeWeixinID = document.getElementById('threeWeixin');
    var erweimaShow = 0;

    $("#bwxhao").click(function () {
        // Dialog already visible: only reposition it.
        if (threeWeixinID.style.display != 'none') {
            document.getElementById('hide-center').style.top = $(window).scrollTop() * 2 + "px";
            return;
        }
        setTimeout(showErweima, 1000);
    })

    // Request a QR-code ticket, show the dialog and start polling for a scan.
    function showErweima() {
        var xmlhttp = createXmlHttp();
        xmlhttp.onreadystatechange = function () {
            if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
                document.getElementById('erweimaCanshu').src = "https://mp.weixin.qq.com/cgi-bin/showqrcode?ticket=" + xmlhttp.responseText;
            }
        }
        // Asynchronous request: the handler above fills in the image when the
        // ticket arrives, so the page is not frozen while waiting.
        xmlhttp.open("GET", "/getTicket.aspx?scene_id=" + scene_id, true);
        xmlhttp.send();

        document.getElementById('hide-center').style.top = $(window).scrollTop() * 2 + "px";
        $("#threeWeixin").fadeIn("slow");
        timesRun = 0;

        // Never leave an earlier timer running next to the new one.
        stopErweimaPolling();
        intervalErweima = setInterval(selectErweima, 2000);
    }

    // One poll: ask the server whether the QR code has been scanned.
    function selectErweima() {
        // Give up after 60 polls (one poll every 2 seconds).
        timesRun = timesRun + 1;
        if (timesRun > 60) {
            $("#threeWeixin").fadeOut("slow");
            stopErweimaPolling();
            return;
        }

        var xmlhttp = createXmlHttp();
        xmlhttp.onreadystatechange = function () {
            if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
                // Any reply other than "未扫码" ends the wait.
                if (xmlhttp.responseText != "未扫码") {
                    $("#threeWeixin").fadeOut("slow");
                    erweimaShow = 1;
                    stopErweimaPolling();
                }
            }
        }
        xmlhttp.open("POST", "/getEventKey.aspx?&scene_id=" + scene_id, true);
        xmlhttp.send();
    }



</script>





<div class="juzioncont">

    <img style=" float:left; margin:10px; margin-right:0px;" src="https://song.gushiwen.cn/siteimg/app/appdownGwd2021.png" width="80" height="80" /><p><center style="font-size:18px; margin-top:14px;">扫码下载</center></p><p><center style="font-size:18px; margin-top:5px;">古文岛客户端</center></p>

</div>



<div class="juzioncont">

    <img style=" float:left; margin:10px; margin-right:0px;" src="https://song.gushiwen.cn/siteimg/app/erma_guwendao.png" width="80" height="80" /><p><center style="font-size:18px; margin-top:14px;">扫码关注</center></p><p><center style="font-size:18px;margin-top:5px;">古文岛公众号</center></p>

</div>



</div>



</div>







<div class="main4">

    © 2021 <a href="https://www.gushiwen.cn/">古诗文网</a> | <a href="https://so.gushiwen.cn/shiwens/">诗文</a> | <a href="https://so.gushiwen.cn/mingjus/">名句</a> | <a href="https://so.gushiwen.cn/authors/">作者</a> | <a href="https://so.gushiwen.cn/guwen/">古籍</a> | <a href="/jiucuo.aspx?u=" target="_blank" rel="nofollow">纠错</a>

</div>





<script type="text/javascript">

    // After the page has loaded, pass the element with id 'external-frame' to
    // setIframeHeight.
    window.onload = function () {
        var frame = document.getElementById('external-frame');
        setIframeHeight(frame);
    };

        </script>



<script defer="defer" src="/js/skinso20210709.js" type="text/javascript"></script>

</body>

</html>


同步代码

在这里插入图片描述

import time
import requests


def get_request(url, timeout=10):
    """Fetch *url* with a GET request and return the length of the response text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; forwarded to
            ``requests.get``. Without a timeout a stalled server would block
            the caller indefinitely.

    Returns:
        Number of characters in the decoded response body.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    page_text = requests.get(url=url, timeout=timeout).text
    return len(page_text)


# Synchronous version: the URLs are requested one after another.
if __name__ == '__main__':
    # Timestamp taken before any request is sent.
    began = time.time()

    targets = (
        'http://127.0.0.1:5000/Hz',
        'http://127.0.0.1:5000/jay',
        'http://127.0.0.1:5000/tom',
    )

    # Each call blocks until its response has arrived, so the waits add up.
    for target in targets:
        print(get_request(target))

    elapsed = time.time() - began
    print('总耗时:', elapsed)

异步代码

在这里插入图片描述

import time
import requests

# 线程池
from multiprocessing.dummy import Pool


def get_request(url, timeout=10):
    """Fetch *url* with a GET request and return the length of the response text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; forwarded to
            ``requests.get``. Without a timeout a stalled server would block
            its worker thread indefinitely.

    Returns:
        Number of characters in the decoded response body.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    page_text = requests.get(url=url, timeout=timeout).text
    return len(page_text)


# 同步代码
# if __name__ == '__main__':
#     # 开始时间
#     start = time.time()
#
#     urls = [
#         'http://127.0.0.1:5000/Hz',
#         'http://127.0.0.1:5000/jay',
#         'http://127.0.0.1:5000/tom',
#     ]
#
#     for url in urls:
#         res = get_request(url)
#         print(res)
#     print('总耗时:', time.time() - start)


# Concurrent version: a pool of worker threads sends the requests in parallel.
if __name__ == '__main__':
    urls = [
        'http://127.0.0.1:5000/Hz',
        'http://127.0.0.1:5000/jay',
        'http://127.0.0.1:5000/tom',
    ]
    start = time.time()

    # Three worker threads. The with-block shuts the pool down once the work is
    # finished instead of leaving its threads alive.
    with Pool(3) as pool:
        # map() calls get_request once per element of urls on the worker
        # threads, so get_request must accept exactly one argument and return
        # a value. It blocks until every result is in and returns them as a
        # list in input order.
        result_list = pool.map(get_request, urls)

    print(result_list)
    print('总耗时:', time.time() - start)

线程池
# Thread pool
from multiprocessing.dummy import Pool
# Number of worker threads to start
pool = Pool(3)
# Apply callback to every element of alist asynchronously on the pool's threads.
# callback and alist are not defined in this snippet: substitute your own
# one-argument function and input list.
result_list = pool.map(callback, alist)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值