Python 爬虫 Web微信开发

22 篇文章 0 订阅
7 篇文章 0 订阅

一.3种请求方式
1.轮询:
浏览器向服务器发送(HTTP)请求,如果没有收到回复则断开,然后再发送1个新的请求,如此循环直到收到回复,这种请求方式称为轮询.这种请求方式的代码简单,但是请求过于频繁,对服务器的压力很大.网页版微信的登录和聊天使用的都是长轮询

2.长轮询:
浏览器向服务器发送(HTTP)请求,如果收到回复则立即返回;否则就停留在服务器,但最多停留1分钟(或其他长度),超时则断开,然后重新发送请求.这种请求方式称为长轮询.这种请求方式减少了发送请求的次数,从而降低了对服务器的要求

3.Web Socket:
客户端和服务端创建连接,然后可以互相发送请求(而不像普通的HTTP请求那样只能从客户端发到服务端;连接始终不断开),这种请求方式称为Web Socket.这种请求方式可以减少用于建立连接的时间,但目前其对浏览器的支持还不够完善,因此使用较少

二.网页版微信流程
在这里插入图片描述
在这里插入图片描述
三.单请求版(串行版)的实现
在这里插入图片描述

#apps.py:

from django.apps import AppConfig

class App01Config(AppConfig):
    """Django application configuration for the ``app01`` app."""
    # Dotted path of the application this configuration applies to.
    name = 'app01'
#views.py:

from django.shortcuts import render,HttpResponse
import requests,time,re,json,bs4

# Create your views here.

# Timestamp taken by login() when the login page is requested; reused as the `_` query parameter.
CTIME=None
# Login uuid extracted by login() from the jslogin response; identifies the QR code.
QCODE=None
# Value sent as the `tip` query parameter by check_login(); set to 0 there once a scan is detected.
TIP=1
# Credential fields (tag name -> text) parsed by check_login() from the <error> element of the redirect response.
TICKET_DICT= {}
# Parsed JSON of the webwxinit response, stored by user(); later read for 'User' and 'SyncKey'.
UI_DICT={}
# Cookies accumulated from the WeChat responses received during login and initialization.
ALL_COOKIE_DICT={}

def login(req):
    '''
    Render the login page with a fresh WeChat QR code.

    Requests a new login uuid from the WeChat ``jslogin`` endpoint, stores it
    together with the request timestamp in module-level state (read later by
    ``check_login``) and renders ``login.html`` with the uuid as ``qcode``.

    :param req: the incoming Django request
    :return: the rendered login page, or a 502 response when no uuid could be
        extracted from the WeChat reply
    '''
    global CTIME
    global QCODE
    global TIP
    CTIME=time.time()
    # A new QR code starts a new login attempt, so restore the initial tip
    # value; otherwise the 0 left behind by a previous scan would be reused.
    TIP=1
    response=requests.get(
        'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&fun=new&lang=zh_CN&_=%s'%CTIME
        # The last parameter is a timestamp. The browser additionally sends a
        # redirect_url parameter; it is left out here to avoid escaping problems.
    )
    # Non-greedy match so that only the quoted uuid is captured.
    match=re.search(r'uuid = "(.*?)";',response.text)
    if match is None:
        # Unexpected reply: report it instead of failing with an IndexError.
        return HttpResponse('Failed to obtain a login uuid from WeChat',status=502)
    QCODE=match.group(1)
    return render(req,'login.html',{'qcode':QCODE})

def check_login(req):
    '''
    Poll WeChat for the state of the current QR-code login.

    Called repeatedly by the login page. On confirmation, the cookies of the
    WeChat responses are stored in ``ALL_COOKIE_DICT`` and the credential
    fields of the redirect response are stored in ``TICKET_DICT``.

    :param req: the incoming Django request
    :return: an ``HttpResponse`` whose body is JSON of the form
        ``{"code": <int>, "data": <avatar or null>}``; ``code`` is 408
        (timed out, poll again), 201 (scanned, ``data`` holds the avatar),
        200 (login confirmed) or 500 (unrecognised or unusable reply)
    '''
    global TIP
    ret={'code':408,'data':None}
    r1=requests.get(
        'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=%s&tip=%s&r=-622092199&_=%s'%(QCODE,TIP,CTIME)
        # `tip` is sent as 1 until a scan has been detected and as 0 afterwards
    )
    if 'window.code=408' in r1.text:
        # Nobody scanned the code before the request timed out.
        return HttpResponse(json.dumps(ret))
    elif 'window.code=201' in r1.text:
        # The code was scanned; hand the user's avatar to the page.
        ret['code']=201
        avatar=re.search(r"window\.userAvatar = '(.*?)';",r1.text)
        ret['data']=avatar.group(1) if avatar else None
        TIP=0
        return HttpResponse(json.dumps(ret))
    elif 'window.code=200' in r1.text:
        # The user confirmed the login.
        ALL_COOKIE_DICT.update(r1.cookies.get_dict())

        # The reply looks like:
        #   window.code=200;
        #   window.redirect_uri="https://wx2.qq.com/cgi-bin/mmwebwx-bin/webwxnewloginpage?ticket=...&uuid=...&lang=zh_CN&scan=...";
        # Note: the host of redirect_uri may differ (e.g. wx.qq.com).
        redirect=re.search(r'window\.redirect_uri="(.*?)";',r1.text)
        if redirect is None:
            ret['code']=500
            return HttpResponse(json.dumps(ret))
        redirect_uri=redirect.group(1)+"&fun=new&version=v2"

        # Fetch the credentials; the response body is a markup string.
        r2=requests.get(
            redirect_uri
        )
        ALL_COOKIE_DICT.update(r2.cookies.get_dict())
        s=bs4.BeautifulSoup(r2.text,features="html.parser")
        error_tag=s.find('error')
        if error_tag is None:
            # No credential container in the reply; nothing to store.
            ret['code']=500
            return HttpResponse(json.dumps(ret))
        # Each credential is a child tag whose text is the value. Children
        # without a tag name (plain text such as whitespace) are skipped.
        for tag in error_tag.children:
            name=getattr(tag,'name',None)
            if name is None:
                continue
            TICKET_DICT[name]=tag.get_text()

        ret['code']=200
        return HttpResponse(json.dumps(ret))
    else:
        # Unrecognised reply: still answer with JSON (the page requests
        # dataType json) but with a code the page does not poll again on.
        ret['code']=500
        return HttpResponse(json.dumps(ret))

def user(req):
    '''
    Fetch the initial account data from WeChat and render the user page.

    Posts the stored credentials to the ``webwxinit`` endpoint, merges the
    parsed JSON reply into ``UI_DICT`` and the response cookies into
    ``ALL_COOKIE_DICT``, then renders ``user.html`` with the reply.

    :param req: the incoming Django request
    :return: the rendered ``user.html`` page with ``ui_dict`` in its context
    '''
    # The '=' between the pass_ticket key and its value is required; without
    # it the ticket is glued onto the parameter name.
    gui_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=88828930&lang=zh_CN&pass_ticket=' + TICKET_DICT['pass_ticket']
    gui_data = {
        'BaseRequest': {
            'DeviceID': 'e402310790089148',
            # NOTE(review): the original author notes that on some devices
            # these ticket fields are named differently (e.g. 'sid') - confirm.
            'Sid':TICKET_DICT['wxsid'],
            'Uin':TICKET_DICT['wxuin'],
            'Skey':TICKET_DICT['skey']
        }
    }
    r3 = requests.post(
        gui_url,
        json=gui_data,  # must be json=, with data= only the top-level key (BaseRequest) would be sent
    )
    r3.encoding = r3.apparent_encoding
    # Data shown on the user's first screen; the templates read
    # 'ContactList' and 'MPSubscribeMsgList' from it.
    ui_dict = json.loads(r3.text)
    ALL_COOKIE_DICT.update(r3.cookies.get_dict())
    UI_DICT.update(ui_dict)
    return render(req,'user.html',{'ui_dict':ui_dict})

def contactList(req):
    '''
    Fetch all contacts from WeChat and render them.

    :param req: the incoming Django request
    :return: the rendered ``contact_list.html`` page; its context entry
        ``contact_list_dict`` is the parsed JSON reply of ``webwxgetcontact``
    '''
    ctime=time.time()
    base_url='https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?pass_ticket=%s&r=%s&seq=0&skey=%s'
    url=base_url%(TICKET_DICT['pass_ticket'],ctime,TICKET_DICT['skey'])
    res=requests.get(
        url=url,
        cookies=ALL_COOKIE_DICT
    )
    res.encoding=res.apparent_encoding
    # Parse the JSON text into a dict: the template iterates over
    # contact_list_dict.MemberList, which a (re-)serialised string cannot offer.
    contact_list_dict=json.loads(res.text)
    return render(req,'contact_list.html',{'contact_list_dict':contact_list_dict})

def sendMsg(req):
    '''
    Send a text message to a contact.

    Reads the recipient (``toUser``) and the text (``msg``) from the query
    string and posts them to the ``webwxsendmsg`` endpoint.

    :param req: the incoming Django request
    :return: ``HttpResponse('OK')`` after the request was sent, or a 400
        response when ``toUser`` or ``msg`` is missing
    '''
    to_user=req.GET.get('toUser')
    msg=req.GET.get('msg')
    if to_user is None or msg is None:
        # Without this guard a missing toUser fails on .strip() below.
        return HttpResponse('toUser and msg are required',status=400)
    # The query string must start with '?', not '/'.
    # NOTE(review): the other views in this module talk to wx.qq.com rather
    # than www.wx.qq.com - confirm which host is correct here.
    url='https://www.wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?lang=zh_CN&pass_ticket=%s'%(TICKET_DICT['pass_ticket'])
    ctime=str(int(time.time())*1000)
    post_dict={
        'BaseRequest':{
            'DeviceID': 'e402310790089148',
            # NOTE(review): the original author notes that on some devices
            # these ticket fields are named differently (e.g. 'sid') - confirm.
            'Sid':TICKET_DICT['wxsid'],
            'Uin':TICKET_DICT['wxuin'],
            'Skey':TICKET_DICT['skey']
        },
        'Msg':{
            'ClientMsgId':ctime,
            'Content':msg,
            'FromUserName':UI_DICT['User']['UserName'],
            'LocalID':ctime,
            'ToUserName':to_user.strip(),
            'Type':1
        },
        'Scene':0
    }
    res=requests.post(
        url=url,
        data=bytes(json.dumps(post_dict,ensure_ascii=False),encoding='utf-8')
        # ensure_ascii=False keeps non-ASCII characters as they are, so Chinese
        # text is sent directly instead of as \u escapes.
        # The body is encoded to UTF-8 bytes by hand so that no default text
        # encoding is applied to it (the original author observed latin-1 being
        # used when no "Content-Type: application/json" header is set).
    )
    print(res.text)
    return HttpResponse('OK')

def getMsg(req):
    '''
    Check for new messages and, when there are any, fetch and print them.

    Steps:
      1. Build the sync key string from ``UI_DICT['SyncKey']`` (initially
         taken from the initialization data) and ask the ``synccheck``
         endpoint whether something arrived.
      2. If the reply contains ``retcode:'0',selector:'2'``, fetch the
         messages, print their content and store the new sync key in
         ``UI_DICT['SyncKey']``.

    :param req: the incoming Django request
    :return: ``HttpResponse('...')`` in every case
    '''
    # Each entry becomes "<Key>_<Val>"; the entries are joined with '|'.
    synckey='|'.join(
        '%s_%s'%(item['Key'],item['Val'])
        for item in UI_DICT['SyncKey']['List']
    )
    base_url='https://www.webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck'
    r1=requests.get(
        url=base_url,
        params={
            'r':time.time(),
            'skey':TICKET_DICT['skey'],
            'sid':TICKET_DICT['wxsid'],
            'uin':TICKET_DICT['wxuin'],
            'deviceid':'e402310790089148',
            'synckey':synckey
        },
        cookies=ALL_COOKIE_DICT
    )
    if "retcode:'0',selector:'2'" in r1.text:
        post_dict={
            'BaseRequest':{
                'DeviceID': 'e402310790089148',
                # NOTE(review): the original author notes that on some devices
                # these ticket fields are named differently (e.g. 'sid') - confirm.
                'Sid':TICKET_DICT['wxsid'],
                'Uin':TICKET_DICT['wxuin'],
                'Skey':TICKET_DICT['skey']
            },
            'SyncKey':UI_DICT['SyncKey'],
            'rr':1
        }
        # NOTE(review): the fetch posts to the same synccheck URL that was
        # used for the check above; confirm whether a dedicated sync endpoint
        # (presumably webwxsync) is required instead.
        r2=requests.post(
            url=base_url,
            params={
                'skey': TICKET_DICT['skey'],
                'sid': TICKET_DICT['wxsid'],
                'pass_ticket':TICKET_DICT['pass_ticket'],
                'lang':'zh_CN'
            },
            json=post_dict
        )
        r2.encoding=r2.apparent_encoding
        msg_res_dict=json.loads(r2.text)
        for info in msg_res_dict['AddMsgList']:
            # Printed for now; the messages could later be shown in the browser.
            print(info['Content'])
        # The next check must use the sync key returned with the messages.
        UI_DICT['SyncKey']=msg_res_dict['SyncKey']
    return HttpResponse('...')
  • ui_dict:

在这里插入图片描述

//contact_list.html:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>ContactList</title>
</head>
<body>
    <div style="float: left;width:20%">
        <ul>
            {% for item in contact_list_dict.MemberList %}
                <li username="{{ item.UserName }}">{{ item.NickName }}</li>
            {% endfor %}
        </ul>
    </div>
    <div style="float: left;width: 80%">
        <input id="toUser" type="text" placeholder="收信人">
        <p><textarea id="msg" placeholder="请输入要发送的消息"></textarea></p>
        <p><input id="btn" type="button" value="发送"></p>
    </div>
    <script src="/static/jquery-3.5.0.js"></script>
    <script>
        // On page load: wire up the send button and the contact list, then
        // start polling for incoming messages.
        $(function () {
            bindSendMsg();
            bindLi(); // without this call the double-click handler is never installed
            getMsg();
        });
        // Send the recipient and the message text to the send-msg view.
        function bindSendMsg() {
            $("#btn").click(function () {
                $.ajax({
                    url:'send-msg.html',
                    type:'GET',
                    data:{'toUser':$('#toUser').val(),'msg':$('#msg').val()},
                    dataType:'JSON',
                    success:function (arg) {
                        ;
                    }
                })
            })
        };
        // Double-clicking a contact copies its username into the recipient field.
        function bindLi() {
            $('li').dblclick(function () {
                var v=$(this).attr('username');
                $('#toUser').val(v);
            })
        }
        // Ask the get-msg view for new messages; ask again after each successful reply.
        function getMsg() {
            $.ajax({
                url:'get-msg.html',
                type:'GET',
                success:function (arg) {
                    console.log(arg);
                    getMsg();
                }
            })
        }
    </script>
</body>
</html>
//login.html:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Login</title>
</head>
<body>
    <img id="qrcode" height="340px" width="340px" src="https://login.weixin.qq.com/qrcode/{{qcode}}">
    <script src="/static/jquery-3.5.0.js"></script>
    <script>
        $(function () {
            checkLogin();// start watching whether the user scanned the code / confirmed the login
        });
        // Poll the check-login view and react to the `code` field of its JSON reply.
        function checkLogin() {
            $.ajax({
                url:'check-login.html',
                type:'GET',
                dataType:'json',
                success:function (arg) {
                    if (arg.code==408) {// timed out: poll again
                        checkLogin();
                    } else if (arg.code==201) {// scanned: show the data from the reply as the image, keep polling
                        $("#qrcode").attr('src',arg.data);
                        checkLogin();
                    } else if (arg.code==200) {// login confirmed
                        location.href='/user.html'// go to the user's home page
                    }
                }
            })
        };
    </script>
</body>
</html>
//user.html:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>User</title>
</head>
<body>
    <div>个人信息</div>
    <ul>
        {# Contacts the user recently chatted with; each entry shows the contact's nickname. #}
        {% for item in ui_dict.ContactList %}
            <li>{{ item.NickName }}</li>
        {% endfor %}
    </ul>
    <a href="/contact-list.html">查看更多联系人</a>
    <div>公众号信息</div>
    {# Official accounts that recently pushed articles to this user. #}
    {% for item in ui_dict.MPSubscribeMsgList %}
        {# Name of the official account. #}
        <h3>{{ item.NickName }}</h3>
        {# Recent articles of this account: the title links to the article URL. #}
        {% for msg in item.MPArticleList %}
            <div style="color: #dddddd"><a href="{{ msg.Url }}">{{ msg.Title }}</a></div>
        {% endfor %}
    {% endfor %}
</body>
</html>
#asgi.py:

"""
ASGI config for WebWX project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/3.1/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

# Use the WebWX settings module unless DJANGO_SETTINGS_MODULE is already set in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'WebWX.settings')

# Module-level ASGI callable, as described in the module docstring.
application = get_asgi_application()
#settings.py:

"""
Django settings for WebWX project.

Generated by 'django-admin startproject' using Django 3.1.1.

For more information on this file, see
https://docs.djangoproject.com/en/3.1/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/3.1/ref/settings/
"""

import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is hard-coded in the source; do not reuse it outside development.
SECRET_KEY = '(io-u6!cfyfk=_5n#&adv7($)e@!7vdc-jrfc4knos)gtic-hd'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    # The project's own application, registered through its AppConfig class.
    'app01.apps.App01Config',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'WebWX.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # Project-level template directory: <BASE_DIR>/templates.
        'DIRS': [os.path.join(BASE_DIR, 'templates')]
        ,
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'WebWX.wsgi.application'


# Database
# https://docs.djangoproject.com/en/3.1/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}


# Password validation
# https://docs.djangoproject.com/en/3.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/3.1/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.1/howto/static-files/

STATIC_URL = '/static/'

# Additional static file directory: <BASE_DIR>/static (the templates load jQuery from /static/).
STATICFILES_DIRS=(
    os.path.join(BASE_DIR,'static'),
)
#urls.py:

"""WebWX URL Configuration

The `urlpatterns` list routes URLs to views. For more information please see:
    https://docs.djangoproject.com/en/3.1/topics/http/urls/
Examples:
Function views
    1. Add an import:  from my_app import views
    2. Add a URL to urlpatterns:  path('', views.home, name='home')
Class-based views
    1. Add an import:  from other_app.views import Home
    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.conf.urls import url
from app01 import views

# Routes of the WebWX project. The patterns are regular expressions, so the
# dot before "html" is escaped; an unescaped dot would match any character.
# NOTE(review): django.conf.urls.url() is deprecated in favour of
# django.urls.re_path() in newer Django versions - consider migrating.
urlpatterns = [
    url(r'^admin/', admin.site.urls),
    url(r'^login\.html$',views.login),
    url(r'^check-login\.html$',views.check_login),
    url(r'^user\.html$',views.user),
    url(r'^contact-list\.html$',views.contactList),
    url(r'^send-msg\.html$',views.sendMsg),
    url(r'^get-msg\.html$',views.getMsg),
]
#wsgi.py:

"""
WSGI config for WebWX project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/3.1/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# Use the WebWX settings module unless DJANGO_SETTINGS_MODULE is already set in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'WebWX.settings')

# Module-level WSGI callable, as described in the module docstring.
application = get_wsgi_application()
#manage.py:

#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Select the WebWX settings module and pass the CLI arguments to Django."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'WebWX.settings')
    try:
        from django.core.management import execute_from_command_line as run_cli
    except ImportError as exc:
        # Re-raise with a hint about the most common causes of the failure.
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    run_cli(sys.argv)


if __name__ == '__main__':
    main()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值