项目三:微信爬虫机器人(再开发)
项目目录
1.config.py
配置模块:保存微信接口校验用的 TOKEN 和被动回复文本消息的 XML 模板 XML_STR
# Token shared with the WeChat platform; used when verifying callback signatures.
TOKEN = 'weixin'

# Passive text-reply envelope. Placeholders, in order:
# recipient, sender, creation time, message type, message content.
XML_STR = '''
<xml>
<ToUserName><![CDATA[%s]]></ToUserName>
<FromUserName><![CDATA[%s]]></FromUserName>
<CreateTime>%s</CreateTime>
<MsgType><![CDATA[%s]]></MsgType>
<Content><![CDATA[%s]]></Content>
</xml>
'''
joke.py
笑话爬虫
import requests
from lxml import etree
from random import randint
def get_joke():
    """Fetch one random joke from qiushibaike.com.

    Downloads a randomly chosen listing page (1 to 5), collects the text of
    every ``<div class="content"><span>`` node and returns one of them with
    surrounding whitespace stripped.

    Returns:
        str: The joke text, or a fallback message when the page contained
        no joke nodes.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = "https://www.qiushibaike.com/text/page/" + str(randint(1, 5))
    # A timeout keeps a stalled upstream site from hanging the caller forever.
    r = requests.get(url, timeout=10)
    tree = etree.HTML(r.text)
    # etree.HTML returns None for an empty document.
    nodes = tree.xpath('//div[@class="content"]/span') if tree is not None else []
    jokes = [node.xpath('string(.)') for node in nodes]
    if not jokes:
        return '暂时没有获取到笑话,请稍后再试'
    # randint is inclusive on both ends, so the valid indices are 0 .. len - 1.
    return jokes[randint(0, len(jokes) - 1)].strip()
# Manual smoke test: fetch and print one joke when run as a script.
if __name__ == "__main__":
    content = get_joke()
    print(content)
main.py
import sys
import time
import os
# Check the interpreter version first so an unsupported Python gets the
# friendly message instead of failing somewhere in the dependency probe.
__MAJOR, __MINOR, __MICRO = sys.version_info[0], sys.version_info[1], sys.version_info[2]
if __MAJOR < 3:
    print('Python版本号过低,当前版本为 %d.%d.%d, 请重装Python解释器' % (__MAJOR, __MINOR, __MICRO))
    time.sleep(2)
    # sys.exit is always available; the bare exit() helper comes from `site`.
    sys.exit()

# Probe for Flask; if it is missing, install the project's dependencies.
# ImportError is caught (ModuleNotFoundError is its subclass and only exists
# on Python 3.6+), so older Python 3 releases take this path too.
try:
    import flask
except ImportError:
    print('正在安装必须模块,请稍等…')
    # NOTE(review): this runs whichever `pip` is first on PATH and expects a
    # file literally named `requirements` in the working directory - confirm.
    os.system('pip install -r requirements')
# Script entry point: import the Flask app lazily (after the dependency
# bootstrap above has run) and start the development server.
if __name__ == "__main__":
    # NOTE(review): the robot module file must be named wechat_robot.py for
    # this import to resolve.
    from wechat_robot import app
    print('正在打开服务器…')
    # NOTE(review): debug=True enables the Werkzeug interactive debugger, and
    # host 0.0.0.0 exposes it on every interface. Turn debug off before
    # making this server reachable from the internet.
    app.run(host='0.0.0.0', port=8888, debug=True)
weather.py
import requests
from lxml import etree
def get_weather(keyword):
    """Look up the current weather for a city on tianqi.com.

    Args:
        keyword: City name to search for (e.g. '北京').

    Returns:
        str: A multi-line summary (city, weekday, current temperature,
        today's weather, humidity lines, air quality, PM value), or a
        "not found" message when the expected nodes are missing from the
        page.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = 'https://www.tianqi.com/tianqi/search'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    # Passing the keyword through `params` lets requests URL-encode it, so
    # characters such as '&' or '#' cannot corrupt the query string.
    response = requests.get(url, params={'keyword': keyword}, headers=headers, timeout=10)
    tree = etree.HTML(response.text)
    not_found = '没有该城市天气信息,请确认查询格式'
    # etree.HTML returns None for an empty document.
    if tree is None:
        return not_found
    # Every `[0]` below raises IndexError when its node is absent, which is
    # how a missing city (or a changed page layout) shows up.
    try:
        city_name = tree.xpath('//dd[@class="name"]/h2/text()')[0]
        week = tree.xpath('//dd[@class="week"]/text()')[0]
        now = tree.xpath('//p[@class="now"]')[0].xpath('string(.)')
        temp = tree.xpath('//dd[@class="weather"]/span')[0].xpath('string(.)')
        kongqi = tree.xpath('//dd[@class="kongqi"]/h5/text()')[0]
        pm = tree.xpath('//dd[@class="kongqi"]/h6/text()')[0]
    except IndexError:
        return not_found
    # The humidity block is a list of text nodes; it may legitimately be empty.
    shidu = tree.xpath('//dd[@class="shidu"]/b/text()')
    content = "【{0}】{1}天气\n当前温度:{2}\n今日天气:{3}\n{4}\n{5}\n{6}".format(
        city_name, week.split('\u3000')[0], now, temp, '\n'.join(shidu), kongqi, pm)
    return content
# Manual smoke test: query one city and print the summary when run as a script.
if __name__ == "__main__":
    keyword = '北京'
    content = get_weather(keyword)
    print(content)
wechat_robot.py
from config import TOKEN,XML_STR
from flask import Flask, request, make_response
import hashlib
import xml.etree.ElementTree as ET
from weather import get_weather
from joke import get_joke
# Flask application object; __name__ tells Flask which module it belongs to.
app = Flask(__name__)
def _cdata_safe(text):
    """Split any ``]]>`` in *text* so it cannot close a CDATA section early."""
    return text.replace(']]>', ']]]]><![CDATA[>')


@app.route('/message', methods=['GET', 'POST'])
def chatme():
    """Handle the WeChat server callback on ``/message``.

    Non-POST requests are treated as the platform's URL verification: the
    SHA-1 of the sorted, concatenated token/timestamp/nonce is compared with
    ``signature`` and ``echostr`` is echoed back on success.

    POST requests carry an XML message. Non-text messages get a fixed
    "unknown format" reply. Text messages are answered with a joke (if the
    text contains '笑话'), a weather report (if it ends with '天气'), or
    otherwise the text reversed.

    Returns:
        A Flask response for verification requests, or the reply XML string
        built from ``XML_STR`` for messages.

    Raises:
        xml.etree.ElementTree.ParseError: If the POST body is not valid XML.
    """
    if request.method != 'POST':
        data = request.args
        signature = data.get('signature', '')
        timestamp = data.get('timestamp', '')
        nonce = data.get('nonce', '')
        echostr = data.get('echostr', '')
        # WeChat signs the three values sorted lexicographically and then
        # concatenated; without the sort the check only passes by chance.
        s = ''.join(sorted([TOKEN, timestamp, nonce])).encode("utf-8")
        if hashlib.sha1(s).hexdigest() == signature:
            return make_response(echostr)
        return make_response("signature validation error")

    # NOTE(review): the stdlib XML parser is not hardened against maliciously
    # constructed documents, and this endpoint does not verify the signature
    # of POST requests - consider both before exposing it publicly.
    xml = ET.fromstring(request.stream.read())
    # findtext returns '' for a missing or empty element instead of raising.
    toUserName = xml.findtext('ToUserName', '')
    fromUserName = xml.findtext('FromUserName', '')
    createTime = xml.findtext('CreateTime', '')
    msgType = xml.findtext('MsgType', '')

    # The reply swaps sender and recipient: it goes back to whoever wrote.
    if msgType != 'text':
        return XML_STR % (
            fromUserName,
            toUserName,
            createTime,
            'text',
            'Unknow Format, Please check out'
        )

    content = xml.findtext('Content', '')
    if u'笑话' in content:  # joke request
        content = get_joke()
    elif content[-2:] == "天气":  # weather request: "<city>天气"
        keyword = content[:-2]
        if len(keyword) < 2:
            content = '请输入正确的城市名称'
        else:
            content = get_weather(keyword)
    else:
        # Echo the message reversed. ElementTree yields `str` on Python 3,
        # so no separate unicode/bytes handling is needed.
        content = content[::-1]

    return XML_STR % (fromUserName, toUserName, createTime, msgType, _cdata_safe(content))
if name == “main”:
app.run(host=‘0.0.0.0’, port=8888,debug=True)
部署说明:后台服务可以运行在云服务器上,也可以在本地运行并通过内网穿透对外提供访问。
使用前需要先在微信公众平台申请公众号,并在其服务器配置中填写本服务的 URL(路径为 /message)和与 config.py 中一致的 Token。