经我亲自测试,下面的代码可以运行;只不过返回的数据类似 HTML 代码,还需要用正则表达式从中提取想要的链接。
import requests
import json
class wechatArticle:
    """Page through a WeChat official account's history via ``profile_ext``.

    The caller supplies every credential (``__biz``, ``pass_ticket``,
    ``appmsg_token`` and the cookie header); this class only formats the
    request URL, sends it, and prints what comes back.
    """

    # Query template; placeholders are biz, offset, pass_ticket, appmsg_token.
    _API_TEMPLATE = 'https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz={0}&f=json&offset={1}&count=10&is_ok=1&scene=126&uin=777&key=777&pass_ticket={2}&wxtoken=&appmsg_token={3}&x5=1&f=json'

    def __init__(self, _biz, _pass_ticket, _appmsg_token, _cookie, _offset=0,
                 _timeout=10):
        """Store the request parameters and build the HTTP headers.

        Args:
            _biz: Value sent as the ``__biz`` query parameter.
            _pass_ticket: Value sent as the ``pass_ticket`` query parameter.
            _appmsg_token: Value sent as the ``appmsg_token`` query parameter.
            _cookie: Raw value of the ``cookie`` request header.
            _offset: Offset of the first page to request.
            _timeout: Seconds to wait for each HTTP request. Pass ``None``
                to wait indefinitely, which was the previous behaviour.
        """
        self.offset = _offset  # differs from one official account to another
        self.biz = _biz
        self.pass_ticket = _pass_ticket
        self.appmsg_token = _appmsg_token
        self.timeout = _timeout
        self.headers = {
            'cookie': _cookie,
            'User-Agent': 'Mozilla/5.0 (Linux; Android 8.0; FRD-AL00 Build/HUAWEIFRD-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132'
        }

    def get_article_list(self):
        """Request successive pages and print each page's ``general_msg_list``.

        Starts at ``self.offset`` and follows the ``next_offset`` value of
        each successful response. The loop ends when a response does not
        report success, when the response carries a falsy
        ``can_msg_continue``, or when ``next_offset`` no longer advances.
        Previously the loop had no exit at all.

        Raises:
            KeyError: If a successful response lacks ``next_offset`` or
                ``general_msg_list``.
        """
        offset = self.offset
        while True:
            api = self._API_TEMPLATE.format(
                self.biz, offset, self.pass_ticket, self.appmsg_token)
            resp = requests.get(
                api, headers=self.headers, timeout=self.timeout).json()
            ret, status = resp.get('ret'), resp.get('errmsg')  # status info
            if not (ret == 0 or status == 'ok'):
                # Re-sending the same offset with the same credentials would
                # just repeat the failure forever, so give up here.
                break

            next_offset = resp['next_offset']
            general_msg_list = resp['general_msg_list']
            # Printed with its type; presumably a JSON-encoded string.
            print(type(general_msg_list),general_msg_list)

            # NOTE(review): `can_msg_continue` is assumed to be 0 on the last
            # page -- confirm against a captured response. A missing field is
            # treated as "keep going".
            if not resp.get('can_msg_continue', 1):
                break
            if next_offset == offset:
                # No progress: the next request would be identical.
                break
            offset = next_offset
if __name__ == '__main__':
    # Demo run against one official account (Qianhei Tech).
    account_biz = 'MzU0NDEwMTc1MA=='
    ticket = 'YDiylNY2QnOm5HbjBMJo%2Bve%2FatcYQxoOv9xbGY9iUcA%3D'
    # Token taken from the account's history-articles request.
    history_token = '1011_FR%252B8YC8KhPETmcJssqk5XhGw3ba-PJ6gP5Tuxw~~'
    sid = 'COPr9gkScExNVTRXelRYQkJ4LVFCTGdtaWkzYXduaDFsajVSa2VVUTZDR1BNZXphaHhLQ3ZvR1ZxVi00YUxCaVd0YmVYRXhuVF9wRE4yc1VMSXBsdDY0WmNQZDdMdGU2LVJHeTBnOW95TnU4cFc3aHBmekF3QUEw16/Y5wU4DUCVTg=='

    # NOTE: these WeChat IDs were copied from the original author's capture
    # and still worked, without substituting my own.
    cookie_header = 'wxuin=1581282621; version=2607033b; pass_ticket={}; wap_sid2={}'.format(ticket, sid)

    # The values above must be re-captured with a packet-capture tool for
    # every account and every crawl; Fiddler shows them in the request headers.
    crawler = wechatArticle(
        _biz=account_biz,
        _pass_ticket=ticket,
        _appmsg_token=history_token,
        _cookie=cookie_header,
    )
    crawler.get_article_list()
此时,输出的内容是这样的:
image.png