首先需要自己注册一个微信公众号。在浏览器中登录该公众号后台,按 F12 打开开发者工具,获取登录后的 cookie 和 token,下面的代码中会用到。
还需要喜欢的公众号的 key:把该公众号分享的文章分享到 QQ 中,查看文章链接(参考如下),链接参数 __biz=MjM5MTEwMjg3OQ== 中的 MjM5MTEwMjg3OQ== 就是 key 了。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File : articleCollection.py
# datetime:2020/11/5 10:58
import time
import random
import requests
import json
import pandas as pd
# from cookie import getCookie, getFakeId
# Convert a Unix timestamp (in seconds, not milliseconds) to a date string.
def getDate(times, fmt="%Y-%m-%d %H:%M:%S"):
    """Format a Unix timestamp as a local-time string.

    Args:
        times: Seconds since the epoch, as accepted by ``time.localtime``.
            Note that this is seconds, not milliseconds.
        fmt: ``time.strftime`` format string. Defaults to
            ``"%Y-%m-%d %H:%M:%S"``.

    Returns:
        The timestamp rendered in the machine's local timezone.
    """
    return time.strftime(fmt, time.localtime(times))
def listAllArticle(fakeId, *, pages=2, count=5, cookie=None, token=None, timeout=30):
    """Page through an account's article list and print one row per article.

    Sends GET requests to the ``cgi-bin/appmsg`` endpoint of mp.weixin.qq.com
    with ``action=list_ex``, using the cookie and token of an already
    logged-in official-account session, and sleeps a random 3-10 seconds
    before every request.

    Args:
        fakeId: Key of the target account (the ``__biz`` value taken from one
            of its article links); sent as the ``fakeid`` query parameter.
        pages: Number of pages to request. Defaults to 2.
        count: Number of articles requested per page. Defaults to 5.
        cookie: ``Cookie`` header value of the logged-in session. When None,
            the value embedded below is used.
        token: Session token sent as the ``token`` query parameter. When
            None, the value embedded below is used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with one row per article, in the order received:
        ``[title, link, cover, create time, digest, item_show_type,
        update time, fakeId]``.
    """
    # Target URL.
    url = "https://mp.weixin.qq.com/cgi-bin/appmsg"

    if cookie is None:
        # SECURITY: session credentials embedded in source code. Prefer
        # passing ``cookie``/``token`` explicitly (e.g. loaded from a local
        # file) instead of committing them.
        cookie = "pgv_pvid=3456747162; _clck=1690t1n|1|f2w|0; rewardsn=; wxtokenkey=777; wwapp.vid=; wwapp.cst=; wwapp.deviceid=; pgv_info=ssid=s1397771680; ua_id=IiLNtvCuz4ANqKgLAAAAAP6f5nxLvQkREgshZm91KBU=; wxuin=57011350107536; mm_lang=zh_CN; cert=PltRVZJFwDVVGoyK4BGpZN06yPIYe6xt; sig=h01408121ff9c7ae5071e04633dba23bfa4557bb4f04690e6c696044cc7c310da9695196544ab2b6cd7; ptui_loginuin=1058664513; uin=o1058664513; skey=@G2XsS7WLH; RK=6ZFRo5ZURT; ptcz=e11cb70127dd967ffbba629350b001d9a01e0cf3527621a418fbcbc1bcfea42e; master_key=anl4iAFrM84vz6qebDiEeYJz8tDjd9gaZQKfvPLYdMc=; pac_uid=1_1058664513; iip=0; o_cookie=1058664513; uuid=2434718c19afbfb5560eedaf940f78c5; rand_info=CAESIHpcc+3KLdYLaVdjLRasfQlfI2w4b3H/jzBZ3nsO2tFa; slave_bizuin=3933389211; data_bizuin=3933389211; bizuin=3933389211; data_ticket=EVwPkCAE9u13gfKr/+hk7VH+Occ967/k5bcvPMCM9RxHstMwuhwzgbEd2fc99eMo; slave_sid=UWJDTFlJTW1iTXg3Nmg2MlpSRjJGUVlNdEx3Q1lBS01FWmR3RGNuVmJCUFFEeDhnY2ZHb2pHU0hmVVA1UVJrVzhaS2dFTmdXV3RINk54aGE2TGpWWWw5WEJJQlV0U3JXQW9QdHY4WW1ja2pza0xYRzJ5cW04TnRtcGZhZFJGNTdTV1hCYjlxRll3ZVZ2YjZE; slave_user=gh_8c64241c1f49; xid=7f0446b9ab13c46f2c5e13a7f623aaef"
    if token is None:
        token = "1302392423"

    # Sending the session cookie skips the login step.
    headers = {
        "Cookie": cookie,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    }
    params = {
        "token": token,
        "lang": "zh_CN",
        "f": "json",
        "ajax": "1",
        "action": "list_ex",
        "begin": "0",
        "count": count,
        "query": "",
        "fakeid": fakeId,
        "type": "9",
    }

    rows = []
    for page in range(pages):
        # Offset of the first article on this page.
        params["begin"] = page * count

        # Random 3-10 second pause so requests are spaced out.
        sleepTime = random.randint(3, 10)
        print(sleepTime)
        time.sleep(sleepTime)

        # A timeout keeps a stalled connection from hanging the script forever.
        content_json = requests.get(
            url, headers=headers, params=params, timeout=timeout
        ).json()

        articles = content_json.get("app_msg_list")
        if articles is None:
            # No article list in the response (presumably an expired session
            # or request throttling - confirm from the printed payload).
            # Show what came back and stop instead of raising a bare KeyError.
            print(content_json)
            break

        for item in articles:
            # Collect the fields of interest for each article on the page.
            items = [
                item["title"],
                item["link"],
                item["cover"],
                getDate(item["create_time"]),
                item["digest"],
                item["item_show_type"],
                getDate(item["update_time"]),
                ''.join(fakeId),
            ]
            print(items)
            rows.append(items)
    return rows
if __name__ == '__main__':
    # "MjM5MTEwMjg3OQ==" is the key (__biz value) of the account to list.
    target_key = "MjM5MTEwMjg3OQ=="
    listAllArticle(target_key)
后续如果需要下载,补充下载代码即可
更多精彩博文请关注微信公众号:wy15010267