快手
1.爬个人列表下的所有作品
# 作者:robin_aerfa
# QQ:1514149460
# wx:Stbz528403
import json # 导入模块 json
import requests # 导入模块 requests
def sub_data(ginseng_id, ginseng_pcursor=''):
    """Build the JSON request body for the ``publicFeedsQuery`` GraphQL call.

    Args:
        ginseng_id: Profile ID, sent as the ``principalId`` variable.
        ginseng_pcursor: Paging cursor, sent as the ``pcursor`` variable.
            The empty string is the default.

    Returns:
        str: The request body serialized as JSON text.
    """
    local_post_data = {
        "operationName": "publicFeedsQuery",
        "variables": {"principalId": ginseng_id, "pcursor": ginseng_pcursor, "count": 24},
        "query": "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {\n publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n pcursor\n live {\n user {\n id\n kwaiId\n eid\n profile\n name\n living\n __typename\n }\n watchingCount\n src\n title\n gameId\n gameName\n categoryId\n liveStreamId\n playUrls {\n quality\n url\n __typename\n }\n followed\n type\n living\n redPack\n liveGuess\n anchorPointed\n latestViewed\n expTag\n __typename\n }\n list {\n photoId\n caption\n thumbnailUrl\n poster\n viewCount\n likeCount\n commentCount\n timestamp\n workType\n type\n useVideoPlayer\n imgUrls\n imgSizes\n magicFace\n musicName\n location\n liked\n onlyFollowerCanComment\n relativeHeight\n width\n height\n user {\n id\n eid\n name\n profile\n __typename\n }\n expTag\n __typename\n }\n __typename\n }\n}\n",
    }
    # Serialize with the json module instead of patching up str(dict) with
    # string replacements: the replacements corrupt any ID or cursor that
    # contains a quote, a space or ", ", and can yield invalid JSON.
    return json.dumps(local_post_data)
def sub_head(ginseng_id):
    """Build the HTTP request headers used for one profile's requests.

    Args:
        ginseng_id: Profile ID string, appended to the profile URL to form
            the ``Referer`` header.

    Returns:
        dict: Header names mapped to header values.
    """
    profile_url = "https://live.kuaishou.com/profile/" + ginseng_id
    header_pairs = (
        ("Host", "live.kuaishou.com"),
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0"),
        ("Accept", "*/*"),
        ("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"),
        ("Accept-Encoding", "gzip, deflate, br"),
        ("Referer", profile_url),
        ("content-type", "application/json"),
        ("Cookie", "clientid=3; did=web_79aee2e860f5438d897ddb18f84c1365; client_key=65890b29; didv=1565316325000; kuaishou.live.bfb1s=3e261140b0cf7444a0ba411c6f227d88"),
    )
    return dict(header_pairs)
def data_analysis(ginseng_data):
    """Extract the ID and caption of every work from a raw JSON response.

    Args:
        ginseng_data: JSON text whose ``data.publicFeeds.list`` entry is a
            sequence of objects carrying ``photoId`` and ``caption``.

    Returns:
        list: One dict per work, in response order, holding the work ID
        and its caption.
    """
    feed_items = json.loads(ginseng_data)['data']['publicFeeds']['list']
    return [
        {'作品ID:': item['photoId'], '标题:': item['caption']}
        for item in feed_items
    ]
if __name__ == '__main__':
    # Crawl every work published by one profile, page by page, then save the
    # collected records to a text file (one record per line).
    url = 'https://live.kuaishou.com/graphql'
    local_id = 'CFNF88888888'
    local_pcursor = ''
    local_works = []
    # Build the headers once (they do not change between pages) and pass the
    # ID variable itself; the quoted literal 'local_id' produced a Referer
    # pointing at a profile literally named "local_id".
    local_headers = sub_head(local_id)
    while True:
        json_data = requests.post(
            url,
            headers=local_headers,
            data=sub_data(local_id, local_pcursor),
            timeout=10,  # do not hang forever on a stalled connection
        )
        json_data.encoding = 'UTF-8-SIG'  # force the decoding used for .text (works around garbled text)
        response_text = json_data.text
        local_works += data_analysis(response_text)
        local_pcursor = json.loads(response_text)['data']['publicFeeds']['pcursor']
        # 'no_more' marks the last page. An empty/missing cursor would make
        # the next request identical to the first one, so stop on that too
        # instead of looping forever.
        if not local_pcursor or local_pcursor == 'no_more':
            break
    # The with-statement closes the file; no explicit close() is needed.
    with open('个人列表作品.txt', 'w', encoding='utf8') as fileobject:
        for d_key in local_works:
            fileobject.write(str(d_key) + "\n")
    print(len(local_works))
1.爬个人列表下的所有作品 (2019/8/4更新)
2019/8/4更新 增加读入ID
根据需求增加读入文件ID,批量爬取。
# 作者:robin_aerfa
# QQ:1514149460
# wx:Stbz528403
import json # 导入模块 json
import requests # 导入模块 requests
def sub_data(ginseng_id, ginseng_pcursor=''):
    """Build the JSON request body for the ``publicFeedsQuery`` GraphQL call.

    Args:
        ginseng_id: Profile ID, sent as the ``principalId`` variable.
        ginseng_pcursor: Paging cursor, sent as the ``pcursor`` variable.
            The empty string is the default.

    Returns:
        str: The request body serialized as JSON text.
    """
    local_post_data = {
        "operationName": "publicFeedsQuery",
        "variables": {"principalId": ginseng_id, "pcursor": ginseng_pcursor, "count": 24},
        "query": "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {\n publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n pcursor\n live {\n user {\n id\n kwaiId\n eid\n profile\n name\n living\n __typename\n }\n watchingCount\n src\n title\n gameId\n gameName\n categoryId\n liveStreamId\n playUrls {\n quality\n url\n __typename\n }\n followed\n type\n living\n redPack\n liveGuess\n anchorPointed\n latestViewed\n expTag\n __typename\n }\n list {\n photoId\n caption\n thumbnailUrl\n poster\n viewCount\n likeCount\n commentCount\n timestamp\n workType\n type\n useVideoPlayer\n imgUrls\n imgSizes\n magicFace\n musicName\n location\n liked\n onlyFollowerCanComment\n relativeHeight\n width\n height\n user {\n id\n eid\n name\n profile\n __typename\n }\n expTag\n __typename\n }\n __typename\n }\n}\n",
    }
    # Serialize with the json module instead of patching up str(dict) with
    # string replacements: the replacements corrupt any ID or cursor that
    # contains a quote, a space or ", ", and can yield invalid JSON.
    return json.dumps(local_post_data)
def sub_head(ginseng_id):
    """Build the HTTP request headers used for one profile's requests.

    Args:
        ginseng_id: Profile ID string, appended to the profile URL to form
            the ``Referer`` header.

    Returns:
        dict: Header names mapped to header values.
    """
    profile_url = "https://live.kuaishou.com/profile/" + ginseng_id
    header_pairs = (
        ("Host", "live.kuaishou.com"),
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0"),
        ("Accept", "*/*"),
        ("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"),
        ("Accept-Encoding", "gzip, deflate, br"),
        ("Referer", profile_url),
        ("content-type", "application/json"),
        ("Cookie", "clientid=3; did=web_79aee2e860f5438d897ddb18f84c1365; client_key=65890b29; didv=1565316325000; kuaishou.live.bfb1s=3e261140b0cf7444a0ba411c6f227d88"),
    )
    return dict(header_pairs)
def data_analysis(ginseng_data):
    """Extract the ID and caption of every work from a raw JSON response.

    Args:
        ginseng_data: JSON text whose ``data.publicFeeds.list`` entry is a
            sequence of objects carrying ``photoId`` and ``caption``.

    Returns:
        list: One dict per work, in response order, holding the work ID
        and its caption.
    """
    feed_items = json.loads(ginseng_data)['data']['publicFeeds']['list']
    return [
        {'作品ID:': item['photoId'], '标题:': item['caption']}
        for item in feed_items
    ]
if __name__ == '__main__':
    # Batch mode: read whitespace-separated profile IDs from a text file,
    # crawl every work of each profile, and append the records to the
    # output file (one record per line).
    url = 'https://live.kuaishou.com/graphql'
    print("例如:C:1.TXT文件或者运行目录下 1.txt")
    file_path = input("请输入ID文件路径:")
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_path.replace('\\', '/')) as file_object:
        id_text = file_object.read()
    # Iterate over the IDs read from the file. The original split the *path*
    # string, so it crawled the file name instead of the IDs it contains.
    for local_id in id_text.split():
        local_pcursor = ''
        local_works = []
        local_headers = sub_head(local_id)
        while True:
            # Rebuild the body on every iteration so the cursor returned by
            # the previous page is actually sent; a body built once before
            # the loop re-requests the first page forever.
            local_data = sub_data(local_id, local_pcursor)
            json_data = requests.post(
                url,
                headers=local_headers,
                data=local_data,
                timeout=10,  # do not hang forever on a stalled connection
            )
            json_data.encoding = 'UTF-8-SIG'  # force the decoding used for .text (works around garbled text)
            response_text = json_data.text
            local_works += data_analysis(response_text)
            local_pcursor = json.loads(response_text)['data']['publicFeeds']['pcursor']
            # 'no_more' marks the last page. An empty/missing cursor would
            # restart from the first page, so stop on that too.
            if not local_pcursor or local_pcursor == 'no_more':
                break
        # Append mode keeps the records of the IDs processed earlier.
        with open('个人列表作品.txt', 'a', encoding='utf8') as fileobject:
            for d_key in local_works:
                fileobject.write(str(d_key) + "\n")
        print(len(local_works))
    input("按任意字符退出~!")
2.爬作品所有评论(注意:下方代码与第1节相同,请求的仍是 publicFeedsQuery 作品列表,解析的是作品ID和标题,并未抓取评论)
# 作者:robin_aerfa
# QQ:1514149460
# wx:Stbz528403
import json
import requests
def sub_data(ginseng_id, ginseng_pcursor=''):
    """Build the JSON request body for the ``publicFeedsQuery`` GraphQL call.

    Args:
        ginseng_id: Profile ID, sent as the ``principalId`` variable.
        ginseng_pcursor: Paging cursor, sent as the ``pcursor`` variable.
            The empty string is the default.

    Returns:
        str: The request body serialized as JSON text.
    """
    local_post_data = {
        "operationName": "publicFeedsQuery",
        "variables": {"principalId": ginseng_id, "pcursor": ginseng_pcursor, "count": 24},
        "query": "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {\n publicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n pcursor\n live {\n user {\n id\n kwaiId\n eid\n profile\n name\n living\n __typename\n }\n watchingCount\n src\n title\n gameId\n gameName\n categoryId\n liveStreamId\n playUrls {\n quality\n url\n __typename\n }\n followed\n type\n living\n redPack\n liveGuess\n anchorPointed\n latestViewed\n expTag\n __typename\n }\n list {\n photoId\n caption\n thumbnailUrl\n poster\n viewCount\n likeCount\n commentCount\n timestamp\n workType\n type\n useVideoPlayer\n imgUrls\n imgSizes\n magicFace\n musicName\n location\n liked\n onlyFollowerCanComment\n relativeHeight\n width\n height\n user {\n id\n eid\n name\n profile\n __typename\n }\n expTag\n __typename\n }\n __typename\n }\n}\n",
    }
    # Serialize with the json module instead of patching up str(dict) with
    # string replacements: the replacements corrupt any ID or cursor that
    # contains a quote, a space or ", ", and can yield invalid JSON.
    return json.dumps(local_post_data)
def sub_head(ginseng_id):
    """Build the HTTP request headers used for one profile's requests.

    Args:
        ginseng_id: Profile ID string, appended to the profile URL to form
            the ``Referer`` header.

    Returns:
        dict: Header names mapped to header values.
    """
    profile_url = "https://live.kuaishou.com/profile/" + ginseng_id
    header_pairs = (
        ("Host", "live.kuaishou.com"),
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0"),
        ("Accept", "*/*"),
        ("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"),
        ("Accept-Encoding", "gzip, deflate, br"),
        ("Referer", profile_url),
        ("content-type", "application/json"),
        ("Cookie", "clientid=3; did=web_79aee2e860f5438d897ddb18f84c1365; client_key=65890b29; didv=1565316325000; kuaishou.live.bfb1s=3e261140b0cf7444a0ba411c6f227d88"),
    )
    return dict(header_pairs)
def data_analysis(ginseng_data):
    """Extract the ID and caption of every work from a raw JSON response.

    Args:
        ginseng_data: JSON text whose ``data.publicFeeds.list`` entry is a
            sequence of objects carrying ``photoId`` and ``caption``.

    Returns:
        list: One dict per work, in response order, holding the work ID
        and its caption.
    """
    feed_items = json.loads(ginseng_data)['data']['publicFeeds']['list']
    return [
        {'作品ID:': item['photoId'], '标题:': item['caption']}
        for item in feed_items
    ]
if __name__ == '__main__':
    # Crawl every work published by one profile, page by page, then append
    # the collected records to a text file (one record per line).
    # NOTE(review): this script issues the publicFeedsQuery (works feed) and
    # stores photoId/caption only; it does not request comment data.
    url = 'https://live.kuaishou.com/graphql'
    local_id = 'CFNF88888888'
    local_pcursor = ''
    local_works = []
    # Build the headers once (they do not change between pages) and pass the
    # ID variable itself; the quoted literal 'local_id' produced a Referer
    # pointing at a profile literally named "local_id".
    local_headers = sub_head(local_id)
    while True:
        json_data = requests.post(
            url,
            headers=local_headers,
            data=sub_data(local_id, local_pcursor),
            timeout=10,  # do not hang forever on a stalled connection
        )
        json_data.encoding = 'UTF-8-SIG'  # force the decoding used for .text (works around garbled text)
        response_text = json_data.text
        local_works += data_analysis(response_text)
        local_pcursor = json.loads(response_text)['data']['publicFeeds']['pcursor']
        # 'no_more' marks the last page. An empty/missing cursor would make
        # the next request identical to the first one, so stop on that too
        # instead of looping forever.
        if not local_pcursor or local_pcursor == 'no_more':
            break
    # The with-statement closes the file; no explicit close() is needed.
    with open('个人列表作品.txt', 'a', encoding='utf8') as fileobject:
        for d_key in local_works:
            fileobject.write(str(d_key) + "\n")
    print(len(local_works))
源码使用常见错误
1.提示模块requests不存在
# 因为暂时没有虚拟机,无法上图。有虚拟机再上图更新。~
# CMD界面执行
pip install requests
# 模块安装完毕即可