某音太难,暂时还没解决,有大佬的话麻烦提示我一下 hhhhhh,互相学习。
代码中的 3xwrbnfsfzpq83k 为博主 id(爬漂亮的东西更有动力😈)。
初步代码默认爬取博主的前三个视频,并爬取第二个视频的全部评论,先记录一下。
import re
import time
import requests
from numpy import random
class TikTok():
    """Small client for the Kuaishou web GraphQL endpoint.

    Despite the class name, every request is POSTed to
    ``https://www.kuaishou.com/graphql``. Three operations are wrapped:
    the profile summary of a user, the first videos on a user's profile
    page, and one page of root comments of a video.

    Attributes:
        url: GraphQL endpoint all requests are sent to.
        header: HTTP headers sent with every request. ``Cookie`` is empty
            by default; presumably a real browser cookie has to be filled
            in before the endpoint answers -- TODO confirm.
    """

    # Seconds to wait on the server before giving up; without a timeout
    # a stalled connection would block the crawler indefinitely.
    REQUEST_TIMEOUT = 10

    # Matches "(O3x" + 13 lowercase alphanumerics + ")" inside comment text.
    # Presumably these are user-id tags attached to @-mentions -- verify.
    _USER_TAG = re.compile('[(]O3x[a-z0-9]{13}[)]')

    _PROFILE_QUERY = "query visionProfile($userId: String) {\n visionProfile(userId: $userId) {\n result\n hostName\n userProfile {\n ownerCount {\n fan\n photo\n follow\n photo_public\n __typename\n }\n profile {\n gender\n user_name\n user_id\n headurl\n user_text\n user_profile_bg_url\n __typename\n }\n isFollowing\n __typename\n }\n __typename\n }\n}\n"

    _PHOTO_LIST_QUERY = "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"

    _COMMENT_LIST_QUERY = "query commentListQuery($photoId: String, $pcursor: String) {\n visionCommentList(photoId: $photoId, pcursor: $pcursor) {\n commentCount\n pcursor\n rootComments {\n commentId\n authorId\n authorName\n content\n headurl\n timestamp\n likedCount\n realLikedCount\n liked\n status\n authorLiked\n subCommentCount\n subCommentsPcursor\n subComments {\n commentId\n authorId\n authorName\n content\n headurl\n timestamp\n likedCount\n realLikedCount\n liked\n status\n authorLiked\n replyToUserName\n replyTo\n __typename\n }\n __typename\n }\n __typename\n }\n}\n"

    def __init__(self):
        self.url = "https://www.kuaishou.com/graphql"
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57",
            "Cookie": ""}

    def _post(self, payload):
        """POST *payload* as JSON to ``self.url`` and return the decoded body.

        Raises:
            requests.HTTPError: the server answered with a 4xx/5xx status.
            requests.Timeout: no answer within ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.post(
            url=self.url,
            headers=self.header,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail with the HTTP status instead of an opaque KeyError further down.
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _summarize_profile(profile):
        """Pick the counters and identity fields out of a ``userProfile`` node."""
        counts = profile['ownerCount']
        identity = profile['profile']
        return {
            'fan': counts['fan'],
            'follow': counts['follow'],
            'photo_public': counts['photo_public'],
            'user_name': identity['user_name'],
            'user_id': identity['user_id'],
            'user_text': identity['user_text'],
        }

    @staticmethod
    def _summarize_feeds(feeds, count):
        """Flatten the first *count* entries of *feeds* into one dict.

        Keys are suffixed with the zero-based position of the video
        (``caption0``, ``id0``, ``caption1``, ...). Fewer than *count*
        feeds (or ``None``) simply yields fewer keys.
        """
        datas = {}
        # max() keeps a negative count from turning into a "drop the tail" slice.
        wanted = (feeds or [])[:max(count, 0)]
        for i, feed in enumerate(wanted):
            photo = feed["photo"]
            datas[f'caption{i}'] = photo["caption"]
            datas[f'likeCount{i}'] = photo["likeCount"]
            datas[f'viewCount{i}'] = photo["viewCount"]
            datas[f'realLikeCount{i}'] = photo["realLikeCount"]
            datas[f'photoUrl{i}'] = photo["photoUrl"]
            datas[f'id{i}'] = photo['id']
        return datas

    @staticmethod
    def _format_comments(comments):
        """Return one ``"author:content"`` line per root comment.

        ``(O3x...)`` tags are removed from the content before formatting.
        """
        return [
            "{0}:{1}".format(
                comment["authorName"],
                TikTok._USER_TAG.sub('', comment['content']),
            )
            for comment in (comments or [])
        ]

    def get_information(self, userId):
        """Fetch the profile summary of a user.

        Args:
            userId: Profile id of the user, e.g. ``"3xwrbnfsfzpq83k"``.

        Returns:
            Dict with the keys ``fan``, ``follow``, ``photo_public``,
            ``user_name``, ``user_id`` and ``user_text``, copied from the
            ``visionProfile.userProfile`` node of the response.
        """
        payload = {
            "operationName": "visionProfile",
            "variables": {
                "userId": userId
            },
            "query": self._PROFILE_QUERY
        }
        body = self._post(payload)
        return self._summarize_profile(body['data']['visionProfile']['userProfile'])

    def get_vide(self, userId, count=3):
        """Fetch the first videos listed on a user's profile page.

        Only the first page of the listing is requested (empty ``pcursor``).

        Args:
            userId: Profile id of the user.
            count: Maximum number of videos to return; defaults to 3.

        Returns:
            Dict with, for each video position ``i``, the keys
            ``caption{i}``, ``likeCount{i}``, ``viewCount{i}``,
            ``realLikeCount{i}``, ``photoUrl{i}`` and ``id{i}``. When the
            response lists fewer than *count* videos only those are
            included, so callers should check that a key exists.
        """
        payload = {
            "operationName": "visionProfilePhotoList",
            "variables": {
                "userId": userId,
                "pcursor": "",
                "page": "profile"
            },
            "query": self._PHOTO_LIST_QUERY
        }
        # Decode the body once; every video is read from the same listing.
        body = self._post(payload)
        feeds = body['data']["visionProfilePhotoList"]["feeds"]
        return self._summarize_feeds(feeds, count)

    def get_comment(self, photoId, pcursor):
        """Print one page of root comments of a video.

        Each comment is printed as ``author:content``.

        Args:
            photoId: Id of the video, as found under ``id{i}`` in the
                result of :meth:`get_vide`.
            pcursor: Paging cursor; the script in this file passes ``""``
                for the first page.

        Returns:
            The ``pcursor`` value of the response, to be passed to the next
            call. The script in this file stops paging when it equals
            ``"no_more"``.
        """
        payload = {
            "operationName": "commentListQuery",
            "variables": {
                "photoId": photoId,
                "pcursor": pcursor
            },
            "query": self._COMMENT_LIST_QUERY
        }
        comment_list = self._post(payload)['data']["visionCommentList"]
        next_cursor = comment_list['pcursor']
        for line in self._format_comments(comment_list["rootComments"]):
            print(line)
        return next_cursor
if __name__ == '__main__':
    # Demo run: list the videos of one profile, then print every page of
    # root comments of the second listed video (key 'id1').
    crawler = TikTok()
    videos = crawler.get_vide("3xwrbnfsfzpq83k")
    target_photo_id = videos['id1']
    cursor = ''
    while True:
        # The loop ends once the returned cursor equals "no_more".
        if cursor == "no_more":
            break
        cursor = crawler.get_comment(target_photo_id, cursor)