# Reference request URLs for the timeline API (without and with since_id):
# https://m.weibo.cn/api/container/getIndex?type=uid&value=2970452952&containerid=1076032970452952
# https://m.weibo.cn/api/container/getIndex?type=uid&value=2970452952&containerid=1076032970452952&since_id=4519809071656587
import requests
from pyquery import PyQuery as pq
class weibo():
    """Fetch a fixed number of timeline pages from the m.weibo.cn container API.

    Each post found is stored in ``items`` as ``post text -> post URL`` and
    the collected texts are printed by :meth:`pri`.
    """

    # Container endpoint; ``since_id`` is appended by requests via ``params``.
    url = 'https://m.weibo.cn/api/container/getIndex?type=uid&value=2970452952&containerid=1076032970452952&'
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    }
    # Class-level defaults; every instance works on its own copies (see __init__).
    params = {'since_id': None}
    items = {}
    # Index of the first card to read on the next page (1 for the first page).
    t = 1

    def __init__(self):
        # Mutable state must be per instance: mutating the class-level dicts
        # would leak one crawl's cursor and results into every other instance.
        self.params = dict(type(self).params)
        self.items = {}

    def catch_page(self, num):
        """Fetch up to ``num`` pages, collect their posts, then print them.

        Args:
            num: Maximum number of pages to request. Fetching stops early
                when the response no longer carries a ``since_id`` cursor.
        """
        for _ in range(num):
            res = requests.get(url=self.url, headers=self.headers, params=self.params)
            data = res.json()['data']  # parse the response body once per page
            cards = data['cards']
            since_id = data['cardlistInfo'].get('since_id')
            print(since_id)
            print(len(cards))

            for card in cards[self.t:]:
                mblog = card.get('mblog')
                if mblog is None:
                    # Not every card carries a post; skip instead of crashing.
                    continue
                # Store the URL as a plain string. Passing an http(s) string
                # to pq() makes PyQuery load that URL instead of keeping it.
                self.items[pq(mblog['text']).text()] = card['scheme']

            # Only the first page skips its leading card. Clamp at 0 so that
            # later pages never start from a negative index, which would
            # re-read cards from the end of the list.
            self.t = max(self.t - 1, 0)

            if since_id is None:
                # No cursor for a following page: nothing more to fetch.
                break
            self.params['since_id'] = since_id

        self.pri()

    def pri(self):
        """Print every collected post text, each followed by a separator line."""
        for key in self.items:
            print(key)
            print('---------------------------------------------------------------\n')


if __name__ == '__main__':
    weibo().catch_page(5)
# Reference request URLs for the timeline API (without and with since_id):
# https://m.weibo.cn/api/container/getIndex?type=uid&value=2970452952&containerid=1076032970452952
# https://m.weibo.cn/api/container/getIndex?type=uid&value=2970452952&containerid=1076032970452952&since_id=4519809071656587
import random
import time
import pymongo
from bson.objectid import ObjectId
import requests
from pyquery import PyQuery as pq
class Weibo():
    """Crawl one user's whole timeline from m.weibo.cn and store it in MongoDB.

    :meth:`catch_page` yields one dict per post, :meth:`set_mongodb` inserts
    such a dict, and :meth:`get_mongodb` writes one stored binary field to
    a local file.
    """

    url = 'https://m.weibo.cn/api/container/getIndex?uid=2970452952&t=0&luicode=10000011&lfid=100103type%3D1%26amp%3Bq%3D%E6%9D%8E%E5%AD%90%E6%9F%92&containerid=1076032970452952&'
    # Alternative account:
    # url= 'https://m.weibo.cn/api/container/getIndex?uid=5295256115&t=0&luicode=10000011&lfid=100103type%3D1%26amp%3Bq%3D%E6%9D%8E%E7%88%86%E9%A6%99L&type=uid&value=5295256115&containerid=1076035295256115'
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        # A logged-in 'cookie' header can be added here if needed. Do not
        # commit real session cookies to source control.
    }
    # Class-level default; every instance works on its own copy (see __init__).
    params = {'since_id': None}
    # Number of posts yielded so far.
    count = 0
    # MongoDB server used by both set_mongodb and get_mongodb.
    mongo_host = '81.69.9.31'
    # Client created on first use and then reused.
    _client = None

    def __init__(self):
        # The paging cursor is mutated while crawling, so it must not be
        # shared between instances through the class-level dict.
        self.params = dict(type(self).params)

    def catch_page(self):
        """Yield every post of the configured timeline, page by page.

        Yields:
            dict: ``text`` (post body with markup stripped), ``origin_url``
            (the card's ``scheme`` link) and ``time`` (``created_at``).
        """
        # Per analysis of the API, the crawl ends when since_id is absent.
        while True:
            # Random 2-5 second pause before each page request.
            time.sleep(random.randint(2, 5))
            res = requests.get(url=self.url, headers=self.headers, params=self.params)
            data = res.json()['data']  # parse the response body once per page

            info = data['cardlistInfo']
            has_next_page = 'since_id' in info
            if has_next_page:
                self.params['since_id'] = info['since_id']

            # The final page has no since_id but still carries cards, so the
            # cards are processed before the stop condition is applied.
            for card in data['cards']:
                try:
                    mblog = card['mblog']
                    items = {
                        'text': pq(mblog['text']).text(),
                        'origin_url': card['scheme'],
                        'time': mblog['created_at'],
                    }
                except Exception as e:
                    # Best effort: report the malformed card and carry on
                    # with the remaining cards of the page.
                    print(e)
                    continue
                # Image download (mblog['pics'][i]['large']['url']) is
                # intentionally not performed here.
                print(items['text'])
                self.count = self.count + 1
                yield items

            if not has_next_page:
                break

        print(f"一共爬取了{self.count}条数据",)

    def _database(self):
        """Return the ``weibo`` database, creating the client on first use."""
        if self._client is None:
            self._client = pymongo.MongoClient(self.mongo_host)
        return self._client.weibo

    def set_mongodb(self, dict_):
        """Insert one post dict into the ``liziqi_up`` collection.

        Args:
            dict_: Document to insert, e.g. one item yielded by catch_page.
        """
        try:
            self._database().liziqi_up.insert_one(dict_)
        except Exception as e1:
            # Best effort: a failed insert is reported but must not abort
            # the crawl that feeds this method.
            print(e1)

    def get_mongodb(self):
        """Write field ``'0'`` of one fixed document to ``./demo01.jpg``.

        The document is looked up by a hard-coded ``_id`` in the ``up``
        collection; nothing is written when it does not exist.
        """
        my_set = self._database().up
        data = my_set.find_one({'_id': ObjectId('5fa49acf6c51fac720c5fb3a')})
        if data is None:
            print('document not found')
            return
        print(data['0'])
        with open('./demo01.jpg', 'wb') as file:
            file.write(data['0'])


if __name__ == '__main__':
    crawler = Weibo()
    for dict_ in crawler.catch_page():
        crawler.set_mongodb(dict_)
    # crawler.get_mongodb()