荔枝直播(分享页):
分享页主页直播列表:(抓包分析)
- 接口:https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=1
- 支持获取n页数据,注意请求间隔;
- get请求,获取分享页必须的 liveId 字段
直播分享页:
- html:https://appweb.lizhi.fm/live/share?liveId=5190925580233002038&njId=2552360964061657132&duserId=138542e7ea551a918c42396e0488695b&from=iosBrowser
- 中间两个参数(njId、duserId)非必须,仅需 liveId 与 from
- get请求,获取 userId 及 liveUrl 字段
- 该页面请求时需设置移动端User-Agent
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
编码实现:
import requests
import time
import json
import re
class LiveList(object):
    """Collect live-room records from the Lizhi share-page list endpoint.

    Public fields:
      liveList  -- records accumulated so far by getLiveListPages().
      pageLimit -- highest page number that will be requested.
    """

    def __init__(self, pageLimit=10):
        self.liveList = []
        self.pageLimit = pageLimit

    def getLiveListPage(self, pageNum=1):
        """Fetch a single page of the live list.

        Args:
            pageNum: page number placed in the ``pageNum`` query parameter.

        Returns:
            A ``(count, records)`` tuple. A network failure, a body that is
            not JSON, or a payload without a ``ret.dataList`` list is
            reported via ``print`` (where an exception exists) and yields
            ``(0, [])`` instead of raising.
        """
        url = 'https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=' + str(pageNum)
        print(url)
        # Throttle: wait one second before every request.
        time.sleep(1)
        try:
            # A timeout keeps a stalled connection from hanging the crawl.
            response = requests.get(url, timeout=10)
            response.encoding = 'utf-8'
            infos = json.loads(response.text)
        except (requests.RequestException, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            print(e)
            return (0, [])

        # The payload may be valid JSON without the expected nesting
        # (for example an error object), so never index it blindly.
        ret = infos.get('ret') if isinstance(infos, dict) else None
        dataList = ret.get('dataList') if isinstance(ret, dict) else None
        if not isinstance(dataList, list):
            dataList = []
        return (len(dataList), dataList)

    def getLiveListPages(self):
        """Fetch pages 1..pageLimit, stopping early at the first empty page.

        Records are appended to ``self.liveList``, so calling this again on
        the same instance appends to what is already stored.

        Returns:
            ``self.liveList`` (the same list object held by the instance).
        """
        cur_page = 1
        while cur_page <= self.pageLimit:
            page_len, page_liveList = self.getLiveListPage(cur_page)
            if page_len <= 0:
                break
            self.liveList.extend(page_liveList)
            cur_page = cur_page + 1
        return self.liveList
def parseRegular(param="liveUrl"):
    """Build the regex that captures the quoted value assigned to *param*.

    The compiled pattern matches text of the form ``<param> = "<value>";``
    and captures ``<value>`` (any run of characters other than ``;``).

    Args:
        param: name on the left-hand side of the assignment; it is inserted
            into the pattern verbatim (not regex-escaped).

    Returns:
        A compiled pattern object created with ``re.DOTALL``.
    """
    assignment_tail = r' = \"([^;]*)\";'
    return re.compile(param + assignment_tail, re.DOTALL)
def parseShareURL(liveId):
    """Fetch a live share page and extract its ``liveUrl`` and ``userId``.

    Args:
        liveId: identifier placed in the ``liveId`` query parameter; it is
            converted with ``str()`` so numeric ids are accepted too.

    Returns:
        ``{"liveUrl": ..., "userId": ...}`` holding the first match of each
        field, or ``None`` when either field is absent from the page.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    url1 = "https://appweb.lizhi.fm/live/share?liveId=" + str(liveId) + "&from=iosBrowser"
    # The request is sent with a mobile (iPhone) User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    }
    # A timeout keeps a stalled connection from hanging the caller.
    response_1 = requests.get(url1, headers=headers, timeout=10)
    # Throttle: pause one second after each share-page request.
    time.sleep(1)
    response_1.encoding = 'utf-8'
    page_text = response_1.text
    liveUrl = parseRegular('liveUrl').findall(page_text)
    userId = parseRegular('userId').findall(page_text)
    # Both fields are required; indexing an empty match list would raise
    # IndexError, so treat a partially matching page as "no result".
    if not liveUrl or not userId:
        return None
    return {"liveUrl": liveUrl[0], "userId": userId[0]}
if __name__ == '__main__':
    # Suggestion: re-run this roughly once an hour to refresh the data.
    # Fetch a single page of the live list.
    fetcher = LiveList(1)
    live_items = fetcher.getLiveListPages()
    # Report how many live records were fetched.
    print('liveList count = ', len(live_items))

    # Merged records (list entry + share-page fields) for rooms that parsed.
    merged_records = []
    for live_item in live_items:
        live_id = live_item['liveId']
        print(live_id)
        # Parse the share page of the current live room.
        share_fields = parseShareURL(live_id)
        if share_fields is None:
            print("直播已结束!")
            continue
        print('直播中...')
        merged_records.append({**live_item, **share_fields})

    # Print the final result.
    print(len(merged_records), merged_records)
    # Persist the result as local JSON.
    with open('liveList.json', 'w') as out_file:
        json.dump(merged_records, out_file)
    print('over!!!')
Print:
https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=1
liveList count = 10
5190925580233002038
直播中...
5386445300058660864
直播中...
5190898481141075510
直播中...
5190925580232704054
直播中...
5190925580233034294
直播中...
5190898481140796470
直播中...
5342496960865640448
直播中...
5386670725150109696
直播中...
5190898481141102646
直播中...
5190899559177836086
直播中...
10 [{'radioConver': 'http://cdnimg103.lizhi.fm/studio/2020/09/21/2829150432296549942.jpg', 'liveStatus': 1, 'totalCount': '25309', 'liveName': '大凯故事会', 'userName': '大凯说', 'liveId': '5190925580233002038', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2017/05/22/2603223407410991618_80x80.jpg', 'liveStartTime': 1626847200000, 'liveEndTime': 1626861600000, 'tag': '', 'liveUrl': 'http://pull102.gzlz307.com/home/6d944f6ab72b3d069517146587a23c39/playlist.m3u8?only-audio=1', 'userId': '2552360964061657132'}, {'radioConver': 'http://cdnimg103.lizhi.fm//studio/2021/07/14/2884134180267993654.jpg', 'liveStatus': 1, 'totalCount': '3154', 'liveName': '日常的午后尬聊', 'userName': '搞事儿ღ 养声糖', 'liveId': '5386445300058660864', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/07/26/2818635229221560322_80x80.jpg', 'liveStartTime': 1626856200000, 'liveEndTime': 1626861600000, 'tag': '脱口秀', 'liveUrl': 'http://pull102.gzlz307.com/home/94aa9ab2951090660da928e2418e5a76/playlist.m3u8?only-audio=1', 'userId': '14298657'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2020/10/19/2834415944099236918.jpg', 'liveStatus': 1, 'totalCount': '10128', 'liveName': '性感男神在线直播', 'userName': '王帅帅☀幸好有你', 'liveId': '5190898481141075510', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/04/25/2869209908380339202_80x80.jpg', 'liveStartTime': 1626854400000, 'liveEndTime': 1626856200000, 'tag': '', 'liveUrl': 'http://pull102.gzlz307.com/home/aef22384abe68f32729d855ee12b99bc/playlist.m3u8?only-audio=1', 'userId': '2545439804331933740'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/07/05/2882414548083745334.jpg', 'liveStatus': 1, 'totalCount': '11968', 'liveName': '百变老舅正在直播', 'userName': '老舅⁹ 招主播', 'liveId': '5190925580232704054', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/07/05/2882441263372247554_80x80.jpg', 'liveStartTime': 1626854419000, 'liveEndTime': 1626861619000, 'tag': '脱口秀', 'liveUrl': 
'http://pull102.gzlz307.com/home/2b5aa92a75c9a6b76fafebd6e38d218d/playlist.m3u8?only-audio=1', 'userId': '5086955461592188972'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/01/04/2848695138400300598.jpg', 'liveStatus': 1, 'totalCount': '2794', 'liveName': '夺宝、塔罗牌好运直播间❤️', 'userName': '墨子轩🍄招主播', 'liveId': '5190925580233034294', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/12/02/2842569104138020866_80x80.jpg', 'liveStartTime': 1626856200000, 'liveEndTime': 1626859800000, 'tag': '古风', 'liveUrl': 'http://pull102.gzlz307.com/home/02e57ae2959d3a3104caf23561438501/playlist.m3u8?only-audio=1', 'userId': '2555204161544508972'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/03/16/2861859265127815734.jpg', 'liveStatus': 1, 'totalCount': '1279', 'liveName': '今天也是圆气满满的一天鸭', 'userName': '٩🥳۶圆气满满鸭~🥀', 'liveId': '5190898481140796470', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/05/25/2874894204829581314_80x80.jpg', 'liveStartTime': 1626948000000, 'liveEndTime': 1626951600000, 'tag': '', 'liveUrl': 'http://pull102.gzlz307.com/home/6c9eec0f07b9d9c31ba9a63a090e42ad/playlist.m3u8?only-audio=1', 'userId': '5037691755063110188'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/06/18/2879319034757152310.jpg', 'liveStatus': 1, 'totalCount': '10569602', 'liveName': '恋行男友', 'userName': '恋行-高福利招人💝', 'liveId': '5342496960865640448', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/05/21/2874104725109944834_80x80.jpg', 'liveStartTime': 1621590400000, 'liveEndTime': 1624182400000, 'tag': '男友', 'liveUrl': 'http://pull102.gzlz307.com/home/3ceb87599a87bc1e7152ad4419faf21b/playlist.m3u8?only-audio=1', 'userId': '5023457267080509996'}, {'radioConver': 'http://cdnimg103.lizhi.fm//studio/2021/07/07/2882818898805649974.jpg', 'liveStatus': 1, 'totalCount': '1414', 'liveName': '甜妹求带飞上星星✨', 'userName': 'dy.奶糖_Jenny', 'liveId': '5386670725150109696', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2021/07/07/2882783128169811970_80x80.jpg', 'liveStartTime': 
1626856323000, 'liveEndTime': 1626859923000, 'tag': '情感', 'liveUrl': 'http://pull102.gzlz307.com/home/1c158cec050b10c5545fef19be825eed/playlist.m3u8?only-audio=1', 'userId': '5022849964157813292'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/07/20/2885259126691449398.jpg', 'liveStatus': 1, 'totalCount': '24076', 'liveName': '♬.星辰音乐电台 --温暖治愈', 'userName': '星辰✨冠名LuLu👑', 'liveId': '5190898481141102646', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/09/14/2827924329089217026_80x80.jpg', 'liveStartTime': 1626856200000, 'liveEndTime': 1626867000000, 'tag': '连线', 'liveUrl': 'http://pull102.gzlz307.com/home/160aaf1435dd8720a197c69e98632035/playlist.m3u8?only-audio=1', 'userId': '2679352497711647276'}, {'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/05/24/2874608674803885622.jpg', 'liveStatus': 1, 'totalCount': '237', 'liveName': '🍷别拿豆包 不当干粮🍷', 'userName': 'DJ安哥🍷天籁', 'liveId': '5190899559177836086', 'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/08/31/2825278312823928834_80x80.jpg', 'liveStartTime': 1626858000000, 'liveEndTime': 1626861600000, 'tag': '情感', 'liveUrl': 'http://pull102.gzlz307.com/home/c45cbdb813f2a0521b3a4d077033b373/playlist.m3u8?only-audio=1', 'userId': '5130700645405291820'}]
over!!!
得到记录信息:
{'radioConver': 'http://cdnimg103.lizhi.fm/studio/2021/05/24/2874608674803885622.jpg',
'liveStatus': 1,
'totalCount': '237',
'liveName': '🍷别拿豆包 不当干粮🍷',
'userName': 'DJ安哥🍷天籁',
'liveId': '5190899559177836086',
'userPortrait': 'http://cdnimg103.lizhi.fm/user/2020/08/31/2825278312823928834_80x80.jpg',
'liveStartTime': 1626858000000,
'liveEndTime': 1626861600000,
'tag': '情感',
'liveUrl': 'http://pull102.gzlz307.com/home/c45cbdb813f2a0521b3a4d077033b373/playlist.m3u8?only-audio=1',
'userId': '5130700645405291820'}
多进程的优化
import concurrent.futures
# Number of worker processes used by the demo pool.
group_max_workers = 5


def evaluate_item(tests):
    """Return *tests* unchanged; stands in for real per-item work."""
    return tests


# Flattened results gathered from all workers, in input order.
item_x_list = []

# Worker processes may re-import this module (spawn/forkserver start
# methods), so the pool must only be created when run as a script.
if __name__ == '__main__':
    # Ten five-element lists: [1,2,3,4,5], [2,2,3,4,5], ..., [10,2,3,4,5].
    work_items = [[first, 2, 3, 4, 5] for first in range(1, 11)]
    with concurrent.futures.ProcessPoolExecutor(
        max_workers=group_max_workers
    ) as executor:
        # executor.map yields results in the order the inputs were given.
        for result in executor.map(evaluate_item, work_items):
            item_x_list.extend(result)
    print(item_x_list)
# [1, 2, 3, 4, 5, 2, 2, 3, 4, 5, 3, 2, 3, 4, 5, 4, 2, 3, 4, 5, 5, 2, 3, 4, 5, 6, 2, 3, 4, 5, 7, 2, 3, 4, 5, 8, 2, 3, 4, 5, 9, 2, 3, 4, 5, 10, 2, 3, 4, 5]
import requests
import time
import json
import re
import concurrent.futures
class LiveList(object):
    """Collect live-room records from the Lizhi share-page list endpoint.

    Public fields:
      liveList  -- records accumulated so far by getLiveListPages().
      pageLimit -- highest page number that will be requested.
    """

    def __init__(self, pageLimit=10):
        self.liveList = []
        self.pageLimit = pageLimit

    def getLiveListPage(self, pageNum=1):
        """Fetch a single page of the live list.

        Args:
            pageNum: page number placed in the ``pageNum`` query parameter.

        Returns:
            A ``(count, records)`` tuple. A network failure, a body that is
            not JSON, or a payload without a ``ret.dataList`` list is
            reported via ``print`` (where an exception exists) and yields
            ``(0, [])`` instead of raising.
        """
        url = 'https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=' + str(pageNum)
        print(url)
        # Throttle: wait one second before every request.
        time.sleep(1)
        try:
            # A timeout keeps a stalled connection from hanging the crawl.
            response = requests.get(url, timeout=10)
            response.encoding = 'utf-8'
            infos = json.loads(response.text)
        except (requests.RequestException, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            print(e)
            return (0, [])

        # The payload may be valid JSON without the expected nesting
        # (for example an error object), so never index it blindly.
        ret = infos.get('ret') if isinstance(infos, dict) else None
        dataList = ret.get('dataList') if isinstance(ret, dict) else None
        if not isinstance(dataList, list):
            dataList = []
        return (len(dataList), dataList)

    def getLiveListPages(self):
        """Fetch pages 1..pageLimit, stopping early at the first empty page.

        Records are appended to ``self.liveList``, so calling this again on
        the same instance appends to what is already stored.

        Returns:
            ``self.liveList`` (the same list object held by the instance).
        """
        cur_page = 1
        while cur_page <= self.pageLimit:
            page_len, page_liveList = self.getLiveListPage(cur_page)
            if page_len <= 0:
                break
            self.liveList.extend(page_liveList)
            cur_page = cur_page + 1
        return self.liveList
def parseRegular(param="liveUrl"):
    """Build the regex that captures the quoted value assigned to *param*.

    The compiled pattern matches text of the form ``<param> = "<value>";``
    and captures ``<value>`` (any run of characters other than ``;``).

    Args:
        param: name on the left-hand side of the assignment; it is inserted
            into the pattern verbatim (not regex-escaped).

    Returns:
        A compiled pattern object created with ``re.DOTALL``.
    """
    assignment_tail = r' = \"([^;]*)\";'
    return re.compile(param + assignment_tail, re.DOTALL)
def parseShareURL(liveId):
    """Fetch a live share page and extract its ``liveUrl`` and ``userId``.

    Unlike a throttled crawl, this function does not sleep between
    requests.

    Args:
        liveId: identifier placed in the ``liveId`` query parameter; it is
            converted with ``str()`` so numeric ids are accepted too.

    Returns:
        ``{"liveUrl": ..., "userId": ...}`` holding the first match of each
        field, or ``None`` when either field is absent from the page.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    url1 = "https://appweb.lizhi.fm/live/share?liveId=" + str(liveId) + "&from=iosBrowser"
    # The request is sent with a mobile (iPhone) User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
    }
    # A timeout keeps a stalled connection from hanging the caller.
    response_1 = requests.get(url1, headers=headers, timeout=10)
    response_1.encoding = 'utf-8'
    page_text = response_1.text
    liveUrl = parseRegular('liveUrl').findall(page_text)
    userId = parseRegular('userId').findall(page_text)
    # Both fields are required; indexing an empty match list would raise
    # IndexError, so treat a partially matching page as "no result".
    if not liveUrl or not userId:
        return None
    return {"liveUrl": liveUrl[0], "userId": userId[0]}
def dealLiveItemForUserInfo(liveItem):
    """Enrich one live-list record with the fields parsed from its share page.

    Args:
        liveItem: mapping that contains at least a ``'liveId'`` key.

    Returns:
        A new dict combining ``liveItem`` with the parsed share-page fields
        (share-page values win on key clashes), or ``None`` when
        ``parseShareURL`` returns ``None``. A status line is printed in
        either case.
    """
    share_fields = parseShareURL(liveItem['liveId'])
    if share_fields is not None:
        print('直播中...')
        return {**liveItem, **share_fields}
    print("直播已结束!")
    return None
def dealLiveListAsFinalResWithWokers(liveList, max_works=5):
    """Resolve every live-list record in parallel using a process pool.

    Each record is handed to ``dealLiveItemForUserInfo`` in a worker
    process; records for which it returns ``None`` are dropped.

    Args:
        liveList: iterable of live-list records.
        max_works: number of worker processes (5 by default).

    Returns:
        List of the non-``None`` results, in the order of ``liveList``.
    """
    pool = concurrent.futures.ProcessPoolExecutor(max_workers=max_works)
    with pool as executor:
        outcomes = executor.map(dealLiveItemForUserInfo, liveList)
        return [outcome for outcome in outcomes if outcome is not None]
def saveFinaleResJsonToLocalPath(final_res, l_path='liveList.json'):
    """Write *final_res* as JSON to the file at *l_path*.

    Args:
        final_res: JSON-serializable object to store.
        l_path: destination file path; the file is created or overwritten.
            Defaults to ``'liveList.json'`` in the current directory.
    """
    # Honor the caller-supplied path instead of a hard-coded file name.
    # json.dump escapes non-ASCII by default, so the output is plain ASCII;
    # the explicit encoding just makes that independent of the locale.
    with open(l_path, 'w', encoding='utf-8') as file_obj:
        json.dump(final_res, file_obj)
if __name__ == '__main__':
    # Crawl one page of the list, resolve each record with five worker
    # processes, save the result, then print a summary.
    pageLimit = 1
    liveList = LiveList(pageLimit).getLiveListPages()
    final_res = dealLiveListAsFinalResWithWokers(liveList, max_works=5)
    saveFinaleResJsonToLocalPath(final_res, 'liveList.json')

    # Summary: pages requested, records fetched, valid records saved.
    summary_parts = (
        '共请求 ' + str(pageLimit) + '页数据\n',
        '共获取 ' + str(len(liveList)) + '条主播记录\n',
        '共保存 ' + str(len(final_res)) + '条有效记录\n',
        'over!!!',
    )
    # print() joins the parts with single spaces, as separate arguments would.
    print(*summary_parts)
Log:
https://appweb.lizhi.fm/smallApp/getLiveList?pageNum=1
直播中...
直播中...
直播中...
直播中...
直播中...
直播中...
直播中...
直播中...
直播中...
直播中...
共请求 1页数据
共获取 10条主播记录
共保存 10条有效记录
over!!!