"""
@author:zhaocuixa
@date:2025/6/5
@function: multi-process implementation (uses multiprocessing.Process, not threads)
"""
# 获取userID
import requests
import json
import jmespath
import time
from multiprocessing import Process,Lock
"""
@author:zhaocuixa
@date:2025/6/5
@function:朱雀用户日志列表接口,获取朱雀列表某一页userid
"""
def getUserids(env, startDate, endDate, accessToken, pageNum, pageSize, timeout=60):
    """Fetch one page of the NLP log list and return the total count and user IDs.

    POSTs a JSON query to the ``getNlpList`` endpoint under ``env``.

    Args:
        env: Base URL that the endpoint path is appended to.
        startDate: Start of the query window; sent both at the top level and
            inside ``dateRange``. The caller in this file passes strings such
            as ``"2025-06-04 09:00"``.
        endDate: End of the query window; sent the same way as ``startDate``.
        accessToken: Value of the ``access-token`` request header.
        pageNum: Page index, sent as ``startPage``.
        pageSize: Page size, sent as ``endPage``.
        timeout: Timeout in seconds handed to ``requests`` so that a stalled
            server cannot block the caller (and any ``join()`` on it) forever.

    Returns:
        A ``(total, userids)`` tuple: ``total`` is ``data.total`` from the
        response and ``userids`` is the list of ``userId`` values found under
        ``data.data`` (an empty list when the response has none).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = env + "/api/bss/internal/bomp/bomp-logdata-adapter/nlpData/getNlpList"
    payload = json.dumps({
        "pageType": "nlp_detail",
        "startDate": startDate,
        "endDate": endDate,
        "dateRange": {
            "startDate": startDate,
            "endDate": endDate,
        },
        "startPage": pageNum,
        "endPage": pageSize,
    })
    headers = {
        'Content-Type': 'application/json',
        'access-token': accessToken,
    }
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Surface HTTP failures as such instead of as confusing JSON/KeyError errors.
    response.raise_for_status()
    data = response.json()['data']
    total = data['total']  # total number of records matching the query
    # jmespath returns None when the path is absent; callers extend() with the
    # result, so normalise that case to an empty list.
    userids = jmespath.search('data[*].userId', data) or []
    return total, userids
# Module-level lock; getUseridList acquires it on every loop iteration.
# NOTE(review): under the "spawn" start method each child re-imports this module
# and would build its own Lock, so it would not be shared - confirm the start
# method in use.
file_lock = Lock()
"""
@author:zhaocuixa
@date:2025/6/5
@function:朱雀用户日志列表接口,获取朱雀列表所有userid
"""
def getUseridList(env, startDate, endDate, accessToken, pageSize, page, i, UseridList, total, step=1):
    """Append the user IDs of pages ``i, i + step, ...`` (below ``page``) to ``UseridList``.

    Args:
        env, startDate, endDate, accessToken, pageSize: Forwarded unchanged to
            ``getUserids``.
        page: Number of pages; page indices run from 0 to ``page - 1``.
        i: First page index this call handles.
        UseridList: List-like object that receives the fetched IDs via
            ``extend``.
        total: Record count known so far; refreshed from every response.
        step: Distance between the pages this call handles. The default of 1
            walks every page from ``i`` to the end. When several workers are
            started at consecutive offsets, pass the number of workers so
            their page sets do not overlap.

    Returns:
        None. Results are delivered through ``UseridList``.
    """
    last_page = page - 1
    while i < page:
        # When ``total`` is an exact multiple of ``pageSize`` the last page is
        # empty. Stop instead of re-appending the previous page's IDs (or
        # failing on an unassigned ``userids`` if this is the first iteration).
        if i == last_page and total % pageSize == 0:
            break
        total, userids = getUserids(env, startDate, endDate, accessToken, i, pageSize)
        with file_lock:
            UseridList.extend(userids)
        i += step
    return
if __name__ == "__main__":
    # Placeholder connection settings and query window.
    env = 'http://xxxx'
    accessToken = 'xxx'
    startDate = "2025-06-04 09:00"
    endDate = "2025-06-04 10:00"
    pageNum = 0
    pageSize = 10

    # Page 0 is fetched up front; its response also carries the total count.
    total, userids = getUserids(env, startDate, endDate, accessToken, pageNum, pageSize)
    print('未去重userID数量', total)

    UseridList = []
    start = time.time()
    print("********************** 开始计时 **********************")
    if total > pageSize:
        # More than one page: hand the remaining pages to four worker processes.
        page = total // pageSize + 1
        UseridList = userids
        # NOTE: UseridList is a plain list, so every worker process extends its
        # own copy; the list printed below still only holds the first page.
        workers = [
            Process(
                target=getUseridList,
                args=(env, startDate, endDate, accessToken, pageSize, page,
                      offset, UseridList, total),
            )
            for offset in range(1, 5)
        ]
        for proc in workers:
            proc.start()
            print(proc)
        for proc in workers:
            proc.join()
            print(proc)
    elif total != 0:
        # Everything fitted into the first page.
        UseridList = userids
    print(len(UseridList))
    # (de-duplication via set() was left commented out in this version)
    end = time.time()
    print("********************** 结束计时 **********************")
    print("总耗时:", end - start)  # author recorded 46 (presumably seconds)
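# Result of the version above: only 10 IDs (the first page) are printed.
# The data is not shared: each Process works on its own copy of the plain list,
# so the workers' extend() calls never reach the parent process.
# Retried with multiprocessing.Queue (version below) - that worked.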
"""
@author:zhaocuixa
@date:2025/6/5
@function: multi-process implementation (uses multiprocessing.Process and Queue, not threads)
"""
# 获取userID
import requests
import json
import jmespath
import time
from multiprocessing import Process,Queue,Lock
"""
@author:zhaocuixa
@date:2025/6/5
@function:朱雀用户日志列表接口,获取朱雀列表某一页userid
"""
def getUserids(env, startDate, endDate, accessToken, pageNum, pageSize, timeout=60):
    """Fetch one page of the NLP log list and return the total count and user IDs.

    POSTs a JSON query to the ``getNlpList`` endpoint under ``env``.

    Args:
        env: Base URL that the endpoint path is appended to.
        startDate: Start of the query window; sent both at the top level and
            inside ``dateRange``. The caller in this file passes strings such
            as ``"2025-06-04 09:00"``.
        endDate: End of the query window; sent the same way as ``startDate``.
        accessToken: Value of the ``access-token`` request header.
        pageNum: Page index, sent as ``startPage``.
        pageSize: Page size, sent as ``endPage``.
        timeout: Timeout in seconds handed to ``requests`` so that a stalled
            server cannot block the caller (and any ``join()`` on it) forever.

    Returns:
        A ``(total, userids)`` tuple: ``total`` is ``data.total`` from the
        response and ``userids`` is the list of ``userId`` values found under
        ``data.data`` (an empty list when the response has none).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = env + "/api/bss/internal/bomp/bomp-logdata-adapter/nlpData/getNlpList"
    payload = json.dumps({
        "pageType": "nlp_detail",
        "startDate": startDate,
        "endDate": endDate,
        "dateRange": {
            "startDate": startDate,
            "endDate": endDate,
        },
        "startPage": pageNum,
        "endPage": pageSize,
    })
    headers = {
        'Content-Type': 'application/json',
        'access-token': accessToken,
    }
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Surface HTTP failures as such instead of as confusing JSON/KeyError errors.
    response.raise_for_status()
    data = response.json()['data']
    total = data['total']  # total number of records matching the query
    # jmespath returns None when the path is absent; callers put/extend the
    # result, so normalise that case to an empty list.
    userids = jmespath.search('data[*].userId', data) or []
    return total, userids
# Module-level lock; getUseridList acquires it on every loop iteration.
# NOTE(review): under the "spawn" start method each child re-imports this module
# and would build its own Lock, so it would not be shared - confirm the start
# method in use.
file_lock = Lock()
"""
@author:zhaocuixa
@date:2025/6/5
@function:朱雀用户日志列表接口,获取朱雀列表所有userid
"""
def getUseridList(env, startDate, endDate, accessToken, pageSize, page, i, q, total, step=4):
    """Fetch pages ``i, i + step, ...`` (below ``page``) and put each page's IDs on ``q``.

    Args:
        env, startDate, endDate, accessToken, pageSize: Forwarded unchanged to
            ``getUserids``.
        page: Number of pages; page indices run from 0 to ``page - 1``.
        i: First page index this worker handles.
        q: Queue that receives one list of user IDs per fetched page.
        total: Record count known so far; refreshed from every response.
        step: Distance between the pages this worker handles. It should equal
            the number of workers started at consecutive offsets (4 in the
            caller in this file) so that their page sets do not overlap.

    Returns:
        None. Results are delivered through ``q``.
    """
    last_page = page - 1
    while i < page:
        # When ``total`` is an exact multiple of ``pageSize`` the last page is
        # empty. Stop instead of queueing the previous page's IDs again (or
        # failing on an unassigned ``userids`` if this is the first iteration).
        if i == last_page and total % pageSize == 0:
            break
        total, userids = getUserids(env, startDate, endDate, accessToken, i, pageSize)
        with file_lock:
            q.put(userids)
            # NOTE: qsize() is approximate and raises NotImplementedError on
            # platforms without sem_getvalue() (e.g. macOS).
            print('当前线程', i, '现在userid数量', q.qsize())
        i += step
    return
if __name__ == "__main__":
    # Local import: Queue.get(timeout=...) signals "nothing yet" with queue.Empty.
    from queue import Empty

    # Placeholder connection settings and query window.
    env = 'http://xxx'
    accessToken = 'xxx'
    startDate = "2025-06-04 09:00"
    endDate = "2025-06-04 10:00"
    pageNum = 0
    pageSize = 10

    # Page 0 is fetched up front; its response also carries the total count.
    total, userids = getUserids(env, startDate, endDate, accessToken, pageNum, pageSize)
    print('未去重userID数量', total)

    UseridList = []
    q = Queue()
    lst = []        # IDs collected from the workers
    batches = 0     # number of per-page lists received from the queue
    start = time.time()
    print("********************** 开始计时 **********************")
    if total == 0:
        pass
    elif total <= pageSize:
        UseridList = userids
    else:
        page = total // pageSize + 1
        UseridList = userids
        t_list = []
        for i in range(1, 5):
            t = Process(
                target=getUseridList,
                args=(env, startDate, endDate, accessToken, pageSize, page, i, q, total),
            )  # one process per starting offset
            t_list.append(t)
            t.start()
        # Drain the queue BEFORE joining: a process that has put items on a
        # Queue does not exit until they are flushed to the pipe, so joining
        # first can deadlock once the pipe buffer fills up.
        while any(t.is_alive() for t in t_list) or not q.empty():
            try:
                batch = q.get(timeout=0.1)
            except Empty:
                continue
            batches += 1
            lst.extend(batch)
        for t in t_list:
            t.join()
            print(t)
    print(batches)
    print(lst + UseridList)
    print(len(lst + UseridList))
    end = time.time()
    print("********************** 结束计时 **********************")
    print("总耗时:", end - start)  # author recorded 46 (presumably seconds)
from multiprocessing import Manager, Process
def worker(shared_list, i):
    """Add ``i`` as a new last element of ``shared_list``.

    The caller in this file passes a ``Manager().list()`` proxy, so the
    element becomes visible to the parent process.
    """
    shared_list.append(i)
if __name__ == '__main__':
    manager = Manager()
    shared_list = manager.list()
    # Rebinding the name to a plain list (e.g. ``shared_list = [1]``) would
    # replace the proxy, and the data would no longer be shared.
    procs = [Process(target=worker, args=(shared_list, n)) for n in range(3)]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()
    print(shared_list)  # author observed [0, 1, 2]
import multiprocessing
def increment(num, lock):
    """Add one to ``num.value`` while holding ``lock``."""
    with lock:
        current = num.value
        num.value = current + 1
if __name__ == '__main__':
    # Shared C int ('i') starting at 0, plus an explicit lock for the updates.
    counter = multiprocessing.Value('i', 0)
    lock = multiprocessing.Lock()
    procs = []
    for _ in range(5):
        procs.append(multiprocessing.Process(target=increment, args=(counter, lock)))
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()
    print(counter.value)  # prints 5: one increment per process
"""
@author:zhaocuixa
@date:2025/6/5
@function: 多进程实现
"""
# 获取userID
import requests
import json
import jmespath
import time
from multiprocessing import Process,Manager,Lock
"""
@author:zhaocuixa
@date:2025/6/5
@function:朱雀用户日志列表接口,获取朱雀列表某一页userid
"""
def getUserids(env, startDate, endDate, accessToken, pageNum, pageSize, timeout=60):
    """Fetch one page of the NLP log list and return the total count and user IDs.

    POSTs a JSON query to the ``getNlpList`` endpoint under ``env``.

    Args:
        env: Base URL that the endpoint path is appended to.
        startDate: Start of the query window; sent both at the top level and
            inside ``dateRange``. The caller in this file passes strings such
            as ``"2025-06-04 09:00"``.
        endDate: End of the query window; sent the same way as ``startDate``.
        accessToken: Value of the ``access-token`` request header.
        pageNum: Page index, sent as ``startPage``.
        pageSize: Page size, sent as ``endPage``.
        timeout: Timeout in seconds handed to ``requests`` so that a stalled
            server cannot block the caller (and any ``join()`` on it) forever.

    Returns:
        A ``(total, userids)`` tuple: ``total`` is ``data.total`` from the
        response and ``userids`` is the list of ``userId`` values found under
        ``data.data`` (an empty list when the response has none).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = env + "/api/bss/internal/bomp/bomp-logdata-adapter/nlpData/getNlpList"
    payload = json.dumps({
        "pageType": "nlp_detail",
        "startDate": startDate,
        "endDate": endDate,
        "dateRange": {
            "startDate": startDate,
            "endDate": endDate,
        },
        "startPage": pageNum,
        "endPage": pageSize,
    })
    headers = {
        'Content-Type': 'application/json',
        'access-token': accessToken,
    }
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Surface HTTP failures as such instead of as confusing JSON/KeyError errors.
    response.raise_for_status()
    data = response.json()['data']
    total = data['total']  # total number of records matching the query
    # jmespath returns None when the path is absent; callers pass the result to
    # set()/extend(), so normalise that case to an empty list.
    userids = jmespath.search('data[*].userId', data) or []
    return total, userids
# Module-level lock; getUseridList acquires it on every loop iteration.
# NOTE(review): under the "spawn" start method each child re-imports this module
# and would build its own Lock, so it would not be shared - confirm the start
# method in use.
file_lock = Lock()
"""
@author:zhaocuixa
@date:2025/6/5
@function:朱雀用户日志列表接口,获取朱雀列表所有userid
"""
def getUseridList(env, startDate, endDate, accessToken, pageSize, page, i, UseridList1, total=None, step=4):
    """Fetch pages ``i, i + step, ...`` (below ``page``) and add their IDs to ``UseridList1``.

    Each page's IDs are de-duplicated with ``set`` before being appended.

    Args:
        env, startDate, endDate, accessToken, pageSize: Forwarded unchanged to
            ``getUserids``.
        page: Number of pages; page indices run from 0 to ``page - 1``.
        i: First page index this worker handles.
        UseridList1: Shared list-like object (a ``Manager().list()`` proxy in
            the caller in this file) that receives the IDs.
        total: Record count, if already known. It is refreshed from every
            response. While it is unknown the last page is always fetched.
        step: Distance between the pages this worker handles. It should equal
            the number of workers started at consecutive offsets (4 in the
            caller in this file) so that their page sets do not overlap.

    Returns:
        None. Results are delivered through ``UseridList1``.
    """
    last_page = page - 1
    while i < page:
        # The last page is empty when the known total is an exact multiple of
        # ``pageSize``. Stop instead of re-appending the previous page's IDs.
        # With no total known yet, fetch the page rather than reading an
        # unassigned variable.
        if i == last_page and total is not None and total % pageSize == 0:
            break
        total, userids = getUserids(env, startDate, endDate, accessToken, i, pageSize)
        # Guard the shared-list update and its length read together so the
        # printed count belongs to this worker's extend.
        with file_lock:
            UseridList1.extend(list(set(userids)))
            print('当前进程', i, '现在数量', len(UseridList1))
        i += step
    return
if __name__ == "__main__":
    # Placeholder connection settings and query window.
    env = 'http://xxx'
    accessToken = 'xxx'
    startDate = "2025-06-04 09:00"
    endDate = "2025-06-04 10:00"
    pageNum = 0
    pageSize = 50

    # Page 0 is fetched up front; its response also carries the total count.
    total, userids = getUserids(env, startDate, endDate, accessToken, pageNum, pageSize)
    print('未去重userID数量', total)

    # Manager-backed list: extends made in worker processes are visible here.
    manager = Manager()
    UseridList1 = manager.list()
    UseridList = []
    start = time.time()
    print("********************** 开始计时 **********************")
    if total > pageSize:
        # More than one page: hand the remaining pages to four worker processes.
        page = total // pageSize + 1
        UseridList = userids
        workers = [
            Process(
                target=getUseridList,
                args=(env, startDate, endDate, accessToken, pageSize, page,
                      offset, UseridList1),
            )
            for offset in range(1, 5)
        ]
        for proc in workers:
            proc.start()
            print(proc)
        for proc in workers:
            proc.join()
            print(proc)
    elif total != 0:
        # Everything fitted into the first page.
        UseridList = userids

    # Merge the workers' IDs with the first page and drop duplicates.
    merged = UseridList1 + UseridList
    UseridList0 = list(set(merged))
    print('去重userID数量', len(UseridList0))
    print(UseridList0)
    end = time.time()
    print("********************** 结束计时 **********************")
    print("总耗时:", end - start)