背景:适用于IO密集型任务。
应用举例:
- 数据库查询。
- 文件系统操作。
- 网络服务器处理请求。
- 大批量地从某个服务获取信息。
代码举例:
def worker_core(json_data):
    """Send one chat request to the GPT service for a single sample.

    Parameters
    ----------
    json_data : dict
        One sample; the prompt is read from
        ``json_data["conversations"][0]["value"]``.

    Returns
    -------
    dict
        The raw service response with the original sample attached under
        the ``"data"`` key (deep-copied so later mutation of the response
        cannot alias the caller's input).
    """
    import copy  # hoisted to the top of the function (was mid-body)

    response = send_chat_request(
        system="You are a helpful assistant.",
        examples=[],
        question=json_data["conversations"][0]["value"],  # prompt
        temperature=0.7,
        top_p=0.8,
        engine="xxxxxxxxxxxxxxxxx",
        max_tokens=2048,
        priority=5,
    )
    # Keep a pristine copy of the input alongside the model output.
    response["data"] = copy.deepcopy(json_data)
    return response
def worker(json_data, max_try=10):
    """Fetch and save GPT output for one sample, over 3 iterations.

    For each iteration ``i`` in 0..2 the result is written to
    ``<dirname(save_path)>/gpt4/<i>/<basename(save_path)>``.  An iteration
    whose output file already exists is skipped.  Each iteration retries
    the service call up to ``max_try`` times; on the first response that
    passes ``check_format`` the file is written and retrying stops.  If
    every attempt fails, the last raw response is still saved with
    ``"response"`` set to None so the failure is recorded on disk.

    Parameters
    ----------
    json_data : dict
        Sample carrying ``meta.save_path`` and the prompt; passed through
        to ``worker_core``.
    max_try : int, optional
        Maximum service attempts per iteration (default 10).
    """
    for iteration in range(3):
        save_path = json_data["meta"]["save_path"]
        save_dpath = os.path.dirname(save_path)
        save_fname = os.path.basename(save_path)
        newd = os.path.join(save_dpath, "gpt4", str(iteration))
        new_save_path = os.path.join(newd, save_fname)
        if os.path.exists(new_save_path):
            # Already done for this iteration; move on to the next one.
            # (The original `return` here skipped the remaining
            # iterations entirely, contradicting the 3-iteration intent.)
            continue
        safe_mkdir(newd)
        result = None
        # Initialize so the fallback below cannot hit a NameError when
        # check_format raises on the very first attempt.
        response = None
        for _ in range(max_try):
            result = worker_core(json_data)
            print(json_data)
            try:
                response = check_format(result)
            except Exception as e:
                print(e)
                response = None
            if response is not None:
                result["raw_response"] = result["response"]
                result["response"] = response
                with open(new_save_path, "w") as f:
                    json.dump(result, f, indent=4, ensure_ascii=False)
                break  # success: stop retrying this iteration
        if response is None and result is not None:
            # Every attempt failed format checking; persist the last raw
            # response so the failure is visible rather than lost.
            result["raw_response"] = result["response"]
            result["response"] = None
            with open(new_save_path, "w") as f:
                json.dump(result, f, indent=4, ensure_ascii=False)
def parallel_execution(demo_json_list, n_jobs=100):
    """Run ``worker`` over every sample using a thread pool.

    Threads fit here because the work is I/O-bound (network requests);
    the pool size never exceeds the number of samples.

    Parameters
    ----------
    demo_json_list : list
        Samples to process, one per ``worker`` call.
    n_jobs : int, optional
        Upper bound on concurrent threads (default 100).
    """
    pool_size = min(n_jobs, len(demo_json_list))
    with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as pool:
        # Drain the lazy map through tqdm so progress is displayed.
        for _ in tqdm(pool.map(worker, demo_json_list), total=len(demo_json_list)):
            pass
# Entry point: fan the whole dataset out across 50 worker threads.
parallel_execution(demo_json_list, n_jobs=50)  # demo_json_list is the data loaded via json.load