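# When the 13B model is deployed across multiple GPUs, the two GPUs run as two
# separate processes (rank=0 and rank=1). That situation makes the program die,
# so the model cannot be deployed directly as a service.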
class Config(BaseModel):
    """Request body for the ``/llama/`` completion endpoint."""

    # Prompts to complete. The default is a small code-completion example:
    # an import line followed by an unfinished function signature.
    prompts: List[str] = [
        """\
import socket
def ping_exponential_backoff(host: str):"""
    ]

    # Passed through unchanged as ``max_gen_len`` to
    # ``generator.text_completion``; may be None.
    max_gen_len: Optional[int] = None

    # Passed through unchanged as ``temperature``.
    temperature: float = 0.2

    # Passed through unchanged as ``top_p``.
    top_p: float = 0.90
# Only rank 0 exposes the HTTP endpoint. Every other rank loops forever,
# receiving the request parameters that rank 0 broadcasts and issuing the
# same generation call alongside it.
if dist.get_rank() == 0:

    @app.post("/llama/")
    def generate(config: Config):
        """Complete the first prompt of the request and return the result.

        The exact arguments used on this rank are broadcast to the other
        ranks before generation starts, so that every rank calls
        ``generator.text_completion`` with identical inputs.

        Args:
            config: Request body carrying the prompts and sampling settings.

        Returns:
            A dict whose ``"responses"`` key holds whatever
            ``generator.text_completion`` returned.

        Raises:
            IndexError: If ``config.prompts`` is empty. This happens before
                anything is broadcast, so the other ranks are not affected.
        """
        # NOTE(review): if `app` is a FastAPI app, plain `def` endpoints run
        # in a thread pool, so overlapping requests could interleave their
        # broadcasts -- confirm that requests are serialized upstream.

        # Only the first prompt of a request is served.
        prompts = [config.prompts[0]]
        print(prompts)
        max_gen_len = config.max_gen_len
        temperature = config.temperature
        top_p = config.top_p

        # Broadcast exactly what this rank is about to run. Sending the full
        # ``config.prompts`` list instead would let the other ranks generate
        # for a different batch than rank 0 whenever a request carries more
        # than one prompt.
        dist.broadcast_object_list([prompts, max_gen_len, temperature, top_p])

        results = generator.text_completion(
            prompts,  # type: ignore
            max_gen_len=max_gen_len,
            temperature=temperature,
            top_p=top_p,
        )
        print(results)
        return {"responses": results}

    uvicorn.run(app, host="127.0.0.1", port=5000)
else:
    while True:
        # Four placeholder slots, filled in place by the broadcast with
        # [prompts, max_gen_len, temperature, top_p] as sent by rank 0.
        config = [None] * 4
        try:
            dist.broadcast_object_list(config)
            # The return value is discarded; rank 0 builds the HTTP response.
            generator.text_completion(
                config[0],
                max_gen_len=config[1],
                temperature=config[2],
                top_p=config[3],
            )
        except Exception as exc:
            # Best effort: keep this rank alive for the next request, but
            # report the failure instead of hiding it. KeyboardInterrupt and
            # SystemExit are deliberately not caught, so the process can
            # still be stopped.
            print(f"rank {dist.get_rank()}: request handling failed: {exc!r}")
# Deploying by branching on the rank as shown above avoids the error ^_^