import os
import platform
import signal
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import readline
import torch
import torch.nn as nn
# os.environ["CUDA_VISIBLE_DEVICES"] = '1'
# Per-GPU memory caps for dispatch (device index -> maximum allocation).
max_memory_mapping = {0: "10GB", 1: "11GB"}
# Load the tokenizer from a local path or Hub repo id; trust_remote_code
# allows the repository's custom tokenizer/model code to run.
tokenizer = AutoTokenizer.from_pretrained('your_model_path', trust_remote_code=True)
# With device_map='auto' the weights are dispatched across GPUs by accelerate,
# so do not call .cuda() afterwards (moving a dispatched model to a single
# device fails). Load directly in fp16 via torch_dtype instead of .half().
model = AutoModel.from_pretrained("your_model_path",
                                  device_map='auto',
                                  load_in_8bit=False,
                                  max_memory=max_memory_mapping,
                                  torch_dtype=torch.float16,
                                  trust_remote_code=True)
model = model.eval()  # switch to inference mode (disables dropout)
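
# Minimal sanity-check sketch (hedged): the exact inference API depends on the
# remote-code model class behind 'your_model_path'. ChatGLM-style chat
# checkpoints expose a custom `chat` method; other causal-LM checkpoints
# typically go through `generate`. The query below is only an illustration.
query = "Hello"
if hasattr(model, "chat"):
    # ChatGLM-style interface: returns the reply and the updated history.
    response, history = model.chat(tokenizer, query, history=[])
else:
    # Generic generation path for causal-LM style checkpoints.
    inputs = tokenizer(query, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)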