----安装linux版本----
# Download and run the official Ollama installer for Linux.
# NOTE(review): this pipes a remote script straight into sh — inspect
# https://ollama.com/install.sh first in security-sensitive environments.
curl -fsSL https://ollama.com/install.sh | sh
1. 下载ollama库
# Clone the Ollama repository into ./ollama and enter it.
# Fix: shell commands are case-sensitive — "Git" is not found on Linux; use "git".
git clone git@github.com:ollama/ollama.git ollama
cd ollama
2. 获取llama.cpp模块
# Register submodules, then fetch only the llm/llama.cpp one
# (avoids downloading any other submodules the repo may declare).
git submodule init
git submodule update llm/llama.cpp
3. 创建环境并安装依赖
# Create an isolated Python virtual environment inside the llama.cpp tree,
# activate it, and install the dependencies needed by the conversion scripts.
python3 -m venv llm/llama.cpp/.venv
source llm/llama.cpp/.venv/bin/activate
pip install -r llm/llama.cpp/requirements.txt
4. 创建量化工具
# Build the `quantize` binary from the llama.cpp sources.
# NOTE(review): newer upstream llama.cpp renamed this target/binary to
# `llama-quantize` — confirm against the submodule revision Ollama pins.
make -C llm/llama.cpp quantize
5. 下载需要的模型
# Download the model weights from Hugging Face into ./model.
# Fix: `git lfs clone` is deprecated; with git-lfs installed (`git lfs install`),
# a plain `git clone` fetches the LFS-tracked weight files automatically.
git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 model
6. 转换模型格式(如果模型是.safetensors)
# Convert the Hugging Face (.safetensors) checkpoint in ./model to a
# single GGUF file at fp16 precision, written to ./converted.bin.
python llm/llama.cpp/convert-hf-to-gguf.py ./model --outtype f16 --outfile converted.bin
7. 量化模型
# Quantize the fp16 GGUF down to 4-bit (q4_0), producing ./quantized.bin
# with a much smaller disk/memory footprint at some quality cost.
llm/llama.cpp/quantize converted.bin quantized.bin q4_0
8. 创建一个新的Modelfile
# Open a new Modelfile in an editor.
# Fix: the editor command is lowercase "vim" — "Vim" is not found on Linux.
vim xxx.Modelfile
9. Modelfile内容示例
具体请参考 Ollama 官方 Modelfile 文档:https://github.com/ollama/ollama/blob/main/docs/modelfile.md
# Fix: the tutorial just produced ./quantized.bin (steps 6-7), so FROM must
# point at that file for `ollama create` to package it; `FROM llama3` would
# instead base the model on a library model and ignore the quantized weights.
FROM ./quantized.bin
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
# sets the context window size to 4096, this controls how many tokens the LLM can use as context to generate the next token
PARAMETER num_ctx 4096
# sets a custom system message to specify the behavior of the chat assistant
SYSTEM You are Mario from super mario bros, acting as an assistant.
10. 创建并运行模型
# Build the model from the Modelfile, then start an interactive session.
# Fix: the CLI binary is lowercase "ollama" — "Ollama" is not found on Linux.
ollama create xxx -f xxx.Modelfile
ollama run xxx
感谢各位~喜欢就点个赞吧~