1、环境与模型准备
# Create a dedicated Python 3.10 conda environment and switch into it.
conda create --name llamaindex python=3.10
conda activate llamaindex

# PyTorch stack, resolved from the pytorch and nvidia channels;
# pytorch-cuda=11.7 selects the CUDA 11.7 variant.
conda install \
  pytorch==2.0.1 \
  torchvision==0.15.2 \
  torchaudio==2.0.2 \
  pytorch-cuda=11.7 \
  -c pytorch -c nvidia

# LlamaIndex, its HuggingFace LLM integration, and the HuggingFace tooling,
# all pinned to exact versions.
pip install \
  llama-index==0.10.38 \
  llama-index-llms-huggingface==0.2.0 \
  "transformers[torch]==4.41.1" \
  "huggingface_hub[inference]==0.23.1" \
  huggingface_hub==0.23.1 \
  sentence-transformers==2.7.0 \
  sentencepiece==0.2.0
2、下载 Sentence Transformer 模型
# Create the demo workspace and the model directory under the home directory.
# -p keeps this step re-runnable: it does not fail if the directories exist.
mkdir -p ~/llamaindex_demo ~/model
cd ~/llamaindex_demo
# touch only creates an empty download_hf.py; the script contents are not part
# of these notes and must be filled in before the run below.
touch download_hf.py
# Run inside the llamaindex conda environment,
# from the /root/llamaindex_demo directory.
python download_hf.py
3、下载 NLTK 相关资源
# Fetch the NLTK data mirror (gh-pages branch) into /root/nltk_data.
git clone --branch gh-pages https://gitee.com/yzy0612/nltk_data.git /root/nltk_data
cd /root/nltk_data
# Move everything out of packages/ into the top level of nltk_data.
mv packages/* .
# Unpack the punkt tokenizer archive in place, inside tokenizers/.
unzip tokenizers/punkt.zip -d tokenizers
# Unpack the averaged perceptron tagger archive inside taggers/.
cd taggers
unzip averaged_perceptron_tagger.zip
4、LlamaIndex HuggingFaceLLM
# Activate the environment first so the pip installs below land in the
# llamaindex environment rather than whichever one is currently active.
conda activate llamaindex

# Symlink the shared internlm2-chat-1_8b model directory into ~/model.
cd ~/model
ln -s /root/share/new_models/Shanghai_AI_Laboratory/internlm2-chat-1_8b/ ./

# Create the (initially empty) demo script; it must be filled in before running.
cd ~/llamaindex_demo
touch llamaindex_internlm.py

# Additional dependencies.
pip install protobuf
pip install einops

# Run the demo from ~/llamaindex_demo.
python llamaindex_internlm.py
llamaindex_internlm.py 的运行结果明显不对。
5、LlamaIndex RAG
# Install dependencies.
# Versions are pinned so pip does not pull a newer llama-index-core that is
# incompatible with the pinned llama-index==0.10.38 stack.
# NOTE(review): confirm these two versions against the tutorial being followed.
conda activate llamaindex
pip install llama-index-embeddings-huggingface==0.2.0 llama-index-embeddings-instructor==0.1.3
# Create the (initially empty) RAG script; it must be filled in before running.
cd ~/llamaindex_demo
touch llamaindex_RAG.py
# Activation and cd are repeated so this step also works from a fresh shell.
conda activate llamaindex
cd ~/llamaindex_demo/
python llamaindex_RAG.py