Difficulties using chatglm2-6b-int4

To start: the error output was as follows

(venv_net) PS D:\mydatapro\myweb> python SmartVector.py
Failed to load cpm_kernels:Kernel.__init__() missing 2 required positional arguments: 'code' and 'function_names'
c:/mingw/bin/../lib/gcc/mingw32/6.3.0/../../../../mingw32/bin/ld.exe: cannot find -lpthread
collect2.exe: error: ld returned 1 exit status
Compile parallel cpu kernel gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c -shared -o C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.so failed.
Load cpu kernel C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels.so failed: Traceback (most recent call last):
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization.py", line 165, in __init__    
    kernels = ctypes.cdll.LoadLibrary(kernel_file)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\ctypes\__init__.py", line 454, in LoadLibrary
    return self._dlltype(name)
           ^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\ctypes\__init__.py", line 376, in __init__
    self._handle = _dlopen(self._name, mode)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^
OSError: [WinError 193] %1 is not a valid Win32 application.

D:\mydatapro\venv_net\Lib\site-packages\langchain_core\_api\deprecation.py:141: LangChainDeprecationWarning: The method `BaseLLM.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead.
  warn_deprecated(
Traceback (most recent call last):
  File "D:\mydatapro\myweb\SmartVector.py", line 51, in <module>
    response = llm("你好")
               ^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\langchain_core\_api\deprecation.py", line 170, in warning_emitting_wrapper
    return wrapped(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\langchain_core\language_models\llms.py", line 1190, in __call__
    self.generate(
  File "D:\mydatapro\venv_net\Lib\site-packages\langchain_core\language_models\llms.py", line 880, in generate
    output = self._generate_helper(
             ^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\langchain_core\language_models\llms.py", line 738, in _generate_helper
    raise e
  File "D:\mydatapro\venv_net\Lib\site-packages\langchain_core\language_models\llms.py", line 725, in _generate_helper
    self._generate(
  File "D:\mydatapro\venv_net\Lib\site-packages\langchain_core\language_models\llms.py", line 1431, in _generate
    else self._call(prompt, stop=stop, **kwargs)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\myweb\SmartVector.py", line 37, in _call
    response, _ = self.model.chat(self.tokenizer,prompt,history=self.history,temperature=self.temperature,top_p=self.top_p)        
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^        
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\modeling_chatglm.py", line 1028, in chat   
    outputs = self.generate(**inputs, **gen_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\generation\utils.py", line 1648, in generate
    return self.sample(
           ^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\generation\utils.py", line 2730, in sample
    outputs = self(
              ^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\modeling_chatglm.py", line 932, in forward 
    transformer_outputs = self.transformer(
                          ^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\modeling_chatglm.py", line 828, in forward 
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
                                                                      ^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\modeling_chatglm.py", line 638, in forward 
    layer_ret = layer(
                ^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\modeling_chatglm.py", line 542, in forward 
    attention_output, kv_cache = self.self_attention(
                                 ^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\modeling_chatglm.py", line 374, in forward 
    mixed_x_layer = self.query_key_value(hidden_states)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization.py", line 500, in forward     
    output = W8A16LinearCPU.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\torch\autograd\function.py", line 574, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization.py", line 246, in forward     
    weight = extract_weight_to_float(quant_w, scale_w, weight_bit_width, quantization_cache=quantization_cache)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization.py", line 228, in extract_weight_to_float
    func(
TypeError: 'NoneType' object is not callable
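
This first traceback is really two chained failures: the MinGW gcc link step fails (`cannot find -lpthread`), and the fallback then tries to `ctypes`-load the prebuilt `quantization_kernels.so`, which Windows rejects with WinError 193 because that file is not a valid Win32 binary (most likely a Linux ELF object, or a 32-bit/64-bit mismatch with the Python interpreter). A quick way to check what the cached file actually is, by reading its magic bytes (a diagnostic sketch, not part of the ChatGLM code):

```python
import os
import tempfile

def inspect_binary(path):
    """Guess a shared library's format from its leading magic bytes."""
    with open(path, "rb") as f:
        magic = f.read(4)
    if magic[:2] == b"MZ":
        return "PE (Windows DLL/EXE)"   # loadable by ctypes on Windows
    if magic == b"\x7fELF":
        return "ELF (Linux .so)"        # triggers WinError 193 on Windows
    return "unknown"

# Demo with a synthetic file; in practice point it at the cached kernel, e.g.
# inspect_binary(r"C:\Users\shuhu\.cache\huggingface\modules"
#                r"\transformers_modules\chatglm2-6b-int4\quantization_kernels.so")
with tempfile.NamedTemporaryFile(delete=False, suffix=".so") as f:
    f.write(b"\x7fELF\x02\x01\x01\x00")
    demo = f.name
print(inspect_binary(demo))  # ELF (Linux .so)
os.unlink(demo)
```

If the file turns out to be ELF, no amount of reloading will help on Windows; the kernel has to be recompiled locally with a working MinGW (or the model run without the compiled kernels).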

Later (at least it became usable)

(venv_net) PS D:\mydatapro\myweb> python SmartVector.py
Load parallel cpu kernel failed C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.so: Traceback (most recent call last):
  File "C:\Users\shuhu/.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization.py", line 148, in __init__
    kernels = ctypes.cdll.LoadLibrary(kernel_file)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\ctypes\__init__.py", line 454, in LoadLibrary
    return self._dlltype(name)
           ^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\ctypes\__init__.py", line 376, in __init__
    self._handle = _dlopen(self._name, mode)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: Could not find module 'C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.so' (or one of its dependencies). Try using the full path with constructor syntax.

D:\mydatapro\venv_net\Lib\site-packages\langchain_core\_api\deprecation.py:141: LangChainDeprecationWarning: The method `BaseLLM.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead.
  warn_deprecated(


This error makes no sense,

because the file is right there in that directory.

I then tried a number of things: loading the file manually, clearing the cache, downloading the latest model files, and so on. None of them worked.
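
One thing worth noting: on Windows, the ctypes error "Could not find module ... (or one of its dependencies)" can fire even when the `.so` file itself exists. Since Python 3.8, directories of dependent DLLs (here most likely MinGW's OpenMP/pthread runtimes, `libgomp-1.dll` and `libwinpthread-1.dll`, pulled in by the `-fopenmp -pthread` compile flags) are only searched if registered via `os.add_dll_directory`. A hedged sketch of that workaround; the MinGW path is an assumption, adjust it to wherever your gcc actually lives:

```python
import os
import ctypes

KERNEL = (r"C:\Users\shuhu\.cache\huggingface\modules"
          r"\transformers_modules\chatglm2-6b-int4"
          r"\quantization_kernels_parallel.so")
MINGW_BIN = r"C:\mingw\bin"  # assumption: directory holding libgomp-1.dll etc.

# os.add_dll_directory exists only on Windows (Python 3.8+)
if hasattr(os, "add_dll_directory"):
    if os.path.isdir(MINGW_BIN):
        os.add_dll_directory(MINGW_BIN)  # make dependent MinGW DLLs findable
    kernels = ctypes.CDLL(KERNEL)        # full path, constructor syntax
```

If this is the cause, registering the MinGW bin directory before the model loads (e.g. at the top of SmartVector.py) lets the kernel load succeed without touching the cached files.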

chatglm2-6b, by contrast, throws no errors

But it runs extremely slowly; as of the moment I'm writing this CSDN post, it still hasn't produced any output.

It's been a long time since I was left this speechless.

And then the errors started again??

Traceback (most recent call last):
  File "D:\mydatapro\myweb\SmartVector.py", line 81, in <module>
    llm.load_model(model_path = r'F:\\THUDM\\chatglm2-6b-int4')
  File "D:\mydatapro\myweb\SmartVector.py", line 35, in load_model
    self.tokenizer = AutoTokenizer.from_pretrained(model_path,trust_remote_code=True)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\models\auto\tokenization_auto.py", line 882, in from_pretrained       
    return tokenizer_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\tokenization_utils_base.py", line 2271, in from_pretrained
    return cls._from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\tokenization_utils_base.py", line 2505, in _from_pretrained
    tokenizer = cls(*init_inputs, **init_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\tokenization_chatglm.py", line 69, in __init__
    super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=False, **kwargs)
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\tokenization_utils.py", line 436, in __init__
    self._add_tokens(
  File "D:\mydatapro\venv_net\Lib\site-packages\transformers\tokenization_utils.py", line 544, in _add_tokens
    current_vocab = self.get_vocab().copy()
                    ^^^^^^^^^^^^^^^^
  File "C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\tokenization_chatglm.py", line 112, in get_vocab
    vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
                                                            ^^^^^^^^^^^^^^^
  File "C:\Users\shuhu\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\tokenization_chatglm.py", line 108, in vocab_size
    return self.tokenizer.n_words
           ^^^^^^^^^^^^^^
AttributeError: 'ChatGLMTokenizer' object has no attribute 'tokenizer'. Did you mean: 'tokenize'?
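
This last AttributeError is widely reported as a transformers-version incompatibility: from around transformers 4.34, `PreTrainedTokenizer.__init__` calls `get_vocab()` during construction, but ChatGLM's `tokenization_chatglm.py` only assigns `self.tokenizer` *after* calling `super().__init__()`, exactly as the traceback shows. The usual remedies are pinning an older transformers (e.g. a 4.30.x release) or editing the cached `tokenization_chatglm.py` so the inner tokenizer is created before the `super().__init__()` call. A minimal, self-contained reproduction of the mechanism (class names here are illustrative, not the real transformers API):

```python
class BaseTokenizer:
    """Stands in for PreTrainedTokenizer: calls get_vocab() during __init__."""
    def __init__(self):
        self.vocab = self.get_vocab()

class BrokenTokenizer(BaseTokenizer):
    def __init__(self):
        super().__init__()        # get_vocab() fires before self.sp exists
        self.sp = {"hello": 0}
    def get_vocab(self):
        return dict(self.sp)      # AttributeError: no attribute 'sp'

class FixedTokenizer(BrokenTokenizer):
    def __init__(self):
        self.sp = {"hello": 0}    # set the inner tokenizer first...
        BaseTokenizer.__init__(self)  # ...then let the base class run

try:
    BrokenTokenizer()
except AttributeError as e:
    print("broken:", e)

print("fixed:", FixedTokenizer().vocab)
```

The fix in `tokenization_chatglm.py` is the same reordering: move the `SPTokenizer` construction above the `super().__init__(...)` line, or downgrade transformers so the base class no longer calls `get_vocab()` during construction.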