参考博客:http://deo.im/2016/09/23/Using-scikit-learn-in-Celery/
博主在使用django+celery+scikit-learn搭建web页面管理的机器学习项目时发现在tasks中导入sklearn就会报错,如下所示:
Traceback (most recent call last):
File "D:\program\Python35\lib\site-packages\billiard\process.py", line 292, in _bootstrap
self.run()
File "D:\program\Python35\lib\site-packages\billiard\pool.py", line 292, in run
self.after_fork()
File "D:\program\Python35\lib\site-packages\billiard\pool.py", line 395, in after_fork
self.initializer(*self.initargs)
File "D:\program\Python35\lib\site-packages\celery\concurrency\prefork.py", line 58, in process_initializer
app.loader.init_worker()
File "D:\program\Python35\lib\site-packages\celery\loaders\base.py", line 128, in init_worker
self.import_default_modules()
File "D:\program\Python35\lib\site-packages\celery\loaders\base.py", line 116, in import_default_modules
signals.import_modules.send(sender=self.app)
File "D:\program\Python35\lib\site-packages\celery\utils\dispatch\signal.py", line 166, in send
response = receiver(signal=self, sender=sender, **named)
File "D:\program\Python35\lib\site-packages\celery\fixups\django.py", line 73, in on_import_modules
self.worker_fixup.validate_models()
File "D:\program\Python35\lib\site-packages\celery\fixups\django.py", line 173, in validate_models
cmd.check()
File "D:\program\Python35\lib\site-packages\django\core\management\base.py", line 426, in check
include_deployment_checks=include_deployment_checks,
File "D:\program\Python35\lib\site-packages\django\core\checks\registry.py", line 75, in run_checks
new_errors = check(app_configs=app_configs)
File "D:\program\Python35\lib\site-packages\django\core\checks\urls.py", line 13, in check_url_config
return check_resolver(resolver)
File "D:\program\Python35\lib\site-packages\django\core\checks\urls.py", line 23, in check_resolver
for pattern in resolver.url_patterns:
File "D:\program\Python35\lib\site-packages\django\utils\functional.py", line 33, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "D:\program\Python35\lib\site-packages\django\core\urlresolvers.py", line 417, in url_patterns
patterns = getattr(self.urlconf_module, "urlpatterns", self.urlconf_module)
File "D:\program\Python35\lib\site-packages\django\utils\functional.py", line 33, in __get__
res = instance.__dict__[self.name] = self.func(instance)
File "D:\program\Python35\lib\site-packages\django\core\urlresolvers.py", line 410, in urlconf_module
return import_module(self.urlconf_name)
File "D:\program\Python35\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 986, in _gcd_import
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 958, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 673, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 673, in exec_module
File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
File "D:\work_place\mygit\stock\stock\urls.py", line 21, in <module>
url(r'^',include("website.urls")),
File "D:\program\Python35\lib\site-packages\django\conf\urls\__init__.py", line 52, in include
urlconf_module = import_module(urlconf_module)
File "D:\program\Python35\lib\importlib\__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 986, in _gcd_import
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 958, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 673, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 673, in exec_module
File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
File "D:\work_place\mygit\stock\website\urls.py", line 3, in <module>
from website import views
File "D:\work_place\mygit\stock\website\views.py", line 14, in <module>
from website import tasks
File "D:\work_place\mygit\stock\website\tasks.py", line 11, in <module>
from sklearn.cluster import KMeans
File "D:\program\Python35\lib\site-packages\sklearn\__init__.py", line 134, in <module>
from .base import clone
File "D:\program\Python35\lib\site-packages\sklearn\base.py", line 13, in <module>
from .utils.fixes import signature
File "D:\program\Python35\lib\site-packages\sklearn\utils\__init__.py", line 11, in <module>
from .validation import (as_float_array,
File "D:\program\Python35\lib\site-packages\sklearn\utils\validation.py", line 23, in <module>
from ..externals.joblib import Memory
File "D:\program\Python35\lib\site-packages\sklearn\externals\joblib\__init__.py", line 127, in <module>
from .parallel import Parallel
File "D:\program\Python35\lib\site-packages\sklearn\externals\joblib\parallel.py", line 25, in <module>
from ._multiprocessing_helpers import mp
File "D:\program\Python35\lib\site-packages\sklearn\externals\joblib\_multiprocessing_helpers.py", line 24, in <module>
_sem = mp.Semaphore()
File "D:\program\Python35\lib\multiprocessing\context.py", line 81, in Semaphore
return Semaphore(value, ctx=self.get_context())
File "D:\program\Python35\lib\multiprocessing\synchronize.py", line 126, in __init__
SemLock.__init__(self, SEMAPHORE, value, SEM_VALUE_MAX, ctx=ctx)
File "D:\program\Python35\lib\multiprocessing\synchronize.py", line 59, in __init__
kind, value, maxvalue, self._make_name(),
File "D:\program\Python35\lib\multiprocessing\synchronize.py", line 117, in _make_name
return '%s-%s' % (process.current_process()._config['semprefix'],
AttributeError: 'Worker' object has no attribute '_config'
可以看到,错误的原因是celery生成的Worker进程对象缺少 _config 这个属性,因此需要在tasks.py中增加如下代码:
from celery.signals import worker_process_init
@worker_process_init.connect
def fix_multiprocessing(**kwargs):
    """Give the current worker process a ``_config`` dict if it lacks one.

    Connected to Celery's ``worker_process_init`` signal. Creating a
    ``multiprocessing`` semaphore reads
    ``current_process()._config['semprefix']``; on the Celery ``Worker``
    process object that attribute is missing, which raises
    ``AttributeError: 'Worker' object has no attribute '_config'``.

    Args:
        **kwargs: Keyword arguments supplied by the signal dispatcher; unused.
    """
    from multiprocessing import current_process

    process = current_process()
    # Only patch when the attribute is absent, so an existing configuration
    # is never overwritten.
    if not hasattr(process, '_config'):
        process._config = {'semprefix': '/mp'}
并且在导入sklearn时需要在函数内部导入,如下:
@shared_task
def cluster(center_num):
    # Import scikit-learn inside the task body instead of at module level:
    # the module-level import in tasks.py is what failed while the worker
    # process was still importing the project's modules.
    from sklearn.cluster import KMeans
之后就能够成功地在celery中运行scikit-learn了。不过遗憾的是,scikit-learn中机器学习算法的参数n_jobs不能设置了,一旦设置就会报错。