Building scrapyd with Docker
Dockerfile
```dockerfile
FROM selenium/standalone-chrome:85.0-chromedriver-85.0-20200907
USER root
# Xvfb for a virtual display; python3-distutils + get-pip.py to get pip for the system Python
RUN apt-get update && \
    apt-get install -y xvfb python3-distutils && \
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python3 get-pip.py && \
    rm -rf /var/lib/apt/lists/*

# Set the container timezone to Asia/Shanghai
ENV TimeZone=Asia/Shanghai
RUN ln -snf /usr/share/zoneinfo/$TimeZone /etc/localtime && echo $TimeZone > /etc/timezone
WORKDIR /app
# Copy the dependency list first so the pip layer is cached between builds
COPY requirements.txt .
COPY ./scrapyd.conf /etc/scrapyd/
EXPOSE 6800
# Install dependencies from the Tsinghua PyPI mirror
RUN python3 -m pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
# Copy the rest of the project code
COPY . .
# Start scrapyd in the foreground
CMD ["scrapyd"]
```
requirements.txt
```
attrs
Automat
backports.zoneinfo
certifi
cffi
charset-normalizer
colorama
constantly
cryptography
cssselect
dateparser
Deprecated
Distance
environs
Faker
filelock
gerapy-auto-extractor
gne
hyperlink
idna
incremental
itemadapter
itemloaders
jmespath
joblib
loguru
lxml
marshmallow
numpy
packaging
parsel
Protego
pyasn1
pyasn1-modules
pycparser
PyDispatcher
pymongo
pyOpenSSL
pyparsing
python-dateutil
python-dotenv
pytz
pytz-deprecation-shim
PyYAML
queuelib
redis
regex
requests
requests-file
scikit-learn
scipy
Scrapy
scrapy-redis
service-identity
six
threadpoolctl
tldextract
Twisted
typing_extensions
tzdata
tzlocal
urllib3
w3lib
win32-setctime
wrapt
zope.interface
selenium
aiohttp
beautifulsoup4
pyquery
pymysql
flask
django
scrapyd
scrapyd-client
scrapy-splash
```
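The list above is unpinned, so each build pulls whatever versions are current on PyPI at build time. For reproducible images you can freeze exact versions from an environment where the crawlers already run correctly (a minimal sketch; it assumes you freeze on the same Python version the image uses):

```bash
# Capture the exact package versions from a known-good environment,
# then rebuild the image so every build installs the same set.
python3 -m pip freeze > requirements.txt
```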
scrapyd.conf
```ini
[scrapyd]
eggs_dir = eggs
logs_dir = logs
items_dir =
jobs_to_keep = 5
dbs_dir = dbs
max_proc = 0
max_proc_per_cpu = 10
finished_to_keep = 100
poll_interval = 5.0
bind_address = 0.0.0.0
http_port = 6800
debug = off
runner = scrapyd.runner
application = scrapyd.app.application
launcher = scrapyd.launcher.Launcher
webroot = scrapyd.website.Root
[services]
schedule.json = scrapyd.webservice.Schedule
cancel.json = scrapyd.webservice.Cancel
addversion.json = scrapyd.webservice.AddVersion
listprojects.json = scrapyd.webservice.ListProjects
listversions.json = scrapyd.webservice.ListVersions
listspiders.json = scrapyd.webservice.ListSpiders
delproject.json = scrapyd.webservice.DeleteProject
delversion.json = scrapyd.webservice.DeleteVersion
listjobs.json = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus
```
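The [services] section maps each JSON endpoint to its scrapyd handler. Once the container is running and port 6800 is published, these endpoints can be called directly over HTTP; a quick sketch with curl (the project and spider names are placeholders):

```bash
# Health check: should return "status": "ok"
curl http://localhost:6800/daemonstatus.json

# List the projects deployed to this scrapyd instance
curl http://localhost:6800/listprojects.json

# Schedule a spider run (placeholder project/spider names)
curl http://localhost:6800/schedule.json -d project=myproject -d spider=myspider

# Inspect pending / running / finished jobs for a project
curl "http://localhost:6800/listjobs.json?project=myproject"
```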
Create a scrapyd directory, put the three files above into it, and then build the image.
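A minimal build-and-run sketch, assuming the three files sit in a directory named scrapyd; the image and container names (scrapyd-chrome, scrapyd) are arbitrary:

```bash
cd scrapyd

# Build the image from the Dockerfile above
docker build -t scrapyd-chrome .

# Run it in the background and publish the scrapyd API / web UI on port 6800
docker run -d --name scrapyd -p 6800:6800 scrapyd-chrome

# Confirm that scrapyd came up
curl http://localhost:6800/daemonstatus.json
```

Spider projects can then be deployed from the development machine with scrapyd-deploy (part of the scrapyd-client package listed in requirements.txt), with the [deploy] url in the project's scrapy.cfg pointed at http://<host>:6800/.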