下面以“在虚拟环境中安装 project_demo 项目包,并以 YARN 模式提交 Spark 任务”为例:
1)virtualenv local_venv #创建虚拟环境
2)source local_venv/bin/activate #激活虚拟环境
3)pip install -U git+ssh://git@gitlab.xxx.project_demo.git -i http://pypi.xxx.com/simple/ --trusted-host pypi.xxx.com #将project_demo包安装到虚拟环境中
4)zip -r local_venv.zip local_venv #将虚拟环境打包
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
spark2-submit \
--conf spark.pyspark.driver.python=/data/home/xxx/local_venv/bin/python \
--conf spark.pyspark.python=./local_venv/local_venv/bin/python \
--master yarn \
--num-executors 8 \
--executor-cores 4 \
--executor-memory 5g