- Managing host addresses
  - With env.hosts, every task that is executed will automatically run on the hosts in the list.
  - Use env.roledefs to define roles:
def set_all_hosts():
    env.roledefs = {
        'namenode': [
            '10.60.0.100:22',
        ],
        "datanode": [
            '10.60.0.101:22',
            '10.60.0.102:22',
        ],
    }
    env.password = "123"
    env.user = "root"
    env.roledefs.update({"all": list(itertools.chain.from_iterable(env.roledefs.values()))})

set_all_hosts()
Note: env.roledefs.update is used here to manually add an env.roledefs["all"] role, which represents every host.
- Managing usernames, accounts, and passwords
  - The settings below make all hosts share the same user and password:
env.password = "123"
env.user = "root"
  - If each host needs a different username, it can be written directly into the address:
user1@10.60.0.100:22
user2@10.60.0.101:22
  - If each host needs a different password, use env.passwords, as shown below (see the combined sketch after this list):
env.passwords = {
    "user1@10.60.0.100:22": "password1",
    "user2@10.60.0.101:22": "password2",
}
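Putting the two ideas together, a minimal sketch (reusing the placeholder users, addresses, and passwords from above) could look like this:

from fabric.api import env

# usernames differ per host, so they are written into the addresses themselves
env.hosts = [
    "user1@10.60.0.100:22",
    "user2@10.60.0.101:22",
]

# env.passwords is keyed by the full user@host:port string
env.passwords = {
    "user1@10.60.0.100:22": "password1",
    "user2@10.60.0.101:22": "password2",
}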
- About tasks
  - When a task is executed, it runs on every machine in env.hosts; if env.hosts is empty, Fabric prompts for a host address.
  - If the task definition is preceded by @roles("namenode"), Fabric reads env.roledefs["namenode"] and runs the task on every machine in that role.
  - A task may also specify neither env.hosts nor env.roledefs itself and instead call other tasks that do have hosts assigned; in that case the sub-tasks must be invoked with execute(other_task) (execute is provided by Fabric), for example:
def setup():
    execute(upload_files)
    execute(install)

@roles("namenode")
def init_namenode():
    run("hdfs namenode -format")

@roles("all")
def get_hadoop_version():
    run("hdfs version")
- Sample code
#-*- coding:gbk -*-
from fabric.api import *
from fabric.operations import *
from fabric.tasks import *
from fabric.decorators import *
import os
import itertools

# local and remote directory layout
remote_root_path = "~/test/"
local_root_path = "./"
res_folder_name = "res/"
src_folder_name = "src/"
dis_folder_name = "dis/"
remote_res_folder_path = remote_root_path + res_folder_name
remote_src_folder_path = remote_root_path + src_folder_name
remote_dis_folder_path = remote_root_path + dis_folder_name
local_res_folder_path = local_root_path + res_folder_name
local_src_folder_path = local_root_path + src_folder_name

def set_all_hosts():
    env.roledefs = {
        'namenode': [
            '10.60.0.100:2121',
        ],
        "datanode": [
            "10.60.0.100:2122",
            "10.60.0.100:2123",
            "10.60.0.100:2124",
            "10.60.0.100:2125",
        ],
    }
    env.password = "123"
    env.user = "root"
    # add an "all" role that merges every host from every role
    env.roledefs.update({"all": list(itertools.chain.from_iterable(env.roledefs.values()))})

set_all_hosts()
def other_using_samples():
    # alternative to env.roledefs: fill env.hosts / env.passwords directly
    # (kept as a reference; this function is not called in this fabfile)
    hosts = [
        '10.60.0.100:2121',
        "10.60.0.100:2122",
        "10.60.0.100:2123",
        "10.60.0.100:2124",
        "10.60.0.100:2125",
    ]
    passwords = [
        "123",
        "123",
        "123",
        "123",
        "123",
    ]
    user = 'root'
    for i, host in enumerate(hosts):
        password = passwords[i]
        addr = user + "@" + host
        # updating env.hosts is required
        env.hosts.append(addr)
        # updating env.passwords is required
        env.passwords.update({addr: password})
@task
def setup():
    #execute(download_files)
    execute(upload_files)
    execute(dispatch_files)
    execute(source_environment)
    execute(set_host_name)
    execute(close_firewall)
    execute(install)
    execute(init_hadoop)

@task
def init_hadoop():
    execute(prepare_for_init_hadoop)
    execute(init_namenode)

@roles("all")
def prepare_for_init_hadoop():
    run("rm -rf /usr/local/hadoop/tmp")
    run("rm -rf /usr/local/hadoop/logs")
    run("rm -rf /usr/local/hadoop/etc/hadoop/mapred-site.xml.template")

@roles("namenode")
def init_namenode():
    run("hdfs namenode -format")
@task
@roles("namenode")
def start_hadoop():
    with settings(hide('warnings', 'stdout', 'stderr')):
        run("start-dfs.sh")
        run("start-yarn.sh")
        run("mr-jobhistory-daemon.sh start historyserver")
        run("hdfs dfsadmin -report")

@task
@roles("namenode")
def stop_hadoop():
    with settings(hide('warnings', 'stdout', 'stderr')):
        run("mr-jobhistory-daemon.sh stop historyserver")
        run("stop-dfs.sh")
        run("stop-yarn.sh")

@task
@roles("all")
def get_hadoop_version():
    run("hdfs version")
@roles("all")
def set_host_name():
hostname = run("hostname")
run("echo " + hostname + " > /etc/hostname")
@roles("all")
def close_firewall():
# 关闭firewall
run("systemctl stop firewalld.service")
# 禁止firewall开机启动
run("systemctl disable firewalld.service")
@task
@roles("all")
def upload_files():
    run("rm -rf " + remote_res_folder_path)
    run("rm -rf " + remote_src_folder_path)
    run("mkdir -p " + remote_res_folder_path)
    run("mkdir -p " + remote_src_folder_path)
    for file_name in os.listdir(local_res_folder_path):
        put(local_res_folder_path + file_name, remote_res_folder_path)
    for file_name in os.listdir(local_src_folder_path):
        put(local_src_folder_path + file_name, remote_src_folder_path)

#@task
@roles("all")
def clean_files():
    run("rm -rf ~/testres/ ~/testsrc/ ~/test/*.xml ~/test/profile ~/test/hosts ~/test/slaves")

@roles("all")
def download_files():
    with cd(remote_root_path):
        run("rm -rf hadoop-2.7.2.tar.gz")
        run("wget http://mirrors.cnnic.cn/apache/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz")
        run('''wget --no-cookies --no-check-certificate --header "Cookie: gpw_e24=http%3A%2F%2Fwww.oracle.com%2F; oraclelicense=accept-securebackup-cookie" "http://download.oracle.com/otn-pub/java/jdk/7/jdk-7-linux-x64.rpm"
rpm -ivh jdk-7-linux-x64.rpm''')
@task
@roles("all")
def dispatch_files():
    with cd(remote_res_folder_path):
        run("mv -f profile /etc/profile")
        run("mv -f .bashrc ~/.bashrc")
        run("mv -f hosts /etc/")
        run("mv -f slaves /usr/local/hadoop/etc/hadoop/")
        run("mv -f core-site.xml /usr/local/hadoop/etc/hadoop/")
        run("mv -f hdfs-site.xml /usr/local/hadoop/etc/hadoop/")
        run("mv -f mapred-site.xml /usr/local/hadoop/etc/hadoop/")
        run("mv -f yarn-site.xml /usr/local/hadoop/etc/hadoop/")

@task
@roles("all")
def source_environment():
    run("source /etc/profile")
    run("source ~/.bashrc")

@roles("all")
def install():
    # to suppress part of the output:
    # with settings(hide('warnings', 'running', 'stdout', 'stderr'), warn_only=True):
    with cd(remote_root_path):
        run("tar zxf hadoop-2.7.2.tar.gz -C /usr/local/")
    run("chown -R root:root /usr/local/hadoop-2.7.2")
    run("rm -rf /usr/bin/java")
    with settings(hide('warnings', 'running', 'stdout', 'stderr'), warn_only=True):
        run("rpm -e jdk-1.7*")
    with cd(remote_root_path):
        run("rpm -ivh jdk-7-linux-x64.rpm")
    run("rm -rf /usr/local/hadoop")
    run("ln -s /usr/local/hadoop-2.7.2 /usr/local/hadoop")
    run("rm -rf $HOME/anaconda")
    run("bash ~/test/res/Anaconda3-4.3.0-Linux-x86_64.sh -b -p $HOME/anaconda")
    run("pip install pyquery")
@task
@roles("all")
def tmp():
    run("rm -rf /usr/local/hadoop/logs")

@task
@roles("namenode")
def start_job():
    res_files = [
        "url_list.txt",
        "url_config.txt",
        "query_list.txt",
        "query_config.txt",
    ]
    src_files = [
        "url_mapper.py",
        "url_reducer.py",
        "query_mapper.py",
        "query_reducer.py",
        "jobs.py",
        "config_loader.py",
        "Web.py",
        "Util.py",
    ]
    # make the scripts executable
    run("chmod +x {}*.py".format(remote_src_folder_path))
    # copy jobs.py, the config files, and the mappers/reducers to remote_root_path
    for file in res_files:
        run("cp {}{} {}".format(remote_res_folder_path, file, remote_root_path))
    for file in src_files:
        run("cp {}{} {}".format(remote_src_folder_path, file, remote_root_path))
    #run("python3 {}jobs.py".format(remote_root_path))

@task
@roles("namenode")
def fetch_hadoop_output():
    run("hdfs dfs -cat /python/output/*")