现有的任务基本上都是在shell脚本中写入:hive -e "$sql" 或者 hadoop jar ...
通过这种形式提交到hadoop集群上。若想在脚本运行过程中kill掉任务,需要进行以下2步操作:
1. kill掉当前shell脚本所在进程(及其启动的子进程)
2. 在hadoop集群上kill掉相应的mapreduce任务。
其中第一步可以通过脚本实现,前提是获取该shell脚本的pid。下面是使用java启动和停止进程的代码:
/**
 * Base class for running an external command as a child process and waiting
 * for it to finish.
 * <p>
 * Subclasses supply the command line through {@link #getExecString()}. The
 * command's stdout is drained and discarded, its stderr is collected and
 * returned (or used as the exception message when the exit code is non-zero).
 */
abstract public class Shell {
  public static final Log LOG = LogFactory.getLog(Shell.class);

  /** Whether the command started by the latest runCommand call has finished. */
  private volatile AtomicBoolean completed;
  /** Set to true once a command has exited with code 0. */
  private boolean successed = false;
  /** Working directory for the child process; null keeps the builder's default. */
  private File dir;
  /**
   * Child process executing the command. Volatile because runCommand blocks
   * its calling thread until the child exits, so a getPid call made while the
   * command is running comes from a different thread.
   */
  private volatile Process process;
  /** Exit code of the most recently finished command. */
  private int exitCode;

  /** return an array containing the command name & its parameters */
  protected abstract String[] getExecString();

  /**
   * set the working directory
   *
   * @param dir The directory where the command would be executed
   */
  protected void setWorkingDirectory(File dir) {
    this.dir = dir;
  }

  /**
   * Get the pid of the child process started by runCommand.
   *
   * @return the pid as a decimal string
   * @throws IllegalStateException if no process has been started yet
   * @throws Exception if the pid cannot be obtained on this JVM/platform
   */
  public String getPid() throws Exception {
    Process current = process;
    if (current == null) {
      throw new IllegalStateException(
          "The process has not been started, no pid is available");
    }
    String pid;
    try {
      // Java 9+ exposes the pid as public API; look it up reflectively so the
      // class still compiles against older JDKs.
      pid = String.valueOf(Process.class.getMethod("pid").invoke(current));
    } catch (NoSuchMethodException olderJdk) {
      // Older JDKs on Unix: read the private "pid" field of UNIXProcess.
      Class<?> clazz = Class.forName("java.lang.UNIXProcess");
      Field pidField = clazz.getDeclaredField("pid");
      pidField.setAccessible(true);
      pid = String.valueOf(pidField.get(current));
    }
    // Join the array explicitly: concatenating a String[] only prints its
    // identity hash, not the command.
    LOG.info("Pid:" + pid + " Command:"
        + StringUtils.join(" ", getExecString()));
    return pid;
  }

  /**
   * Run the command without any extra environment variables.
   *
   * @return the text the command wrote to stderr
   * @throws IOException see {@link #runCommand(Map)}
   */
  public String runCommand() throws IOException {
    Map<String, String> customerEnv = new HashMap<String, String>();
    return runCommand(customerEnv);
  }

  /**
   * Run the command and block until it exits.
   *
   * @param customerEnv extra environment variables for the child process;
   *        null is treated as empty
   * @return the text the command wrote to stderr
   * @throws IOException if the process cannot be started, if it exits with a
   *         non-zero code (the message is the collected stderr text), or if
   *         the calling thread is interrupted while waiting
   */
  public String runCommand(Map<String, String> customerEnv) throws IOException {
    LOG.info("start to run:" + StringUtils.join(" ", getExecString()));
    ProcessBuilder builder = new ProcessBuilder(getExecString());
    if (customerEnv != null && customerEnv.size() > 0) {
      Map<String, String> env = builder.environment();
      StringBuilder sb = new StringBuilder();
      for (Map.Entry<String, String> entry : customerEnv.entrySet()) {
        env.put(entry.getKey(), entry.getValue());
        sb.append(entry.getKey()).append(":").append(entry.getValue())
            .append(";");
      }
      LOG.info("Upload envionment variables size: " + customerEnv.size()
          + "value -> " + sb.toString());
    }
    completed = new AtomicBoolean(false);
    if (dir != null) {
      builder.directory(this.dir);
    }
    process = builder.start();
    final BufferedReader errReader = new BufferedReader(
        new InputStreamReader(process.getErrorStream()));
    BufferedReader inReader = new BufferedReader(
        new InputStreamReader(process.getInputStream()));
    // StringBuffer: written by the reader thread, read by this thread.
    final StringBuffer errMsg = new StringBuffer();
    // Both streams must be consumed, otherwise the child can block once the
    // pipe buffers fill up. stderr is read on a separate thread.
    Thread errThread = new Thread() {
      @Override
      public void run() {
        try {
          String line = errReader.readLine();
          while ((line != null) && !isInterrupted()) {
            errMsg.append(line);
            errMsg.append(System.lineSeparator());
            line = errReader.readLine();
          }
        } catch (IOException ioe) {
          LOG.warn("Error reading the error stream", ioe);
        }
      }
    };
    try {
      errThread.start();
    } catch (IllegalStateException ise) {
      LOG.warn("Could not start the error stream reader thread", ise);
    }
    try {
      // Drain stdout; its content is not used.
      String line = inReader.readLine();
      while (line != null) {
        line = inReader.readLine();
      }
      // wait for the process to finish and check the exit code
      exitCode = process.waitFor();
      try {
        // make sure that the error thread exits
        errThread.join();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted while reading the error stream", ie);
        // Keep the interrupt visible to the caller.
        Thread.currentThread().interrupt();
      }
      completed.set(true);
      if (exitCode != 0) {
        throw new IOException(errMsg.toString());
      }
      successed = true;
    } catch (InterruptedException ie) {
      // Restore the interrupt flag and keep the original exception as cause.
      Thread.currentThread().interrupt();
      throw new IOException(ie.toString(), ie);
    } finally {
      // close the input stream
      try {
        inReader.close();
      } catch (IOException ioe) {
        LOG.warn("Error while closing the input stream", ioe);
      }
      if (!completed.get()) {
        errThread.interrupt();
      }
      try {
        errReader.close();
      } catch (IOException ioe) {
        LOG.warn("Error while closing the error stream", ioe);
      }
      process.destroy();
    }
    return errMsg.toString();
  }

  /** @return true if a command run by this instance exited with code 0 */
  public boolean isSuccessed() {
    return successed;
  }

  /** Overrides the success flag. */
  public void setSuccessed(boolean successed) {
    this.successed = successed;
  }
}
/**
 * Shell command that launches a task script through {@code su - <user> -c},
 * appending the script's stdout and stderr to a log file.
 */
public class StartTaskSh extends Shell {
  private final String startBashName;
  private final String userName;
  private final String timeParameter;
  private final String logFileName;

  /**
   * @param executeDir directory the command runs in; also prefixed to the script name
   * @param startBashName file name of the script inside executeDir
   * @param userName user the script is run as
   * @param timeParameter argument passed to the script
   * @param logFileName file the script output is appended to
   */
  public StartTaskSh(String executeDir, String startBashName, String userName,
      String timeParameter, String logFileName) {
    setWorkingDirectory(new File(executeDir));
    this.startBashName = executeDir + "/" + startBashName;
    this.userName = userName;
    this.timeParameter = timeParameter;
    this.logFileName = logFileName;
  }

  /** @return the file the script output is appended to */
  public String getLogFileName() {
    return logFileName;
  }

  /** @return the user the script is run as */
  public String getUserName() {
    return userName;
  }

  @Override
  protected String[] getExecString() {
    // Each array element reaches the child as one separate argument, so the
    // whole command line, redirection included, has to be a single element
    // for "su -c".
    StringBuilder commandLine = new StringBuilder("/bin/sh ");
    commandLine.append(startBashName)
        .append(" ")
        .append(timeParameter)
        .append(" >> ")
        .append(logFileName)
        .append(" 2>&1");
    return new String[] { "su", "-", userName, "-c", commandLine.toString() };
  }

  /**
   * Execute the shell command.
   *
   * @param customerEnv extra environment variables for the child process
   * @return errMessage
   * @throws IOException if running the command fails
   */
  public String execute(Map<String, String> customerEnv) throws IOException {
    return runCommand(customerEnv);
  }
}
/**
 * Shell command that runs the kill script with a pid as its only argument.
 */
public class KillTaskSh extends Shell {
  private final String utilDir;
  private final String killShellName;
  private final String pid;

  /**
   * @param utilDir directory containing the kill script
   * @param killShellName file name of the kill script
   * @param pid pid handed to the kill script
   */
  public KillTaskSh(String utilDir, String killShellName, String pid) {
    this.utilDir = utilDir;
    this.killShellName = killShellName;
    this.pid = pid;
  }

  @Override
  protected String[] getExecString() {
    String scriptPath = utilDir + "/" + killShellName;
    return new String[] { "/bin/sh", scriptPath, pid };
  }

  /**
   * Run the kill script.
   *
   * @return the text the script wrote to stderr
   * @throws IOException if running the script fails
   */
  public String execute() throws IOException {
    return runCommand();
  }
}
其中 killShell 脚本的内容如下:
#!/bin/sh
###################
# Load the system-wide and per-user profiles into this shell.
. /etc/profile
. ~/.bash_profile
##################
echo =====================================`date`===============================================
# Resolve the real path of this script (following symlinks) and switch to its
# directory. Paths are quoted so directories containing whitespace still work.
SCRIPT_NAME=$(readlink -f "$0")
dir=$(dirname "${SCRIPT_NAME}")
cd "$dir"
# get_child_pids PID
# Appends PID and the pids of all its descendants (depth-first) to the global
# variable c_pids, each preceded by a single space.
# Does nothing when PID is empty.
get_child_pids()
{
    parent_pid=$1
    if [ -z "${parent_pid}" ]; then
        return
    fi
    c_pids="${c_pids} ${parent_pid}"
    # Compare the PPID column for exact equality. A textual grep on the pid
    # would also match longer pids sharing the same prefix (12 vs 120) and
    # select unrelated processes.
    # parent_pid is global and gets overwritten by the recursive calls, but the
    # for-list below is expanded before the first recursion, so that is safe.
    for child_pid in $(ps -ef | awk -v p="${parent_pid}" '$3 == p {print $2}'); do
        get_child_pids "${child_pid}"
    done
}
# Take the pid from the first argument; without one, fall back to the second
# ':'-separated field of conf/pid1 next to this script.
# Uses the POSIX "[" test because the script is declared as #!/bin/sh.
if [ -z "$1" ]
then
    pid=$(awk -F ':' '{print $2}' "$dir/conf/pid1")
else
    pid=$1
fi
# Collect the pid together with all of its descendants, then force-kill each.
get_child_pids "$pid"
echo $c_pids
for item in ${c_pids}; do
    kill -9 ${item}
done
/**
 * Runs {@code yarn application -kill} for the given application as the given
 * user (through {@code su - <user> -c}). A failure is logged, not rethrown.
 *
 * @param applicationId id of the application to kill
 * @param userName user the yarn command is run as
 */
public static void killJob(final String applicationId,
    final String userName) {
  Shell killer = new Shell() {
    @Override
    protected String[] getExecString() {
      String killCommand = "yarn application -kill " + applicationId;
      return new String[] { "su", "-", userName, "-c", killCommand };
    }
  };
  try {
    killer.runCommand();
  } catch (IOException failure) {
    LOG.error("Kill application " + applicationId + " error:", failure);
  }
}
/**
 * Reads a log file and collects the application ids it mentions.
 * <p>
 * For every line containing "Submitted application ", the first
 * whitespace-delimited token after that tag is taken as the id. Any trailing
 * text on the line is ignored so it cannot leak into the kill command.
 *
 * @param fileName path of the log file to scan
 * @return the ids in the order they appear; empty if none was found
 * @throws IOException if the file cannot be opened or read
 */
public static List<String> getApplicationIdFromLog(String fileName)
    throws IOException {
  final String tag = "Submitted application ";
  List<String> applicationList = new ArrayList<>();
  // try-with-resources closes the reader exactly once on every path.
  try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
    String line;
    while ((line = reader.readLine()) != null) {
      int tagStart = line.indexOf(tag);
      if (tagStart < 0) {
        continue;
      }
      String rest = line.substring(tagStart + tag.length()).trim();
      // Keep only the first token; an empty remainder yields an empty token.
      String applicationId = rest.split("\\s+", 2)[0];
      if (applicationId.length() > 0) {
        LOG.info("Need to kill " + applicationId);
        applicationList.add(applicationId);
      }
    }
  }
  return applicationList;
}
则整体的kill操作调用方式如下:
/**
 * Kills the running task in two stages: first the local process tree of the
 * start script, then the applications listed in the task's log file.
 * <p>
 * The stages fail independently: if the kill script reports an error (for
 * example because a process has already exited), the applications are still
 * killed. Nothing is thrown; every failure is logged.
 */
public void kill() {
  final String pid;
  final String userName;
  try {
    pid = startTaskSh.getPid();
    userName = startTaskSh.getUserName();
  } catch (Exception e) {
    LOG.error(
        "Can not get the pid of this process:", e);
    return;
  }
  /** kill the local process and its children */
  try {
    KillTaskSh killTaskSh = new KillTaskSh(Executor.utilDir,
        Executor.killShellName, pid);
    killTaskSh.execute();
  } catch (Exception e) {
    // Broad catch keeps this method from throwing, as before.
    LOG.error("Can not kill the process tree of pid " + pid + ":", e);
  }
  /** kill job (mapreduce only) */
  try {
    List<String> applicationList = FileUtils
        .getApplicationIdFromLog(startTaskSh.getLogFileName());
    for (String application : applicationList) {
      FileUtils.killJob(application, userName);
    }
  } catch (Exception e) {
    LOG.error("Can not kill the applications listed in "
        + startTaskSh.getLogFileName() + ":", e);
  }
}