1. 介绍
本章主要介绍 IScheduler 接口的作用以及相关功能。Storm 默认的调度机制非常成熟,它能把 Topology 里面的每一个 Component 平均分配到 Worker 以及 Node 上,但是并不能充分地利用节点资源,导致 Node 上有过多的空闲资源。而 IScheduler 接口能自定义调度方式,用户可以根据业务以及资源的情况来分配具体的 Component 所在的位置。
2. Storm.yaml的配置
1.在 nimbus 节点的 storm.yaml
添加配置:
storm.scheduler: "storm.CustomScheduler"
其中 CustomScheduler 是用户自定义调度器的完整类名(含包名;本文示例中对应的类为 storm.DefaultScheduler),需要把这个类打成 Jar 包放到 Nimbus 节点的 /storm/lib 文件夹里面。
2.在 supervisor 节点的 storm.yaml
添加配置(非必要,只是方便选择 Node 节点):
supervisor.scheduler.meta:
name: "supervisor01"
3. IScheduler 接口实现示例
此调度是优先把Component放置到第一台机器的Worker上,当第一台机器的worker已全部占用则放置第二台上。
Scheduler.java
package storm;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import backtype.storm.scheduler.Cluster;
import backtype.storm.scheduler.EvenScheduler;
import backtype.storm.scheduler.ExecutorDetails;
import backtype.storm.scheduler.IScheduler;
import backtype.storm.scheduler.SupervisorDetails;
import backtype.storm.scheduler.Topologies;
import backtype.storm.scheduler.TopologyDetails;
import backtype.storm.scheduler.WorkerSlot;
/**
 * Custom Storm scheduler: packs a topology's executors onto the workers of the
 * first chosen supervisor before spilling over to the next one, instead of
 * spreading them evenly across all nodes like Storm's default scheduler.
 *
 * Expects the following entries in the topology configuration:
 *   "components" - List of component ids in placement order,
 *                  e.g. [TestSpout, FilterBolt, PrintBolt]
 *   "model"      - Map of component id -> number of executors of that
 *                  component to place on each worker slot
 * Each supervisor must declare a "name" entry under supervisor.scheduler.meta
 * in its storm.yaml.
 */
public class DefaultScheduler implements IScheduler {

    // Storm's even scheduler, kept as a potential fallback (currently unused).
    private EvenScheduler evenScheduler = new EvenScheduler();

    public void prepare(Map config) {
        // Nothing to initialize.
    }

    public void schedule(Topologies topologies, Cluster cluster) {
        System.out.println("------------------------- custom scheduling begin -------------------------");

        // Diagnostic: slots already occupied cluster-wide.
        Collection<WorkerSlot> usedSlots = cluster.getUsedSlots();
        System.out.println("The used slots: " + usedSlots);

        Collection<SupervisorDetails> supervisors = cluster.getSupervisors().values();
        List<String> supervisorNames = new ArrayList<String>();                      // "name" meta values
        Map<String, Integer> supervisorSlotCounts = new HashMap<String, Integer>();  // name -> free slot count
        Map<String, List<WorkerSlot>> supervisorFreeSlots = new HashMap<String, List<WorkerSlot>>();

        // Collect the free slots of every supervisor, keyed by the "name"
        // entry configured in supervisor.scheduler.meta.
        for (SupervisorDetails supervisor : supervisors) {
            Map meta = (Map) supervisor.getSchedulerMeta();
            String supervisorName = (String) meta.get("name");
            supervisorNames.add(supervisorName);
            List<WorkerSlot> freeSlots = cluster.getAvailableSlots(supervisor);
            supervisorFreeSlots.put(supervisorName, freeSlots);
            supervisorSlotCounts.put(supervisorName, freeSlots.size());
        }
        for (String name : supervisorNames) {
            System.out.println("Supervisor " + name + " has < " + supervisorSlotCounts.get(name)
                    + " > available slots: " + supervisorFreeSlots.get(name));
        }

        for (TopologyDetails topology : topologies.getTopologies()) {
            if (!cluster.needsScheduling(topology)) {
                System.out.println("----- The topology " + topology.getId() + " doesn't need scheduling. -----");
                continue;
            }
            System.out.println("----- The topology " + topology.getId() + " is scheduling now. -----");
            System.out.println(cluster.getNeedsSchedulingComponentToExecutors(topology));
            System.out.println("Unassigned executors: " + cluster.getUnassignedExecutors(topology));

            // Component ids in placement order, e.g. [TestSpout, FilterBolt, PrintBolt].
            @SuppressWarnings("unchecked")
            List<String> componentList = (List<String>) topology.getConf().get("components");
            // Per-worker executor quota for each component.
            @SuppressWarnings("unchecked")
            Map<String, Integer> models = (Map<String, Integer>) topology.getConf().get("model");

            int workers = topology.getNumWorkers();
            System.out.println("The topology needs " + workers + " workers.");
            System.out.println("The per-worker component quotas: " + models);

            // Supervisors (largest free-slot count first) whose combined slot
            // counts cover the requested number of workers.
            List<String> useSupervisorList = Utils.WorkerToSlots(workers, supervisorSlotCounts);
            System.out.println("Supervisors to use: " + useSupervisorList);

            // Resolve the first chosen supervisor by its meta name.
            // BUG FIX: the original compared the names with '==' (reference
            // equality), which only matched by interning accident; use equals().
            SupervisorDetails firstSupervisor = null;
            for (SupervisorDetails supervisor : supervisors) {
                Map meta = (Map) supervisor.getSchedulerMeta();
                if (useSupervisorList.get(0).equals(meta.get("name"))) {
                    firstSupervisor = supervisor;
                    break;
                }
            }
            if (firstSupervisor == null) {
                System.out.println("No supervisor matches " + useSupervisorList.get(0) + "; skipping topology.");
                continue;
            }

            // NOTE(review): only the first supervisor's free slots are used here,
            // even when useSupervisorList names several supervisors; if 'workers'
            // exceeds that supervisor's free slots, get(i) below will throw.
            List<WorkerSlot> availableSlots = cluster.getAvailableSlots(firstSupervisor);

            for (int i = 0; i < workers; i++) {
                // Re-query what still needs scheduling after each assignment.
                Map<String, List<ExecutorDetails>> componentToExecutors =
                        cluster.getNeedsSchedulingComponentToExecutors(topology);
                Collection<ExecutorDetails> slotExecutors = new ArrayList<ExecutorDetails>();
                System.out.println("Scheduling worker " + (i + 1) + "...");

                // Take up to the per-worker quota of executors from each component.
                for (String component : componentList) {
                    List<ExecutorDetails> executors = componentToExecutors.get(component);
                    if (executors == null) {
                        continue; // component already fully assigned — original NPE'd here
                    }
                    Number quota = (Number) models.get(component);
                    int take = Math.min(executors.size(), quota.intValue());
                    for (int j = 0; j < take; j++) {
                        slotExecutors.add(executors.get(j));
                    }
                }
                cluster.assign(availableSlots.get(i), topology.getId(), slotExecutors);

                // The last worker additionally absorbs every executor still unassigned.
                if (i == workers - 1) {
                    Collection<ExecutorDetails> unassigned = cluster.getUnassignedExecutors(topology);
                    System.out.println("The remaining executors " + unassigned
                            + " are assigned to worker " + (i + 1));
                    slotExecutors.addAll(unassigned);
                    // Free and re-assign the slot with the enlarged executor set.
                    cluster.freeSlot(availableSlots.get(i));
                    try {
                        cluster.assign(availableSlots.get(i), topology.getId(), slotExecutors);
                    } catch (Exception e) {
                        System.out.println("Failed to assign remaining executors: " + e);
                    }
                }
                System.out.println("Assigned " + slotExecutors + " to " + availableSlots.get(i));
            }
        }
    }
}
Utils.java
package storm;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
import java.util.List;
public class Utils {

    /**
     * Picks supervisors, largest available-slot count first, until their
     * combined slot count covers the requested number of workers.
     *
     * Fixes over the original implementation:
     * - Integer values are compared with intValue() instead of '==', which
     *   silently failed for slot counts above 127 (Integer autobox cache).
     * - The caller's map is no longer mutated (the original zeroed the chosen
     *   entries, corrupting the count map reused by the scheduler).
     * - The loop stops when every supervisor has been chosen, instead of
     *   throwing IndexOutOfBoundsException when demand exceeds supply.
     *
     * @param worker          number of workers the topology needs
     * @param supervisorSlots supervisor name -> available slot count; not modified
     * @return supervisor names in the order they were chosen (may cover fewer
     *         slots than requested if the cluster is too small)
     */
    public static List<String> WorkerToSlots(int worker, Map<String, Integer> supervisorSlots) {
        // Slot counts sorted ascending; we consume them from the tail (largest first).
        List<Integer> counts = new ArrayList<Integer>(supervisorSlots.values());
        Collections.sort(counts);

        List<String> chosen = new ArrayList<String>();
        int covered = 0;
        int i = 1;
        while (covered < worker && i <= counts.size()) {
            int n = counts.get(counts.size() - i).intValue();
            covered += n;
            // Find a not-yet-chosen supervisor with exactly n free slots; the
            // 'chosen' check replaces the original's destructive put(key, 0)
            // so ties between supervisors are still resolved one at a time.
            for (String key : supervisorSlots.keySet()) {
                if (supervisorSlots.get(key).intValue() == n && !chosen.contains(key)) {
                    chosen.add(key);
                    break;
                }
            }
            i++;
        }
        return chosen;
    }
}