Xen Scheduling Analysis (Part 3): Credit Scheduler Algorithm Details

The previous article analyzed the four main steps of Xen's schedule() function.

(1) First, the credit-burning function:

static void burn_credits(struct csched_vcpu *svc, s_time_t now)
{
    s_time_t delta;
    uint64_t val;
    unsigned int credits;

    /* Assert svc is current */
    ASSERT( svc == CSCHED_VCPU(curr_on_cpu(svc->vcpu->processor)) );

    if ( (delta = now - svc->start_time) <= 0 )
        return;

    val = delta * CSCHED_CREDITS_PER_MSEC + svc->residual;
    svc->residual = do_div(val, MILLISECS(1));
    credits = val;
    ASSERT(credits == val); /* make sure we haven't truncated val */
    atomic_sub(credits, &svc->credit);
    svc->start_time += (credits * MILLISECS(1)) / CSCHED_CREDITS_PER_MSEC;
}

delta measures how long this VCPU has been running: now minus start_time gives the time it has been scheduled.

val = delta * CSCHED_CREDITS_PER_MSEC + svc->residual;

This line computes how many credits should be burned.

The residual added at the end is the remainder carried over from the previous calculation (more on it below). CSCHED_CREDITS_PER_MSEC is 10, so val is roughly the elapsed time (delta, in nanoseconds) multiplied by 10.

Next the remainder is recomputed: do_div returns the remainder and stores the quotient back in val. Since MILLISECS(1) is 1,000,000 ns, credits is val divided by 1,000,000; in other words, the VCPU is charged 10 credits for every millisecond it has run, and the sub-millisecond leftover is kept in residual.

Finally, start_time is advanced by the amount of time that was actually charged (credits converted back into nanoseconds), so the uncharged fraction is accounted for on the next call.
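
To make the arithmetic concrete, here is a minimal standalone sketch in plain C (the delta value and the NSEC_PER_MSEC constant standing in for MILLISECS(1) are made up for illustration; this is not the Xen code itself):

#include <stdio.h>
#include <stdint.h>

#define CSCHED_CREDITS_PER_MSEC 10
#define NSEC_PER_MSEC 1000000ULL        /* MILLISECS(1) in Xen is 1,000,000 ns */

int main(void)
{
    int64_t  delta    = 2550000;        /* hypothetical: the VCPU ran for 2.55 ms (in ns) */
    uint64_t residual = 0;              /* leftover from the previous call */

    uint64_t val     = delta * CSCHED_CREDITS_PER_MSEC + residual;  /* 25,500,000 */
    uint64_t credits = val / NSEC_PER_MSEC;                         /* 25 credits burned */
    residual         = val % NSEC_PER_MSEC;                         /* 500,000 carried over */

    /* start_time advances by the time actually charged: 25 credits -> 2.5 ms */
    int64_t charged = (int64_t)(credits * NSEC_PER_MSEC) / CSCHED_CREDITS_PER_MSEC;

    printf("credits = %llu, residual = %llu, charged = %lld ns\n",
           (unsigned long long)credits, (unsigned long long)residual,
           (long long)charged);
    return 0;
}

Running it shows 25 credits burned for 2.55 ms of run time; the 0.05 ms that does not amount to a whole credit stays in residual and is added back in on the next call, so nothing is lost to rounding.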

(2) Next, the insertion of a VCPU into the run queue:

static inline void
__runq_insert(unsigned int cpu, struct csched_vcpu *svc)
{
    /* Get the run queue of this CPU */
    const struct list_head * const runq = RUNQ(cpu);
    struct list_head *iter;

    BUG_ON( __vcpu_on_runq(svc) );
    BUG_ON( cpu != svc->vcpu->processor );
    /* Walk runq starting from runq->next; iter advances one node per iteration until it wraps back to runq (circular list) */
    list_for_each( iter, runq )
    {
        /* Get the csched_vcpu that embeds this runq node */
        const struct csched_vcpu * const iter_svc = __runq_elem(iter);
        /* Does svc have higher priority than the vcpu at iter? */
        if ( svc->pri > iter_svc->pri )
            break;
    }

    /* If the vcpu yielded, try to put it behind one lower-priority
     * runnable vcpu if we can.  The next runq_sort will bring it forward
     * within 30ms if the queue too long. */
    
    if ( test_bit(CSCHED_FLAG_VCPU_YIELD, &svc->flags)
         && __runq_elem(iter)->pri > CSCHED_PRI_IDLE )
    {
        iter=iter->next;

        /* Some sanity checks */
        BUG_ON(iter == runq);
    }
    /* Insert svc in front of the first vcpu whose priority is lower than its own */
    list_add_tail(&svc->runq_elem, iter);
}

The run queue is a doubly linked circular list, built on the list.h helpers taken from the Linux 2.6 kernel. Note that the list_head nodes carry no VCPU data of their own, so __runq_elem() is needed to get back from a node to the csched_vcpu that embeds it.

For each iter node, the loop checks whether svc's priority is greater than that node's priority; the walk stops at the first node whose priority is strictly lower than svc's.

Finally, svc is inserted in front of that node: list_add_tail(&svc->runq_elem, iter) links svc in just before iter.

The insertion position depends only on priority; credit values are not compared here.
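
The sketch below (a simplified, standalone re-implementation of the list helpers with a toy vcpu_demo structure, purely for illustration) shows how the embedded list_head plus a container_of-style macro give exactly this priority-ordered insert:

#include <stdio.h>
#include <stddef.h>

/* Minimal stand-ins for the Xen/Linux list helpers -- a sketch, not the real list.h */
struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

/* list_add_tail(n, head) links n in just before head */
static void list_add_tail(struct list_head *n, struct list_head *head)
{
    n->prev = head->prev;
    n->next = head;
    head->prev->next = n;
    head->prev = n;
}

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

/* Toy vcpu carrying only a priority and the embedded list node */
struct vcpu_demo {
    int pri;                       /* larger value means higher priority */
    struct list_head runq_elem;    /* embedded node, as in csched_vcpu */
};

/* Analogue of __runq_elem(): recover the vcpu from its embedded node */
#define runq_elem_of(ptr) container_of(ptr, struct vcpu_demo, runq_elem)

/* Priority-ordered insert: stop at the first entry with strictly lower priority */
static void runq_insert(struct list_head *runq, struct vcpu_demo *svc)
{
    struct list_head *iter;
    for (iter = runq->next; iter != runq; iter = iter->next)
        if (svc->pri > runq_elem_of(iter)->pri)
            break;
    list_add_tail(&svc->runq_elem, iter);   /* insert svc right before iter */
}

int main(void)
{
    struct list_head runq;
    struct vcpu_demo a = { 0 }, b = { -1 }, c = { 1 };
    struct list_head *p;

    list_init(&runq);
    runq_insert(&runq, &a);
    runq_insert(&runq, &b);
    runq_insert(&runq, &c);        /* highest priority, ends up at the head */

    for (p = runq.next; p != &runq; p = p->next)
        printf("pri = %d\n", runq_elem_of(p)->pri);   /* prints 1, 0, -1 */
    return 0;
}

Because the walk only stops at a strictly lower priority, the highest-priority entry ends up at the head of the queue and entries of equal priority keep FIFO order among themselves.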

(3) Load balancing

// Load balancing: returns the csched_vcpu that should be scheduled next
static struct csched_vcpu *
csched_load_balance(struct csched_private *prv, int cpu,
    struct csched_vcpu *snext, bool_t *stolen)
{
    
    struct csched_vcpu *speer;
    cpumask_t workers;
    cpumask_t *online;
    int peer_cpu, peer_node, bstep;
    int node = cpu_to_node(cpu);

    BUG_ON( cpu != snext->vcpu->processor );
    online = cpupool_scheduler_cpumask(per_cpu(cpupool, cpu));

    /* If this CPU is going offline we shouldn't steal work. */
    if ( unlikely(!cpumask_test_cpu(cpu, online)) )
        goto out;

    if ( snext->pri == CSCHED_PRI_IDLE )
        SCHED_STAT_CRANK(load_balance_idle);
    else if ( snext->pri == CSCHED_PRI_TS_OVER )
        SCHED_STAT_CRANK(load_balance_over);
    else
        SCHED_STAT_CRANK(load_balance_other);

    /*
     * Let's look around for work to steal, taking both vcpu-affinity
     * and node-affinity into account. More specifically, we check all
     * the non-idle CPUs' runq, looking for:
     *  1. any node-affine work to steal first,
     *  2. if not finding anything, any vcpu-affine work to steal.
     */
    for_each_csched_balance_step( bstep )
    {
        /*
         * We peek at the non-idling CPUs in a node-wise fashion. In fact,
         * it is more likely that we find some node-affine work on our same
         * node, not to mention that migrating vcpus within the same node
         * could well expected to be cheaper than across-nodes (memory
         * stays local, there might be some node-wide cache[s], etc.).
         */
        peer_node = node;
        do
        {
            /* Find out what the !idle are in this node */
            cpumask_andnot(&workers, online, prv->idlers);
            cpumask_and(&workers, &workers, &node_to_cpumask(peer_node));
            cpumask_clear_cpu(cpu, &workers);

            peer_cpu = cpumask_first(&workers);
            if ( peer_cpu >= nr_cpu_ids )
                goto next_node;
            do
            {
                /*
                 * Get ahold of the scheduler lock for this peer CPU.
                 *
                 * Note: We don't spin on this lock but simply try it. Spinning
                 * could cause a deadlock if the peer CPU is also load
                 * balancing and trying to lock this CPU.
                 */
                spinlock_t *lock = pcpu_schedule_trylock(peer_cpu);

                if ( !lock )
                {
                    SCHED_STAT_CRANK(steal_trylock_failed);
                    peer_cpu = cpumask_cycle(peer_cpu, &workers);
                    continue;
                }

                /* Any work over there to steal? */
                speer = cpumask_test_cpu(peer_cpu, online) ?
                    csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL;
                pcpu_schedule_unlock(lock, peer_cpu);

                /* As soon as one vcpu is found, balancing ends */
                if ( speer != NULL )
                {
                    *stolen = 1;
                    return speer;
                }

                peer_cpu = cpumask_cycle(peer_cpu, &workers);

            } while( peer_cpu != cpumask_first(&workers) );

 next_node:
            peer_node = cycle_node(peer_node, node_online_map);
        } while( peer_node != node );
    }

 out:
    /* Failed to find more important work elsewhere... */
    __runq_remove(snext);
    return snext;
}

This function is fairly involved and relies on the cpumask machinery. Setting the mask details aside, the structure to focus on is the pair of nested do/while loops inside for_each_csched_balance_step(). The outer loop iterates over NUMA nodes: peer_node is advanced at the next_node: label via cycle_node(), and the loop ends when peer_node wraps back to node, so every online node is visited once. For each node, the inner loop walks the non-idle peer CPUs and evaluates speer = cpumask_test_cpu(peer_cpu, online) ? csched_runq_steal(peer_cpu, cpu, snext->pri, bstep) : NULL; if stealing succeeds, the stolen speer is returned immediately. If nothing can be stolen anywhere, control falls through to out:, snext is removed from the run queue and returned unchanged.
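
For readers not familiar with the cpumask side, the toy sketch below (a 64-bit word standing in for cpumask_t, with made-up online/idler/node masks) mimics how the workers mask is built and how the inner loop cycles through peer CPUs:

#include <stdio.h>
#include <stdint.h>

/* Toy stand-ins for the cpumask helpers, operating on a 64-bit word */
static int mask_first(uint64_t m)            /* like cpumask_first */
{
    for (int i = 0; i < 64; i++)
        if (m & (1ULL << i))
            return i;
    return 64;                               /* plays the role of nr_cpu_ids */
}

static int mask_cycle(int cpu, uint64_t m)   /* like cpumask_cycle: next set bit, wrapping */
{
    for (int i = 1; i <= 64; i++) {
        int c = (cpu + i) % 64;
        if (m & (1ULL << c))
            return c;
    }
    return 64;
}

int main(void)
{
    uint64_t online    = 0xFF;               /* hypothetical: CPUs 0-7 online       */
    uint64_t idlers    = 0x0C;               /* hypothetical: CPUs 2,3 are idle     */
    uint64_t node_cpus = 0xF0;               /* hypothetical: CPUs 4-7 on peer node */
    int      this_cpu  = 5;

    /* workers = (online & ~idlers) & node_cpus, minus ourselves */
    uint64_t workers = (online & ~idlers) & node_cpus;   /* cpumask_andnot + cpumask_and */
    workers &= ~(1ULL << this_cpu);                      /* cpumask_clear_cpu            */

    int first = mask_first(workers);
    printf("workers = 0x%llx, first peer = %d\n",
           (unsigned long long)workers, first);

    /* Walk all workers exactly once, as the inner do/while does */
    int peer = first;
    do {
        printf("try to steal from CPU %d\n", peer);
        peer = mask_cycle(peer, workers);
    } while (peer != first);
    return 0;
}

This is the same pattern the real code applies per node with the actual cpumask helpers: build the set of busy peer CPUs on that node, then visit each of them once, starting from cpumask_first and stopping when cpumask_cycle wraps back to it.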

 

Reposted from: https://www.cnblogs.com/linanwx/p/5369109.html

