这个学期初参加了华为软件精英挑战赛,经过了一个月的努力,最终取得了一个差强人意的成绩。在杭厦赛区初赛位列40名,20-40名的成绩实在是太接近,离晋级差了1分,无奈技不如人,分数上的微小差距可能是技术上的巨大差距。这次比赛门槛比较低,很多人说是华为比赛最水的一届,也有人称这是一届华为调参大赛。我能进入赛区的64强也是侥幸。
赛题:
简单描述,根据2-3月的虚拟机使用情况,预测接下来1-2星期的虚拟机使用情况(这一部分需要用机器学习的知识建模),然后将预测的虚拟机装入物理服务器中,要求资源使用率达到最大。(这部分便是装箱问题的解决)。
赛题详细文档链接:http://codecraft.devcloud.huaweicloud.com/home/detail
大致思路:
预测部分使用简单的多元线性回归,将前50天的数据加上时间参数进行多元回归,采用梯度下降法拟合模型。数据预处理是一个关键点,最终使用了加权平均法处理。最近几天发现自己代码里的指数平滑公式写错了,导致使用指数平滑的分数比使用加权平均法低很多。听说使用3次指数平滑就能达到240+。这也算是给我一个教训,阅读文档要仔细,不要只看一半就迫不及待开始
写程序。最终打入决赛的同学应该都实现了更高级的模型,LSTM,ARIMA。
装箱部分,我查阅了许多资料,有first-fit,next-fit,ffd算法,为了提高分数,我在此基础之上加上了启发式算法,不过在练习阶段的测试中,分数一直一样。后来,我又采用背包算法来装箱,采用动态规划尽量装满一个背包,之后再拿出新的背包,当我兴奋地提交时却发现分数还是一样的……
初赛正式提交时,我备份了差不多10个版本,ffd算法装箱的版本得到了220分,模拟退火法版本出现了资源超分的问题(应该是程序代码有bug),背包算法版本得到了237分。
吐槽:
比赛进行一半的时候,华为官方在后台做出了调整,大部分参赛选手都下降了8,9分。但是华为官方却没有做出实际有意义的说明,只是说“评测机制有变化”。这使得大部分人都停滞不前,我在初赛结束时都还不知道机制哪里变化了。
本次在班里拉了2个同学组队,结果是我一个人完成了全部代码,一个人写代码,特别是机器学习的部分有很大的局限性。大学里一直没有找到计算机专业志同道合的朋友,也是一种遗憾。
PS:如果明年杭电有同学参加华为软件精英挑战赛可以找我一起组队!!!
程序代码:
程序感觉除了我应该没人看得懂,完全是针对这个比赛写的,期间我自己也不知道哪个变量是什么含义。
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Random;
import java.util.Set;
/**
 * Forecasts how many virtual machines of each flavor will be requested in a
 * future period and packs the forecast onto physical servers.
 *
 * <p>The forecast is a per-flavor linear model over the previous {@link #N}
 * daily counts plus a bias and a time term, fitted by batch gradient descent.
 * The packing fills one server at a time with a bounded-knapsack dynamic
 * program.
 */
public class Predict {

    /** Number of past days fed to the linear model (length of each weight vector). */
    private static final int N = 50;
    /** Number of gradient-descent passes over the training data. */
    private static final int RANGE = 100000;
    /** Initial step size for the lag weights and the bias. */
    private static final double LEARNING_RATE = 0.0001;
    /** Step size for the time-coordinate parameter. */
    private static final double DAY_LEARNING_RATE = 0.2;
    /** Epoch at which the learning rate is scaled once by {@link #LEARNING_RATE_DECAY}. */
    private static final int DECAY_EPOCH = 80000;
    private static final double LEARNING_RATE_DECAY = 0.95;
    /** Starting value of every flavor's time-coordinate parameter. */
    private static final double INITIAL_TIME_PARAMETER = 0.85;
    private static final long MILLIS_PER_DAY = 24L * 60 * 60 * 1000;

    /** A virtual machine flavor as declared in the input file. */
    private static final class VirtualMachine {
        final String vmname;
        final int cpu;
        /** Memory in whole GB (the declared MB value divided by 1024, truncated). */
        final int mem;
        /** Position of the flavor in the input declaration order. */
        final int index;

        VirtualMachine(String vmname, int cpu, int memMb, int index) {
            this.vmname = vmname;
            this.cpu = cpu;
            this.mem = memMb / 1024;
            this.index = index;
        }
    }

    /** A physical server together with the virtual machines placed on it. */
    private static final class Server {
        int availableCpu;
        int availableMem;
        /** Number of placed virtual machines per flavor index. */
        final int[] counts;

        Server(int cpu, int mem, int kinds) {
            this.availableCpu = cpu;
            this.availableMem = mem;
            this.counts = new int[kinds];
        }

        /** Copy constructor: the copy can be modified without affecting {@code other}. */
        Server(Server other) {
            this.availableCpu = other.availableCpu;
            this.availableMem = other.availableMem;
            this.counts = other.counts.clone();
        }

        /** Places one instance of {@code vm} if both resources suffice; returns whether it was placed. */
        boolean add(VirtualMachine vm) {
            if (availableCpu < vm.cpu || availableMem < vm.mem) {
                return false;
            }
            counts[vm.index]++;
            availableCpu -= vm.cpu;
            availableMem -= vm.mem;
            return true;
        }

        boolean isEmpty() {
            return isempty(counts);
        }
    }

    /** Orders servers by free CPU, then free memory, both ascending (fullest first). */
    private static final Comparator<Server> FREE_CPU_THEN_MEM = new Comparator<Server>() {
        @Override
        public int compare(Server a, Server b) {
            int byCpu = Integer.compare(a.availableCpu, b.availableCpu);
            return byCpu != 0 ? byCpu : Integer.compare(a.availableMem, b.availableMem);
        }
    };

    /** Orders servers by free memory, then free CPU, both ascending (fullest first). */
    private static final Comparator<Server> FREE_MEM_THEN_CPU = new Comparator<Server>() {
        @Override
        public int compare(Server a, Server b) {
            int byMem = Integer.compare(a.availableMem, b.availableMem);
            return byMem != 0 ? byMem : Integer.compare(a.availableCpu, b.availableCpu);
        }
    };

    /**
     * Predicts per-flavor demand and produces a placement.
     *
     * @param ecsContent   training records, one per line, tab separated; column 1 is the
     *                     flavor name and column 2 starts with a {@code yyyy-MM-dd} date.
     *                     The first and last records define the training window, so the
     *                     records are expected to be in chronological order.
     * @param inputContent task description: line 0 is {@code "<cpu> <mem> ..."} for the
     *                     server, line 2 the flavor count K, lines 3..3+K-1 are
     *                     {@code "<name> <cpu> <memMB>"}, line 4+K the optimisation
     *                     target, lines 6+K and 7+K the forecast start and end in
     *                     {@code yyyy-MM-dd HH:mm:ss}
     * @return output lines: total count, one {@code "<name> <count>"} line per flavor, an
     *         empty line, the server count, then one line per server of the form
     *         {@code "<n> <name> <count> ..."} with a trailing space
     * @throws IllegalArgumentException if there are no training records, a date cannot be
     *         parsed, or a forecast virtual machine cannot fit on an empty server
     */
    public static String[] predictVm(String[] ecsContent, String[] inputContent) {
        if (ecsContent == null || ecsContent.length == 0) {
            throw new IllegalArgumentException("ecsContent must contain at least one training record");
        }

        // Physical server size.
        String[] serverSpec = inputContent[0].split(" ");
        int cpuSize = Integer.parseInt(serverSpec[0]);
        int memSize = Integer.parseInt(serverSpec[1]);

        // Flavors to forecast.
        int kinds = Integer.parseInt(inputContent[2]);
        List<VirtualMachine> flavors = new ArrayList<VirtualMachine>(kinds);
        Map<String, VirtualMachine> flavorByName = new HashMap<String, VirtualMachine>();
        for (int i = 0; i < kinds; i++) {
            String[] fields = inputContent[3 + i].split(" ");
            VirtualMachine vm = new VirtualMachine(
                    fields[0], Integer.parseInt(fields[1]), Integer.parseInt(fields[2]), i);
            flavors.add(vm);
            flavorByName.put(vm.vmname, vm);
        }

        // NOTE(review): the target is compared against the literal "CP"; if the input file
        // spells it "CPU" this never matches and the MEM strategy is always used - confirm
        // against the task specification before changing it.
        boolean optimizeCpu = inputContent[4 + kinds].equals("CP");

        SimpleDateFormat timestampFormat = newUtcFormat("yyyy-MM-dd HH:mm:ss");
        int predictDays = daysBetween(
                parseDate(timestampFormat, inputContent[6 + kinds]),
                parseDate(timestampFormat, inputContent[7 + kinds]));

        double[][] history = loadHistory(ecsContent, flavorByName, kinds);
        DataAverage(history);

        double[][] weights = new double[kinds][N];
        double[] bias = new double[kinds];
        double[] timeParameter = new double[kinds];
        Arrays.fill(timeParameter, INITIAL_TIME_PARAMETER);
        train(history, predictDays, weights, bias, timeParameter);

        int[] totals = forecastTotals(history, predictDays, weights, bias, timeParameter);

        // The summary lines are written before packing because packing consumes the counts.
        List<String> result = new ArrayList<String>();
        int grandTotal = 0;
        for (int k = 0; k < kinds; k++) {
            grandTotal += totals[k];
        }
        result.add(String.valueOf(grandTotal));
        for (int k = 0; k < kinds; k++) {
            result.add(flavors.get(k).vmname + " " + totals[k]);
        }
        result.add("");

        List<Server> servers = pack(flavors, totals.clone(), cpuSize, memSize, optimizeCpu);
        result.add(String.valueOf(servers.size()));
        for (int s = 0; s < servers.size(); s++) {
            StringBuilder line = new StringBuilder();
            line.append(s + 1).append(' ');
            int[] counts = servers.get(s).counts;
            // Flavors are listed in declaration order so the output is deterministic.
            for (int k = 0; k < kinds; k++) {
                if (counts[k] > 0) {
                    line.append(flavors.get(k).vmname).append(' ').append(counts[k]).append(' ');
                }
            }
            result.add(line.toString());
        }
        return result.toArray(new String[result.size()]);
    }

    /** Creates a formatter pinned to UTC so that day differences are not skewed by DST shifts. */
    private static SimpleDateFormat newUtcFormat(String pattern) {
        SimpleDateFormat format = new SimpleDateFormat(pattern);
        format.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
        return format;
    }

    /** Parses {@code text}, converting a parse failure into an {@link IllegalArgumentException}. */
    private static Date parseDate(SimpleDateFormat format, String text) {
        try {
            return format.parse(text);
        } catch (ParseException e) {
            throw new IllegalArgumentException("Unparseable date: " + text, e);
        }
    }

    /** Whole days from {@code from} to {@code to}, truncated toward zero. */
    private static int daysBetween(Date from, Date to) {
        return (int) ((to.getTime() - from.getTime()) / MILLIS_PER_DAY);
    }

    /** Builds the day-by-flavor request count matrix from the training records. */
    private static double[][] loadHistory(String[] ecsContent, Map<String, VirtualMachine> flavorByName,
            int kinds) {
        SimpleDateFormat dayFormat = newUtcFormat("yyyy-MM-dd");
        Date begin = parseDate(dayFormat, ecsContent[0].split("\t")[2].split(" ")[0]);
        Date end = parseDate(dayFormat, ecsContent[ecsContent.length - 1].split("\t")[2].split(" ")[0]);
        int days = daysBetween(begin, end) + 1;

        double[][] history = new double[days][kinds];
        for (String record : ecsContent) {
            String[] columns = record.split("\t");
            VirtualMachine vm = flavorByName.get(columns[1]);
            if (vm == null) {
                continue; // flavor is not among those to forecast
            }
            int day = daysBetween(begin, parseDate(dayFormat, columns[2].split(" ")[0]));
            history[day][vm.index] += 1;
        }
        return history;
    }

    /**
     * Fits the per-flavor model in place by batch gradient descent on the squared error.
     * Does nothing when there are not more than {@link #N} days of history, because then
     * there is no training sample and every gradient is zero.
     */
    private static void train(double[][] history, int predictDays, double[][] weights, double[] bias,
            double[] timeParameter) {
        int days = history.length;
        int trainingDays = days - N;
        if (trainingDays <= 0) {
            return;
        }
        int kinds = bias.length;
        double learningRate = LEARNING_RATE;
        double[] weightGradient = new double[N];
        for (int epoch = 0; epoch < RANGE; epoch++) {
            if (epoch == DECAY_EPOCH) {
                learningRate *= LEARNING_RATE_DECAY;
            }
            for (int k = 0; k < kinds; k++) {
                Arrays.fill(weightGradient, 0.0);
                double biasGradient = 0;
                double dayGradient = 0;
                for (int i = N; i < days; i++) {
                    double predicted = 0;
                    for (int lag = 0; lag < N; lag++) {
                        predicted += history[i - 1 - lag][k] * weights[k][lag];
                    }
                    // Position of the day within the whole (training + forecast) period.
                    double partDay = ((double) i) / (days + predictDays);
                    predicted += bias[k] + timeParameter[k] * partDay;
                    predicted = RELU(predicted);
                    double loss = predicted - history[i][k];
                    biasGradient += 2 * loss / trainingDays;
                    dayGradient += 2 * loss * partDay / trainingDays;
                    for (int lag = 0; lag < N; lag++) {
                        weightGradient[lag] += 2 * history[i - 1 - lag][k] * loss / trainingDays;
                    }
                }
                for (int lag = 0; lag < N; lag++) {
                    weights[k][lag] -= weightGradient[lag] * learningRate;
                }
                bias[k] -= biasGradient * learningRate;
                timeParameter[k] -= dayGradient * DAY_LEARNING_RATE;
            }
        }
    }

    /**
     * Rolls the model forward over the forecast period, feeding earlier forecasts back in
     * as inputs, and returns the per-flavor sum truncated to an integer. Days before the
     * start of the history contribute nothing.
     */
    private static int[] forecastTotals(double[][] history, int predictDays, double[][] weights,
            double[] bias, double[] timeParameter) {
        int days = history.length;
        int kinds = bias.length;
        double[][] forecast = new double[predictDays][kinds];
        for (int day = 0; day < predictDays; day++) {
            for (int k = 0; k < kinds; k++) {
                double value = 0;
                for (int lag = 0; lag < N; lag++) {
                    int source = day - lag - 1;
                    if (source >= 0) {
                        value += forecast[source][k] * weights[k][lag];
                    } else if (days + source >= 0) {
                        value += history[days + source][k] * weights[k][lag];
                    }
                }
                forecast[day][k] = RELU(value + bias[k]
                        + timeParameter[k] * ((day + days) / (double) (days + predictDays)));
            }
        }
        int[] totals = new int[kinds];
        for (int k = 0; k < kinds; k++) {
            double sum = 0;
            for (int day = 0; day < predictDays; day++) {
                sum += forecast[day][k];
            }
            totals[k] = (int) sum;
        }
        return totals;
    }

    /**
     * Packs all remaining virtual machines, opening one server at a time and filling it
     * as full as possible. {@code remaining} is consumed.
     */
    private static List<Server> pack(List<VirtualMachine> flavors, int[] remaining, int cpuSize,
            int memSize, boolean optimizeCpu) {
        List<Server> servers = new ArrayList<Server>();
        while (!isempty(remaining)) {
            Server server = fillOneServer(flavors, remaining, cpuSize, memSize, optimizeCpu);
            if (server.isEmpty()) {
                // Nothing could be placed, so looping again would never terminate.
                throw new IllegalArgumentException(
                        "Remaining virtual machines do not fit on an empty server of size "
                                + cpuSize + " CPU / " + memSize + " GB");
            }
            servers.add(server);
            for (int k = 0; k < remaining.length; k++) {
                remaining[k] -= server.counts[k];
            }
        }
        return servers;
    }

    /**
     * Bounded-knapsack dynamic program for a single server. When optimising CPU the memory
     * capacity is the knapsack budget and the result minimises free CPU; when optimising
     * memory the roles are swapped. Ties keep the candidate that uses fewer instances of
     * the later flavor.
     */
    private static Server fillOneServer(List<VirtualMachine> flavors, int[] remaining, int cpuSize,
            int memSize, boolean optimizeCpu) {
        int kinds = flavors.size();
        int budget = optimizeCpu ? memSize : cpuSize;
        Comparator<Server> fullestFirst = optimizeCpu ? FREE_CPU_THEN_MEM : FREE_MEM_THEN_CPU;

        Server[] previous = new Server[budget + 1];
        for (int v = 0; v <= budget; v++) {
            previous[v] = new Server(cpuSize, memSize, kinds);
        }
        for (int k = 0; k < kinds; k++) {
            VirtualMachine vm = flavors.get(k);
            int budgetCost = optimizeCpu ? vm.mem : vm.cpu;
            int targetCost = optimizeCpu ? vm.cpu : vm.mem;
            Server[] current = new Server[budget + 1];
            for (int v = 0; v <= budget; v++) {
                // A flavor whose cost truncated to zero is limited only by its remaining count.
                int maxCount = budgetCost == 0 ? remaining[k] : Math.min(v / budgetCost, remaining[k]);
                Server best = null;
                for (int count = 0; count <= maxCount; count++) {
                    Server base = previous[v - count * budgetCost];
                    int freeTarget = optimizeCpu ? base.availableCpu : base.availableMem;
                    if (count * targetCost > freeTarget) {
                        continue;
                    }
                    Server candidate = new Server(base);
                    for (int j = 0; j < count; j++) {
                        candidate.add(vm);
                    }
                    if (best == null || fullestFirst.compare(candidate, best) < 0) {
                        best = candidate;
                    }
                }
                current[v] = best; // never null: count == 0 always qualifies
            }
            previous = current;
        }
        return previous[budget];
    }

    /** Returns {@code true} when every entry of {@code kind_sum} is zero. */
    private static boolean isempty(int[] kind_sum) {
        for (int i = 0; i < kind_sum.length; i++) {
            if (kind_sum[i] != 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Smooths {@code history} in place, flavor by flavor, from the tenth day onward.
     * Each value is first damped if it is a spike or lifted if it is a dip relative to the
     * previous day, then replaced by a weighted average of itself and the nine days before
     * it. Earlier days in the window have already been smoothed when they are read.
     */
    private static void DataAverage(double[][] history) {
        int Days = history.length;
        if (Days == 0) {
            return;
        }
        int VMKINDS = history[0].length;
        // Weight of the current day first, then of each preceding day.
        double[] weights = {0.75, 0.16, 0.05, 0.02, 0.02, 0.01, 0.01, 0.01, 0.01, 0.01};
        int Daylength = weights.length; // moving-average window
        for (int j = 0; j < VMKINDS; j++) {
            for (int i = Daylength - 1; i < Days; i++) {
                // Normalise outlier counts before averaging.
                if (history[i][j] > 8 * history[i - 1][j]) {
                    history[i][j] /= 2.1;
                }
                if (history[i][j] < history[i - 1][j] / 8) {
                    history[i][j] *= 5;
                }
                double sum = 0;
                for (int day = i; day > i - Daylength; day--) {
                    sum += history[day][j] * weights[i - day];
                }
                history[i][j] = sum;
            }
        }
    }

    /** Rectifier: returns {@code d} when it is non-negative, otherwise 0 (also for NaN). */
    private static double RELU(double d) {
        return d >= 0 ? d : 0;
    }
}