下载YCSB软件
wget https://github.com/downloads/brianfrankcooper/YCSB/ycsb-0.1.4.tar.gz
解压(注:本文实际使用的是 BigDataBench 软件包中自带的 ycsb-0.1.4)
tar xf BigDataBench_V3.2.1_Hadoop.tar.gz
cd /opt/BigDataBench_V3.2.1_Hadoop_Hive/BasicDatastoreOperations/ycsb-0.1.4
cp /opt/cloudera/parcels/CDH-5.8.2-1.cdh5.8.2.p0.3/lib/hbase/lib/* /opt/BigDataBench_V3.2.1_Hadoop_Hive/BasicDatastoreOperations/ycsb-0.1.4/hbase-binding/lib
write写
cd $YCSB
python bin/ycsb load hbase -P workloads/workloadc -p threads=<thread-numbers> -p columnfamily=family -p recordcount=<recordcount-value> -p hosts=<hostip> -s >load.dat
create 'usertable','cf' 数据量大不适合这样的创建表
要用预分区创建表,这样加载数据的速度快上很多。
hbase(main):001:0> n_splits = 120  # HBase recommends (10 * number of regionservers)
=> 120
hbase(main):002:0> create 'usertable', 'cf', {SPLITS => (1..n_splits).map {|i| "user#{1000+i*(9999-1000)/n_splits}"}}
加载数据
[root@namenode1 bin]#python ycsb load hbase -P ../workloads/workloadb -p threads=10 -p table=t5 -p columnfamily=family -p recordcount=10000000 -p hosts=10.1.8.10 -s >load.da
Loading workload...
Starting test.
0 sec: 0 operations;
10 sec: 126791 operations; 12657.58 current ops/sec; [INSERT AverageLatency(us)=72.98]
20 sec: 278531 operations; 15172.48 current ops/sec; [INSERT AverageLatency(us)=65.65]
30 sec: 414715 operations; 13617.04 current ops/sec; [INSERT AverageLatency(us)=71.18]
40 sec: 565002 operations; 15028.7 current ops/sec; [INSERT AverageLatency(us)=64.96]
50 sec: 696475 operations; 13147.3 current ops/sec; [INSERT AverageLatency(us)=71.65]
60 sec: 832659 operations; 13617.04 current ops/sec; [INSERT AverageLatency(us)=72.72]
70 sec: 973539 operations; 14088 current ops/sec; [INSERT AverageLatency(us)=68.64]
80 sec: 1119115 operations; 14557.6 current ops/sec; [INSERT AverageLatency(us)=66.38]
90 sec: 1245907 operations; 12677.93 current ops/sec; [INSERT AverageLatency(us)=77.95]
100 sec: 1368003 operations; 12209.6 current ops/sec; [INSERT AverageLatency(us)=79.19]
110 sec: 1490099 operations; 12209.6 current ops/sec; [INSERT AverageLatency(us)=78.93]
120 sec: 1612195 operations; 12208.38 current ops/sec; [INSERT AverageLatency(us)=80.93]
130 sec: 1753075 operations; 14088 current ops/sec; [INSERT AverageLatency(us)=66.95]
140 sec: 1884563 operations; 13148.8 current ops/sec; [INSERT AverageLatency(us)=74.02]
150 sec: 2025443 operations; 14086.59 current ops/sec; [INSERT AverageLatency(us)=69.22]
160 sec: 2195204 operations; 16976.1 current ops/sec; [INSERT AverageLatency(us)=56.79]
170 sec: 2336084 operations; 14088 current ops/sec; [INSERT AverageLatency(us)=69.41]
180 sec: 2432352 operations; 9625.84 current ops/sec; [INSERT AverageLatency(us)=102.08]
190 sec: 2584098 operations; 15174.6 current ops/sec; [INSERT AverageLatency(us)=64.09]
200 sec: 2709187 operations; 12508.9 current ops/sec; [INSERT AverageLatency(us)=74.99]
210 sec: 2811387 operations; 10218.98 current ops/sec; [INSERT AverageLatency(us)=77.3]
220 sec: 2811387 operations; 0 current ops/sec;
230 sec: 2842573 operations; 3118.6 current ops/sec; [INSERT AverageLatency(us)=703.84]
240 sec: 3006436 operations; 16384.66 current ops/sec; [INSERT AverageLatency(us)=60.36]
250 sec: 3164197 operations; 15776.1 current ops/sec; [INSERT AverageLatency(us)=59.41]
260 sec: 3364266 operations; 20004.9 current ops/sec; [INSERT AverageLatency(us)=49.03]
对比:创建预分区表后,同一时刻的结果为 260 sec: 10714543 operations; 15439.46 current ops/sec; [INSERT AverageLatency(us)=2167.37],可以看出加载速度提升了将近三倍。
270 sec: 3526366 operations; 16210 current ops/sec; [INSERT AverageLatency(us)=59.86]
280 sec: 3683436 operations; 15707 current ops/sec; [INSERT AverageLatency(us)=61.83]
290 sec: 3805503 operations; 12205.48 current ops/sec; [INSERT AverageLatency(us)=79.95]
300 sec: 3966411 operations; 16090.8 current ops/sec; [INSERT AverageLatency(us)=55.77]
310 sec: 4104606 operations; 13819.5 current ops/sec; [INSERT AverageLatency(us)=73.18]
320 sec: 4265900 operations; 16127.79 current ops/sec; [INSERT AverageLatency(us)=61.78]
330 sec: 4406855 operations; 14095.5 current ops/sec; [INSERT AverageLatency(us)=65.93]
340 sec: 4578587 operations; 17173.2 current ops/sec; [INSERT AverageLatency(us)=57.34]
350 sec: 4699126 operations; 12052.69 current ops/sec; [INSERT AverageLatency(us)=82.73]
360 sec: 4791369 operations; 9224.3 current ops/sec; [INSERT AverageLatency(us)=102.56]
370 sec: 4876270 operations; 8490.1 current ops/sec; [INSERT AverageLatency(us)=116.31]
380 sec: 4977278 operations; 10099.79 current ops/sec; [INSERT AverageLatency(us)=98.56]
390 sec: 5086244 operations; 10896.6 current ops/sec; [INSERT AverageLatency(us)=90.21]
400 sec: 5176789 operations; 9054.5 current ops/sec; [INSERT AverageLatency(us)=104.43]
410 sec: 5233990 operations; 5719.53 current ops/sec; [INSERT AverageLatency(us)=144.56]
420 sec: 5295014 operations; 6102.4 current ops/sec; [INSERT AverageLatency(us)=194.73]
430 sec: 5432827 operations; 13781.3 current ops/sec; [INSERT AverageLatency(us)=70.39]
440 sec: 5576111 operations; 14326.97 current ops/sec; [INSERT AverageLatency(us)=67.45]
450 sec: 5600806 operations; 2469.5 current ops/sec; [INSERT AverageLatency(us)=403.25]
460 sec: 5734396 operations; 13357.66 current ops/sec; [INSERT AverageLatency(us)=72.47]
470 sec: 5857395 operations; 12299.9 current ops/sec; [INSERT AverageLatency(us)=72.86]
480 sec: 5926115 operations; 6872 current ops/sec; [INSERT AverageLatency(us)=150.6]
490 sec: 5998172 operations; 7204.98 current ops/sec; [INSERT AverageLatency(us)=138.99]
500 sec: 6078015 operations; 7984.3 current ops/sec; [INSERT AverageLatency(us)=122.66]
510 sec: 6152681 operations; 7466.6 current ops/sec; [INSERT AverageLatency(us)=132.53]
520 sec: 6253199 operations; 10050.79 current ops/sec; [INSERT AverageLatency(us)=96.87]
530 sec: 6328446 operations; 7524.7 current ops/sec; [INSERT AverageLatency(us)=133.41]
540 sec: 6410082 operations; 8163.6 current ops/sec; [INSERT AverageLatency(us)=116.54]
550 sec: 6518866 operations; 10877.31 current ops/sec; [INSERT AverageLatency(us)=91.14]
560 sec: 6581271 operations; 6240.5 current ops/sec; [INSERT AverageLatency(us)=160.51]
570 sec: 6648889 operations; 6761.8 current ops/sec; [INSERT AverageLatency(us)=143.92]
580 sec: 6737937 operations; 8903.91 current ops/sec; [INSERT AverageLatency(us)=94.04]
590 sec: 6899522 operations; 16158.5 current ops/sec; [INSERT AverageLatency(us)=69.3]
600 sec: 7030846 operations; 13132.4 current ops/sec; [INSERT AverageLatency(us)=73.97]
610 sec: 7187002 operations; 15614.04 current ops/sec; [INSERT AverageLatency(us)=61.95]
620 sec: 7328169 operations; 14116.7 current ops/sec; [INSERT AverageLatency(us)=63.94]
630 sec: 7442937 operations; 11476.8 current ops/sec; [INSERT AverageLatency(us)=90.06]
640 sec: 7604453 operations; 16149.99 current ops/sec; [INSERT AverageLatency(us)=60.56]
650 sec: 7726817 operations; 12236.4 current ops/sec; [INSERT AverageLatency(us)=75.54]
660 sec: 7845818 operations; 11900.1 current ops/sec; [INSERT AverageLatency(us)=85.47]
670 sec: 7974330 operations; 12849.92 current ops/sec; [INSERT AverageLatency(us)=75.65]
680 sec: 8122908 operations; 14857.8 current ops/sec; [INSERT AverageLatency(us)=62.35]
690 sec: 8249924 operations; 12701.6 current ops/sec; [INSERT AverageLatency(us)=79.9]
700 sec: 8393678 operations; 14375.4 current ops/sec; [INSERT AverageLatency(us)=68.14]
710 sec: 8516021 operations; 12233.08 current ops/sec; [INSERT AverageLatency(us)=78.66]
720 sec: 8662364 operations; 14634.3 current ops/sec; [INSERT AverageLatency(us)=66.57]
730 sec: 8809476 operations; 14711.2 current ops/sec; [INSERT AverageLatency(us)=57.41]
740 sec: 8934056 operations; 12456.75 current ops/sec; [INSERT AverageLatency(us)=86.75]
750 sec: 9075227 operations; 14117.1 current ops/sec; [INSERT AverageLatency(us)=69.35]
760 sec: 9229611 operations; 15438.4 current ops/sec; [INSERT AverageLatency(us)=63.5]
770 sec: 9362480 operations; 13285.57 current ops/sec; [INSERT AverageLatency(us)=72.72]
780 sec: 9511875 operations; 14939.5 current ops/sec; [INSERT AverageLatency(us)=64.88]
790 sec: 9621633 operations; 10974.7 current ops/sec; [INSERT AverageLatency(us)=86.04]
800 sec: 9724258 operations; 10262.5 current ops/sec; [INSERT AverageLatency(us)=95.51]
810 sec: 9841411 operations; 11715.3 current ops/sec; [INSERT AverageLatency(us)=85.96]
820 sec: 9956803 operations; 11538.05 current ops/sec; [INSERT AverageLatency(us)=84.59]
824 sec: 10000000 operations; 9655.12 current ops/sec; [UPDATE AverageLatency(us)=518904] [INSERT AverageLatency(us)=89.62]
运行数据
./ycsb run hbase -P ../workloads/workloadb -p table=t5 -p columnfamily=family -p fieldcount=1 -p fieldlength=1000 -p operationcount=10000000 -p readproportion=0.95 -p updateproportion=0.5 -p insertproportion=0 -p scanproportion=0 -p readmodifywriteproportion=0 -p readallfields=true -p requestdistribution=zipfian -threads xxxx -s > readmostly_client1
[root@datanode03 bin]# python ycsb run hbase -P ../workloads/workloadc -p table=t5 -p columnfamily=family -p fieldcount=1 -p fieldlength=1000 -p operationcount=5000000 -p readproportion=0.95 -p updateproportion=0.5 -p insertproportion=0 -p scanproportion=0 -p readmodifywriteproportion=0 -p readallfields=true -p requestdistribution=zipfian -threads 1000 -s > readmostly_client1
Loading workload...
Starting test.
2017-02-16 17:14:25:172 0 sec: 0 operations;
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/BigDataBench_V3.2.1_Hadoop_Hive/BasicDatastoreOperations/ycsb-0.1.4/hbase-binding/lib/hbase-binding-0.1.4.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/BigDataBench_V3.2.1_Hadoop_Hive/BasicDatastoreOperations/ycsb-0.1.4/hbase-binding/lib/slf4j-log4j12.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
hbase(main):002:0> exit
[root@namenode01 bin]# python ycsb load hbase -P ../workloads/workloada -p threads=10 -p table=usertable -p columnfamily=cf -p fieldcount=10 -p fieldlength=100 -p recordcount=20000000 -p hosts=10.1.8.102 -p insertorder=hashed -p insertstart=0 -threads 10 -s >load.da
Loading workload...
Starting test.
0 sec: 0 operations;
log4j:WARN No appenders could be found for logger (org.apache.hadoop.security.Groups).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/BigDataBench_V3.2.1_Hadoop_Hive/BasicDatastoreOperations/ycsb-0.1.4/hbase-binding/lib/slf4j-log4j12.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/BigDataBench_V3.2.1_Hadoop_Hive/BasicDatastoreOperations/ycsb-0.1.4/hbase-binding/lib/hbase-binding-0.1.4.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
10 sec: 96790 operations; 9661.61 current ops/sec; [INSERT AverageLatency(us)=164.15]
20 sec: 96790 operations; 0 current ops/sec; [INSERT AverageLatency(us)=16534124] [UPDATE AverageLatency(us)=24.75]
23 sec: 96790 operations; 0 current ops/sec; [INSERT AverageLatency(us)=20452926] [UPDATE AverageLatency(us)=8.83]
Read
1. For HBase
Basic command-line usage
cd $YCSB
sh bin/ycsb run hbase -P workloads/workloadc -p threads=<thread-numbers> -p columnfamily=<family> -p operationcount=<operationcount-value> -p hosts=<hostip> -s >tran.dat
关于此命令的几个注释:
<thread-number>:客户端线程的数量。通常通过增加线程数来提高施加到数据库上的负载。
<family>:在HBase的情况下,我们用它来指定列族。运行此命令之前,数据库中应已存在带有该列族的usertable表,所有数据将被加载到该表的这一列族中。
<operationcount-value>:此基准测试的总操作数。例如,当您要加载10GB数据时,应将其设置为10000000。
Scan
1. For HBase
Basic command-line usage
cd $YCSB
sh bin/ycsb run hbase -P workloads/workloade -p threads=<thread-numbers> -p columnfamily=<family> -p operationcount=<operationcount-value> -p hosts=<hostip> -s > tran.dat
关于此命令的几个注释
<thread-number>:客户端线程的数量。通常通过增加线程数来提高施加到数据库上的负载。
<family>:在HBase的情况下,我们用它来指定列族。在运行此命令之前,数据库中应已存在带有该列族的usertable表,
数据将被加载到该表的这一列族中。
<operationcount-value>:此基准的总操作。 例如,当您要加载10GB数据时,您将其设置为10000000。
<hostip>:hbase主节点的IP地址
K-means
The K-means program we use is obtained from Mahout.
Hadoop-version
To prepare and generate data
1.
tar xzf BigDataBench_V3.1Hadoop.tar.gz
2.
cd BigDataBench_V3.1Hadoop_Hive/SNS/Kmeans
3.
sh genData_Kmeans.sh
To_run
sh run_Kmeans.sh