-- 1. Sampling with TABLESAMPLE
-- Reference: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Sampling
-- Build a sample table with Hive's row-count form of TABLESAMPLE.
-- The row count is applied to each input split rather than to the table as a
-- whole, so the resulting table can hold many more than 1304 rows.
create table id_graph_negative_sample1 as
select *
from id_graph_negative_sample tablesample (1304 rows) src;
-- Row count of the sampled table; observed result: 59984.
-- (The result is recorded in a `--` comment on its own line because HiveQL
-- does not treat `#` as a comment marker; trailing `#...` text after the
-- semicolon would be parsed as the start of another statement.)
select count(*) from id_graph_negative_sample1;
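-- Note: 1304 is NOT the total number of rows in the sample. It is the number of
-- rows taken from each input split (sub-file), so the total depends on how many
-- splits the table has. Here 59984 = 46 x 1304, i.e. 46 splits were read.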
-- 2. An alternative method that controls the total sample size
-- Build a random sample with a fixed upper bound on the total row count:
-- rows are spread across reducers by a random key, ordered by a random key
-- within each reducer, and the result is capped at 100000 rows by LIMIT.
-- NOTE(review): this targets the same table name as the TABLESAMPLE example
-- earlier in this file; CREATE TABLE fails if that table already exists, so
-- presumably the two statements are alternatives -- drop the table first if
-- both are run.
create table id_graph_negative_sample1 as
select *
from id_graph_negative_sample
distribute by rand()
sort by rand()
limit 100000;
-- Here 100000 is the total number of rows in the sample.