在reduce端做join:以TextPair作为key,用TextPair.getSecond()区分记录来自哪个源文件,reduce端将其取出并赋值给变量bz(下文的写法均假定同一key下bz==0的记录排在最前面,先被读入内存list)
a join b
//a放进内存list
while(hasNext())
if bz==0
list.add()
else
{
for clist :list
write clist next
}
a left outer join b
int count=0;
//a放进内存list
while(hasNext())
if bz==0
list.add()
else
{
count++; //记录该key在b表的记录数
for clist :list
write clist next
}
if count==0 //如果b表没有记录,则输出null
for clist :list
write clist null
//另一种写法(仍为a left outer join b):b放进内存list
while(hasNext())
if bz==0
list.add() //b表的bz=0,b表放进list内存
else
{
if list.size==0 //如果b表没有对应该key的记录,则输出null
write next null
else //有则循环输出
for clist:list
write next clist
}
a full outer join b
int count=0;
//a放进内存list
while(hasNext())
if bz==0
list.add()
else
{
count++; //记录该key在b表的记录数
if list.size==0 //如果a表没有记录
write null next
else
for clist :list
write clist next
}
if count==0 //如果b表没有记录,则输出null
for clist :list
write clist null
a right outer join b
int count=0; //注:本写法中count只被累加、未被读取;right outer join无需在循环后补输出,可省略
//a放进内存list
while(hasNext())
if bz==0
list.add()
else
{
count++; //记录该key在b表的记录数
if list.size==0
write null next
else
for clist :list
write clist next