Java表格仿mysql实现基本操作(连接,分组排序,统计等)

以下是数据矩阵操作类
package cn.cgh.table;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * 
 * @author xiegonghai
 *
 */
public class TableOperation
{

    /**
     * the separator of multiKey
     */
    public static String separator = "@&";

    /**
     * Join A and B according to one column
     * 
     * @param indexA
     * @param indexB
     * @return New table after innerJoin
     */
    public static List<List<String>> innerJoin(List<List<String>> A,
					       List<List<String>> B,
					       int indexA,
					       int indexB)
    {

	List<List<String>> res = new ArrayList<List<String>>();
	// colMap保存B表的连接列的索引
	Map<String, ArrayList<Integer>> colMap = new HashMap<String, ArrayList<Integer>>();
	int i = 0;
	for (List<String> row : B)
	{
	    String key = row.get(indexB);
	    if (colMap.containsKey(key))
	    {
		colMap.get(key).add(i);
	    } else
	    {
		ArrayList<Integer> arrTmp = new ArrayList<Integer>();
		arrTmp.add(i);
		colMap.put(key, arrTmp);
	    }
	    i = i + 1;
	}
	// 遍历A表进行连接
	for (List<String> row : A)
	{
	    String tarStr = row.get(indexA);
	    if (colMap.containsKey(tarStr))
	    {
		ArrayList<Integer> idList = colMap.get(tarStr);
		for (Integer id : idList)
		{
		    // deep copy
		    List<String> newRow = new ArrayList<String>(row);
		    Set<Integer> sList = new HashSet<Integer>();
		    sList.add(indexB);
		    List<String> rightB = getRowExcept(B.get(i), sList);
		    newRow.addAll(rightB);
		    res.add(newRow);
		}
	    }
	}
	System.out.println(A);
	System.out.println(B);
	return res;
    }

    /**
     * Join A and B according to multiColumn
     * 
     * @param indexAList id list of table A
     * @param indexBList id list of table B
     * @return New table after innerJoin
     */
    public static List<List<String>> innerJoin(List<List<String>> A,
					       List<List<String>> B,
					       List<Integer> indexAList,
					       List<Integer> indexBList)
    {

	List<List<String>> res = new ArrayList<List<String>>();
	// colMap保存B表的连接列的索引
	Map<Long, ArrayList<Integer>> colMap = new HashMap<Long, ArrayList<Integer>>();
	int i = 0;
	for (List<String> row : B)
	{
	    long murmurHashKeyB = getLineMultiKeyHash(row, indexBList);
	    if (colMap.containsKey(murmurHashKeyB))
	    {
		colMap.get(murmurHashKeyB).add(i);
	    } else
	    {
		ArrayList<Integer> arrTmp = new ArrayList<Integer>();
		arrTmp.add(i);
		colMap.put(murmurHashKeyB, arrTmp);
	    }
	    i = i + 1;
	}
	// 遍历A表进行连接
	for (List<String> row : A)
	{
	    long murmurHashValA = getLineMultiKeyHash(row, indexAList);
	    if (colMap.containsKey(murmurHashValA))
	    {
		ArrayList<Integer> idList = colMap.get(murmurHashValA);
		for (Integer id : idList)
		{
		    List<String> newRow = new ArrayList<String>(row);
		    Set<Integer> sList = new HashSet<Integer>();
		    sList.addAll(indexBList);
		    List<String> rightB = getRowExcept(B.get(id), sList);
		    newRow.addAll(rightB);
		    res.add(newRow);
		}
	    }
	}
	System.out.println(A);
	System.out.println(B);
	return res;
    }

    /**
     * 
     * @param row One line data
     * @param idList Clear the data that belong to idList
     * @return
     */
    public static List<String> getRowExcept(List<String> row,
					    Set<Integer> idList)
    {
	List<String> newRow = new ArrayList<String>();
	int size = row.size();
	for (int i = 0; i < size; ++i)
	{
	    if (!idList.contains(i))
	    {
		newRow.add(row.get(i));
	    }
	}
	return newRow;
    }

    /**
     * Join A and B according to multiColumn
     * 
     * @param indexAList id list of table A
     * @param indexBList id list of table B
     * @return New table after innerJoin
     */
    public static List<List<String>> leftJoin(List<List<String>> A,
					      List<List<String>> B,
					      List<Integer> indexAList,
					      List<Integer> indexBList)
    {

	List<List<String>> res = new ArrayList<List<String>>();
	// colMap保存B表的连接列的索引
	Map<Long, ArrayList<Integer>> colMap = new HashMap<Long, ArrayList<Integer>>();
	int i = 0;
	for (List<String> row : B)
	{
	    long murmurHashKeyB = getLineMultiKeyHash(row, indexBList);
	    if (colMap.containsKey(murmurHashKeyB))
	    {
		colMap.get(murmurHashKeyB).add(i);
	    } else
	    {
		ArrayList<Integer> arrTmp = new ArrayList<Integer>();
		arrTmp.add(i);
		colMap.put(murmurHashKeyB, arrTmp);
	    }
	    i = i + 1;
	}
	// 遍历A表进行连接
	for (List<String> row : A)
	{
	    long murmurHashValA = getLineMultiKeyHash(row, indexAList);
	    if (colMap.containsKey(murmurHashValA))
	    {
		ArrayList<Integer> idList = colMap.get(murmurHashValA);
		for (Integer id : idList)
		{
		    List<String> newRow = new ArrayList<String>(row);
		    Set<Integer> sList = new HashSet<Integer>();
		    sList.addAll(indexBList);
		    List<String> rightB = getRowExcept(B.get(id), sList);
		    newRow.addAll(rightB);
		    res.add(newRow);
		}
	    } else
	    {
		List<String> newRow = new ArrayList<String>(row);
		List<String> rightB = new ArrayList<String>();
		int bSize = B == null ? 0 : B.size();
		int rolCols = bSize == 0 ? 0 : B.get(0).size();
		int indexbSize = indexBList == null ? 0 : indexBList.size();
		for (int k = 0; k < rolCols - indexbSize; ++k)
		{
		    rightB.add("-");
		}
		newRow.addAll(rightB);
		res.add(newRow);
	    }
	}
	System.out.println(A);
	System.out.println(B);
	return res;
    }

    /**
     * 根据前几列进行排序
     * 
     * @param table
     * @param orderList
     */
    public static void groupByIdList(List<List<String>> table,
				     int val)
    {
	SortedTable sort = new SortedTable(table, val);
    }

    /**
     * 根据前几列进行排序
     * @param table 
     * @param orderList
     */
    public static List<List<String>> countGroupBy(List<List<String>> table,int sortCols,int colIndex)
    {
	List<List<String>> newTable = new ArrayList<List<String>>(table);
	
	SortedTable sort = new SortedTable(newTable,sortCols);
	int rowSize = newTable.size();
	for(int i = 0;i< rowSize;++i)
	{
	    
	}
	return newTable;
    }

    /**
     * 64 bit MurmurHash value
     * 
     * @param row String of list
     * @param idList multiKey of one line
     * @return the 64 bit of hash value
     */
    public static long getKeyHash(String key)
    {
	return MurmurHash.hash64(key);
    }

    /**
     * 64 bit MurmurHash value
     * 
     * @param row String of list
     * @param idList multiKey of one line
     * @return the 64 bit of hash value
     */
    public static long getLineMultiKeyHash(List<String> row,
					   List<Integer> idList)
    {
	StringBuilder sb = new StringBuilder();
	for (int i : idList)
	{
	    sb.append(row.get(i));
	    sb.append(separator);
	}
	return MurmurHash.hash64(sb.toString());
    }

}

以上是数据矩阵实现mysql的自然连接和左连接的基本操作,尚未完成, 持续更新


下面是对数据矩阵实现多级排序,用于实现group by

package cn.cgh.table;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * 实现table的多级(<=3)排序
 * 
 * @author xiegonghai
 *
 */
public class SortedTable
{
    public List<List<String>> mList;  
    public List<Comparator<List<String>>> mCmpList = new ArrayList<Comparator<List<String>>>();  
    public SortedTable(List<List<String>> list,int val){  
        mList = list;
        if(val>=1)mCmpList.add(compareFirASC);  
        if(val>=2)mCmpList.add(compareSecASC);
        if(val>=3)mCmpList.add(compareThirASC);  
        sort(mList, mCmpList);  
    }  
    public void sort(List<List<String>> list, final List<Comparator<List<String>>> comList) {  
        if (comList == null)  
            return;  
        Comparator<List<String>> cmp = new Comparator<List<String>>() {  
            public int compare(List<String> o1, List<String> o2) {  
                for (Comparator<List<String>> comparator : comList) {  
                    if (comparator.compare(o1, o2) > 0) {  
                        return 1;  
                    } else if (comparator.compare(o1, o2) < 0) {  
                        return -1;  
                    }  
                }  
                return 0;  
            }  
        };  
        Collections.sort(list, cmp);  
    }  
  
    private Comparator<List<String>> compareFirASC = new Comparator<List<String>>() {  
  
        public int compare(List<String> o1, List<String> o2) {  
            return o1.get(0).compareTo(o2.get(0)); 
        }  
    };  
  
    private Comparator<List<String>> compareSecASC = new Comparator<List<String>>() {  
  
        public int compare(List<String> o1, List<String> o2) {  
            return o1.get(1).compareTo(o2.get(1));  
        }  
    };  
    
    private Comparator<List<String>> compareThirASC = new Comparator<List<String>>() {  
	  
        public int compare(List<String> o1, List<String> o2) {  
            return o1.get(2).compareTo(o2.get(2));  
        }  
    }; 
}


以下是murmurhash实现类(hadoop,redis,nginx均有应用),实现任意长字符串到唯一32,64位整型的映射

package cn.cgh.table;

/**
 * 
 * @author reference to someone
 *
 */
public final class MurmurHash
{

    /**
     * Generates 32 bit hash from byte array of the given length and seed.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @param seed initial seed value
     * @return 32 bit hash of the given array
     */
    public static int hash32(final byte[] data,
			     int length,
			     int seed)
    {
	// 'm' and 'r' are mixing constants generated offline.
	// They're not really 'magic', they just happen to work well.
	final int m = 0x5bd1e995;
	final int r = 24;
	// Initialize the hash to a random value
	int h = seed ^ length;
	int length4 = length / 4;

	for (int i = 0; i < length4; i++)
	{
	    final int i4 = i * 4;
	    int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + ((data[i4 + 2] & 0xff) << 16)
		    + ((data[i4 + 3] & 0xff) << 24);
	    k *= m;
	    k ^= k >>> r;
	    k *= m;
	    h *= m;
	    h ^= k;
	}

	// Handle the last few bytes of the input array
	switch (length % 4)
	{
	case 3:
	    h ^= (data[(length & ~3) + 2] & 0xff) << 16;
	case 2:
	    h ^= (data[(length & ~3) + 1] & 0xff) << 8;
	case 1:
	    h ^= (data[length & ~3] & 0xff);
	    h *= m;
	}

	h ^= h >>> 13;
	h *= m;
	h ^= h >>> 15;

	return h;
    }

    /**
     * Generates 32 bit hash from byte array with default seed value.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @return 32 bit hash of the given array
     */
    public static int hash32(final byte[] data,
			     int length)
    {
	return hash32(data, length, 0x9747b28c);
    }

    /**
     * Generates 32 bit hash from a string.
     * 
     * @param text string to hash
     * @return 32 bit hash of the given string
     */
    public static int hash32(final String text)
    {
	final byte[] bytes = text.getBytes();
	return hash32(bytes, bytes.length);
    }

    /**
     * Generates 32 bit hash from a substring.
     * 
     * @param text string to hash
     * @param from starting index
     * @param length length of the substring to hash
     * @return 32 bit hash of the given string
     */
    public static int hash32(final String text,
			     int from,
			     int length)
    {
	return hash32(text.substring(from, from + length));
    }

    /**
     * Generates 64 bit hash from byte array of the given length and seed.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @param seed initial seed value
     * @return 64 bit hash of the given array
     */
    public static long hash64(final byte[] data,
			      int length,
			      int seed)
    {
	final long m = 0xc6a4a7935bd1e995L;
	final int r = 47;

	long h = (seed & 0xffffffffl) ^ (length * m);

	int length8 = length / 8;

	for (int i = 0; i < length8; i++)
	{
	    final int i8 = i * 8;
	    long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
		    + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
		    + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
		    + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);

	    k *= m;
	    k ^= k >>> r;
	    k *= m;

	    h ^= k;
	    h *= m;
	}

	switch (length % 8)
	{
	case 7:
	    h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
	case 6:
	    h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
	case 5:
	    h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
	case 4:
	    h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
	case 3:
	    h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
	case 2:
	    h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
	case 1:
	    h ^= (long) (data[length & ~7] & 0xff);
	    h *= m;
	}
	;

	h ^= h >>> r;
	h *= m;
	h ^= h >>> r;

	return h;
    }

    /**
     * Generates 64 bit hash from byte array with default seed value.
     * 
     * @param data byte array to hash
     * @param length length of the array to hash
     * @return 64 bit hash of the given string
     */
    public static long hash64(final byte[] data,
			      int length)
    {
	return hash64(data, length, 0xe17a1465);
    }

    /**
     * Generates 64 bit hash from a string.
     * 
     * @param text string to hash
     * @return 64 bit hash of the given string
     */
    public static long hash64(final String text)
    {
	final byte[] bytes = text.getBytes();
	return hash64(bytes, bytes.length);
    }

    /**
     * Generates 64 bit hash from a substring.
     * 
     * @param text string to hash
     * @param from starting index
     * @param length length of the substring to hash
     * @return 64 bit hash of the given array
     */
    public static long hash64(final String text,
			      int from,
			      int length)
    {
	return hash64(text.substring(from, from + length));
    }
}




  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值