mongo清洗id不唯一的脏数据,并创建唯一索引

public class ClearDirtyData {
    private static final String MONGO_DATABASE_NAME = "";
    private static final String MONGO_USERNAME = "";
    private static final String MONGO_PASSWORD = "";
    //ppc线上环境
    private static final String MONGO_SERVER = "";
    private static final int MONGO_PORT = 27017;
    private static Mongo mongo;
    private static DB db;

    static {
        try {
            mongo = new Mongo(MONGO_SERVER, MONGO_PORT);
            db = mongo.getDB(MONGO_DATABASE_NAME);
            if (!db.authenticate(MONGO_USERNAME, MONGO_PASSWORD.toCharArray())) {
                System.out.println("连接MongoDB数据库,校验失败!");
            }
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
    }

    public static synchronized DBCollection getDBCollection(String collectionName) {
        return db.getCollection(collectionName);
    }

    public static DBCollection createDBCollection(String collectionName) {
        return db.createCollection(collectionName, new BasicDBObject());
    }

    @Test
    public void clearDirtyData() {
        String collectionName = "cartoonbook";
        DBCollection collection = getDBCollection(collectionName);
        //构建查询
        DBObject groupFields = new BasicDBObject("_id", new BasicDBObject("id", "$id")).append("count", new BasicDBObject("$sum", 1));
        DBObject match = new BasicDBObject("count", new BasicDBObject("$gt", 1));
        DBObject project = new BasicDBObject("_id", 0).append("id", "$_id.id").append("count", 1);
        Iterable<DBObject> output = collection.aggregate(Arrays.asList(
                (DBObject) new BasicDBObject("$group", groupFields),
                (DBObject) new BasicDBObject("$match", match),
                (DBObject) new BasicDBObject("$project", project)
        )).results();
        for (DBObject dbObject : output) {
            System.out.println(dbObject.get("id"));
            List<DBObject> dbObjects= getObjectById(collection,dbObject.get("id"));
            //按照keySet.size倒排
            Collections.sort(dbObjects, new Comparator<DBObject>() {
                @Override
                public int compare(DBObject b1, DBObject b2) {
                   if(b1.keySet().size()-b2.keySet().size()<0)
                       return 1;
                    else if((b1.keySet().size()-b2.keySet().size())==0)
                       return 0;
                    else
                       return -1;
                }

            });
           if(dbObjects.size()>=2)
           {
               for(int i=1;i<dbObjects.size();i++)
               {
                   System.out.println(dbObjects.get(i).get("_id"));
                   //collection.remove(new BasicDBObject("_id",dbObjects.get(i).get("_id")));
               }
           }
        }
        BasicDBObject index = new BasicDBObject("id", 1);
        collection.createIndex(index,"index_id",true );
    }

    /**
     * 根据id来获取所有的数据
     * @param collection 集合名称
     * @param id         id
     * @return
     */
    public List<DBObject>  getObjectById(DBCollection collection, Object id) {
        List<DBObject> dbObjects=new ArrayList<DBObject>();
        BasicDBObject allQuery = new BasicDBObject();
        allQuery.put("id", id);
        DBCursor cursor = collection.find(allQuery);
        cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        while (cursor.hasNext()) {
            DBObject dbObject = cursor.next();
            System.out.println(dbObject.keySet().size()+":"+dbObject);
            dbObjects.add(dbObject);
        }
        return dbObjects;
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值