/**
 * One-off maintenance job for a MongoDB collection: finds documents that share the same
 * {@code id} value, reports the redundant copies (optionally deleting them) and finally builds
 * a unique index on {@code id}.
 *
 * <p>For every duplicated {@code id} the copy with the most top-level fields is kept; all other
 * copies are treated as redundant.
 */
public class ClearDirtyData {
    private static final String MONGO_DATABASE_NAME = "";
    private static final String MONGO_USERNAME = "";
    private static final String MONGO_PASSWORD = "";
    // ppc production environment
    private static final String MONGO_SERVER = "";
    private static final int MONGO_PORT = 27017;

    /**
     * When {@code false} (the default) redundant copies are only printed, never deleted.
     * Set to {@code true} to actually remove them.
     * NOTE(review): documents without an {@code id} field are presumably grouped together under
     * a null key by the aggregation - confirm how those should be handled before enabling this.
     */
    private static final boolean REMOVE_DUPLICATES = false;

    /** Orders documents by number of top-level keys, largest first. */
    private static final Comparator<DBObject> MOST_FIELDS_FIRST = new Comparator<DBObject>() {
        @Override
        public int compare(DBObject b1, DBObject b2) {
            int leftSize = b1.keySet().size();
            int rightSize = b2.keySet().size();
            if (leftSize < rightSize) {
                return 1;
            }
            if (leftSize > rightSize) {
                return -1;
            }
            return 0;
        }
    };

    private static Mongo mongo;
    private static DB db;

    static {
        try {
            mongo = new Mongo(MONGO_SERVER, MONGO_PORT);
            db = mongo.getDB(MONGO_DATABASE_NAME);
            if (!db.authenticate(MONGO_USERNAME, MONGO_PASSWORD.toCharArray())) {
                System.out.println("连接MongoDB数据库,校验失败!");
            }
        } catch (UnknownHostException e) {
            // Fail fast: continuing would leave db null and surface later as an
            // unexplained NullPointerException in every method that touches it.
            throw new IllegalStateException(
                    "Cannot resolve MongoDB host '" + MONGO_SERVER + "'", e);
        }
    }

    /**
     * Returns the collection with the given name from the configured database.
     *
     * @param collectionName name of the collection
     * @return the collection handle
     */
    public static synchronized DBCollection getDBCollection(String collectionName) {
        return db.getCollection(collectionName);
    }

    /**
     * Creates a collection with the given name (default options) in the configured database.
     *
     * @param collectionName name of the collection to create
     * @return the newly created collection
     */
    public static DBCollection createDBCollection(String collectionName) {
        return db.createCollection(collectionName, new BasicDBObject());
    }

    /**
     * Reports (and, if {@link #REMOVE_DUPLICATES} is set, deletes) every redundant copy of a
     * document in the {@code cartoonbook} collection, then creates the unique index
     * {@code index_id} on {@code id}.
     *
     * <p>The index is only built when no known duplicates are left behind, because a unique
     * index build over duplicated keys is rejected by the server.
     */
    @Test
    public void clearDirtyData() {
        String collectionName = "cartoonbook";
        DBCollection collection = getDBCollection(collectionName);

        boolean duplicatesRemain = false;
        for (DBObject duplicatedGroup : findDuplicatedIds(collection)) {
            Object duplicatedId = duplicatedGroup.get("id");
            System.out.println(duplicatedId);

            List<DBObject> copies = getObjectById(collection, duplicatedId);
            // Richest document first; it is the one that survives.
            Collections.sort(copies, MOST_FIELDS_FIRST);

            // Index 0 is kept, everything after it is redundant.
            for (int i = 1; i < copies.size(); i++) {
                Object redundantKey = copies.get(i).get("_id");
                System.out.println(redundantKey);
                if (REMOVE_DUPLICATES) {
                    collection.remove(new BasicDBObject("_id", redundantKey));
                } else {
                    duplicatesRemain = true;
                }
            }
        }

        if (duplicatesRemain) {
            System.out.println(
                    "Duplicate ids remain (REMOVE_DUPLICATES=false); skipping unique index index_id");
            return;
        }
        BasicDBObject index = new BasicDBObject("id", 1);
        collection.createIndex(index, "index_id", true);
    }

    /**
     * Runs the aggregation that lists every {@code id} value occurring more than once.
     *
     * @param collection collection to scan
     * @return one document per duplicated id, carrying the fields {@code id} and {@code count}
     */
    private Iterable<DBObject> findDuplicatedIds(DBCollection collection) {
        // Build the pipeline: group by id and count, keep groups with count > 1, flatten.
        DBObject groupFields = new BasicDBObject("_id", new BasicDBObject("id", "$id"))
                .append("count", new BasicDBObject("$sum", 1));
        DBObject match = new BasicDBObject("count", new BasicDBObject("$gt", 1));
        DBObject project = new BasicDBObject("_id", 0).append("id", "$_id.id").append("count", 1);
        return collection.aggregate(Arrays.asList(
                (DBObject) new BasicDBObject("$group", groupFields),
                (DBObject) new BasicDBObject("$match", match),
                (DBObject) new BasicDBObject("$project", project)
        )).results();
    }

    /**
     * Fetches every document whose {@code id} field equals the given value, printing each
     * document together with its top-level key count.
     *
     * @param collection collection to query
     * @param id value of the {@code id} field to look up
     * @return all matching documents, in cursor order; empty if none match
     */
    public List<DBObject> getObjectById(DBCollection collection, Object id) {
        List<DBObject> dbObjects = new ArrayList<DBObject>();
        BasicDBObject allQuery = new BasicDBObject();
        allQuery.put("id", id);
        DBCursor cursor = collection.find(allQuery);
        cursor.addOption(Bytes.QUERYOPTION_NOTIMEOUT);
        try {
            while (cursor.hasNext()) {
                DBObject dbObject = cursor.next();
                System.out.println(dbObject.keySet().size() + ":" + dbObject);
                dbObjects.add(dbObject);
            }
        } finally {
            // A no-timeout cursor is not reaped for inactivity, so release it explicitly.
            cursor.close();
        }
        return dbObjects;
    }
}
MongoDB 清洗 id 不唯一的脏数据,并创建唯一索引
最新推荐文章于 2022-05-04 16:05:30 发布