利用java多线程向MongoDB中批量插入静态文件

第一步、开发环境:

    win7 64位(注:MongoDB在32位Windows上有数据量限制(约2GB),详见官方文档)

    Mongodb3.2

    mongo-java-driver 3.2.2

第二步、安装mongodb,并开启服务

    略:可参见官方文档

第三步、代码

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;
import org.bson.Document;
import com.mongodb.MongoClient;
import com.mongodb.MongoWriteException;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;

/**
 *  created by soarhu 2016/4/21
 */

/**
 * Entry point of the bulk loader: starts one {@link Produce} thread that scans
 * {@link #DIR} and {@link #ThreadNum} {@link Customer} threads that insert the
 * scanned files into the configured MongoDB collection, then polls until the
 * workers report completion through {@link #ThreadSizeCount}.
 */
public class MongodbBatchInsetUtils {

    /** Number of threads inserting into MongoDB. */
    static final int ThreadNum=3;
    /**
     * Completion counter incremented by the worker threads.
     * Declared volatile so the polling loop in {@link #main(String[])} is
     * guaranteed to observe increments made on other threads.
     */
    static volatile int ThreadSizeCount = 0;
    /** MongoDB host. */
    static final String HOST = "127.0.0.1";
    /** MongoDB port. */
    static final int PORT = 27017;
    /** Target database name. */
    static final String DATABASE_NAME="mydb";
    /** Target collection name. */
    static final String COLLECTION_NAME="md";
    /** Root directory that is scanned for files. */
    public static final String DIR = "E:\\targets";
    /** File-name suffix to import; empty means every file. */
    public static final String FILE_SUFFIX = "html";
    /** Charset used to decode file contents. */
    public static final String CHARSET = "UTF-8";

    /**
     * Runs the import and prints progress every two seconds.
     *
     * <p>NOTE(review): a consumer bumps {@link #ThreadSizeCount} every time it
     * sees the pool empty, so the counter is only a heuristic and may pass
     * {@code ThreadNum + 1} without ever being equal to it. The loop therefore
     * tests {@code <} / {@code >=} rather than {@code ==}; with the exact
     * comparison a skipped value left this method polling forever.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        MongoClient client =new MongoClient(HOST,PORT);
        Thread[] th = new Thread[ThreadNum];
        try {
            MongoDatabase dataBase = client.getDatabase(DATABASE_NAME);
            MongoCollection<Document> collection = dataBase.getCollection(COLLECTION_NAME);

            Pool p = new Pool();
            Produce pro = new Produce(p);

            long startTime = System.currentTimeMillis();

            new Thread(pro).start();// reader thread: disk -> pool
            for(int i=0;i<ThreadNum;i++){// writer threads: pool -> MongoDB
                Thread a = new Thread(new Customer(p,collection));
                a.start();
                th[i]=a;
            }

            while(ThreadSizeCount < ThreadNum+1){
                System.out.println("已写入数据:"+p.hasUploadToDB);
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // Preserve the interrupt for the caller and stop waiting;
                    // the finally block still releases the workers and the client.
                    Thread.currentThread().interrupt();
                    return;
                }
            }

            long endTime = System.currentTimeMillis();
            System.out.println("数据写入完成,吸入总数:"+p.hasUploadToDB+",共花费时间约为:"+(endTime-startTime)+"ms\n");
        } finally {
            // Consumers block in Pool.fetchFile() once the pool is drained;
            // interrupting them is what lets them return.
            for(Thread t:th){
                if(null!=t){
                    t.interrupt();
                }
            }
            // Always release the connection, including on failure paths.
            client.close();
        }
    }
}

//生产者,从磁盘读取数据
/**
 * Producer: walks the configured directory tree and pushes every matching
 * file into the shared {@link Pool}.
 */
class Produce implements Runnable{

    private Pool pool=null;

    /**
     * @param pool buffer that receives the files found on disk
     */
    public Produce(Pool pool){
        this.pool= pool;
    }

    /**
     * Scans {@code MongodbBatchInsetUtils.DIR} and then reports completion by
     * incrementing {@code MongodbBatchInsetUtils.ThreadSizeCount}.
     * The increment sits in a {@code finally} block so a failed scan is still
     * counted as a finished producer.
     */
    @Override
    public void run() {
        try {
            getFilesInDir(MongodbBatchInsetUtils.DIR, MongodbBatchInsetUtils.FILE_SUFFIX);
        } finally {
            MongodbBatchInsetUtils.ThreadSizeCount++;
        }
        System.out.println("READING FINISHED!!");
    }

    /**
     * Recursively puts every regular file under {@code dir} whose name ends
     * with {@code suffix} into the pool. A {@code null} or blank suffix selects
     * every file. Files that do not match are skipped, and empty or unreadable
     * sub-directories are ignored; previously either case aborted the whole
     * scan with an exception.
     *
     * @param dir    root directory; a {@code null} or blank value is a no-op
     * @param suffix file-name suffix to match, or {@code null}/blank for all files
     * @throws RuntimeException if {@code dir} is not an existing directory, or
     *                          if it is empty or cannot be listed
     */
    public void getFilesInDir(String dir,String suffix){
        if(null==dir || dir.trim().length()==0){
            return;
        }
        File root = new File(dir.trim());
        if(!root.exists() || !root.isDirectory()){
            throw new RuntimeException("目录不存在,请检查路径正确性!");
        }
        File[] entries = root.listFiles();
        if(null==entries || entries.length==0){
            throw new RuntimeException("文件内容为空");
        }
        String wanted = (null==suffix) ? "" : suffix.trim();
        scan(entries, wanted);
    }

    /** Depth-first walk over {@code entries}; {@code wanted} is the trimmed suffix ("" = all). */
    private void scan(File[] entries,String wanted){
        for(File entry:entries){
            if(entry.isFile()){
                if(wanted.length()==0 || entry.getName().endsWith(wanted)){
                    pool.putFile(entry);
                }
            }else if(entry.isDirectory()){
                File[] children = entry.listFiles();
                // listFiles() returns null on I/O error; skip such directories.
                if(null!=children){
                    scan(children, wanted);
                }
            }
        }
    }
}

//消费者,向mongoDb中写数据
/**
 * Consumer: takes files from the shared {@link Pool} and stores each one as a
 * document in the MongoDB collection.
 */
class Customer implements Runnable{

    private Pool pool=null;
    MongoCollection<Document> collection = null;

    /**
     * @param pool       buffer to take files from
     * @param collection collection that receives one document per file
     */
    public Customer(Pool pool,MongoCollection<Document> collection){
        this.pool = pool;
        this.collection = collection;
    }

    /**
     * Fetches and stores files until {@code pool.fetchFile()} returns
     * {@code null} or a write fails with {@link MongoWriteException}.
     * Each time the pool is observed empty after a save, this thread reports
     * itself finished by incrementing
     * {@code MongodbBatchInsetUtils.ThreadSizeCount}; the same thread can do
     * this more than once.
     */
    @Override
    public void run() {
        while(true){
            File f = pool.fetchFile();
            if(null==f){
                return ;
            }
            try {
                saveToMonGoDb(f);
            } catch (MongoWriteException e) {
                System.out.println("写入数据库异常:"+e.getMessage());
                return ;
            }

            if(pool.getSize()==0){
                System.out.println(Thread.currentThread().getName()+" :WRITTING FINISHED!!");
                MongodbBatchInsetUtils.ThreadSizeCount++;
            }
        }
    }

    /**
     * Stores {@code file} as {@code {_id: <name without extension>, content: <text>}}.
     * A file name without a dot is used unchanged as the id (this previously
     * threw {@code StringIndexOutOfBoundsException}). Files whose content
     * could not be read are skipped instead of being stored with a null body.
     */
    private void saveToMonGoDb(File file){
        String name = file.getName();
        int dot = name.lastIndexOf(".");
        String _id = (dot<0) ? name : name.substring(0,dot);
        String content = readFileContext(file, MongodbBatchInsetUtils.CHARSET);
        if(null==content){
            // readFileContext has already reported the failure.
            return;
        }
        Document document = new Document("_id",_id).append("content", content);
        collection.insertOne(document);
    }

    /**
     * Reads the whole file as text, normalising every line ending to
     * {@code "\n"} (a trailing newline is always appended to the last line).
     *
     * @param file    file to read
     * @param charSet name of the charset used to decode the bytes
     * @return the file content, or {@code null} if the file could not be
     *         opened or read (the failure is printed to standard output)
     */
    public static String readFileContext(File file,String charSet)  {
        FileInputStream in = null;
        BufferedReader reader=null;
        try {
            in = new FileInputStream(file);
            reader = new BufferedReader(new InputStreamReader(in, charSet));
            StringBuilder sb = new StringBuilder();
            String line;
            while(null!=(line = reader.readLine())){
                sb.append(line).append('\n');
            }
            return sb.toString();
        }catch (IOException e) {
            System.out.println("文件读取失败!"+e.getMessage());
            return null;
        }finally{
            try {
                // Closing the reader closes the underlying stream; fall back to
                // the raw stream when wrapping it failed (e.g. unknown charset).
                // The reader is null when the file itself could not be opened.
                if(null!=reader){
                    reader.close();
                }else if(null!=in){
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}

//池,缓冲区
/**
 * Bounded FIFO buffer shared by the producer and the consumers.
 * Both mutators synchronize on this instance and use wait/notify to block
 * while the buffer is full (producer) or empty (consumers).
 */
class Pool{

    /** Number of files currently buffered. */
    volatile int size=0;
    /** Capacity at which {@link #putFile(File)} blocks. */
    volatile int limit =1000;
    /** Number of files handed out by {@link #fetchFile()} so far. */
    volatile int hasUploadToDB=0;
    volatile private  List<File> files = new LinkedList<File>();

    /**
     * Appends {@code file} to the buffer, blocking while the buffer holds
     * exactly {@code limit} entries. An interrupt received while blocked is
     * printed and the wait is resumed.
     *
     * @param file file to enqueue
     */
    public  synchronized void putFile(File file){
        for(;;){
            if(files.size()!=limit){
                break;
            }
            try {
                wait();
            } catch (InterruptedException interrupted) {
                interrupted.printStackTrace();
            }
        }
        files.add(file);
        notifyAll();
        size++;
    }

    /**
     * Removes and returns the oldest buffered file, blocking while the buffer
     * is empty.
     *
     * @return the next file, or {@code null} if the calling thread was
     *         interrupted while waiting
     */
    public synchronized File fetchFile(){
        try {
            while(files.isEmpty()){
                wait();
            }
        } catch (InterruptedException interrupted) {
            return null;
        }
        notify();
        // The loop above guarantees at least one element while the lock is held.
        File head = files.remove(0);
        size--;
        hasUploadToDB++;
        return head;
    }

    /**
     * @return the number of files currently buffered
     */
    public int getSize(){
        return size;
    }

}

 

转载于:https://www.cnblogs.com/alienSmoking/p/5422675.html

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值