使用多线程进行大文件的处理

最新推荐文章于 2024-08-10 20:11:23 发布

十丿四

最新推荐文章于 2024-08-10 20:11:23 发布

阅读量2.5k

点赞数

分类专栏： java 多线程

本文链接：https://blog.csdn.net/wangqingqi20005/article/details/78586253

版权

java 多线程专栏收录该内容

4 篇文章 0 订阅

订阅专栏

场景：

有一批数据由于种种原因需要重新处理，总量大概在180W左右。由于数据库的限制，不太好直接查库处理，已经由 DBA 直接导出了，导出的文件里全都是流水号。具体实现为：拿流水号去接口中查询数据，再把符合条件的流水号通过接口同步到其他系统里面。

最开始的做法很简单：通过读取文件流一行一行地获取流水号，再通过流水号进行其他的处理。由于是单线程程序，加上接口的响应慢，处理数据的速度特别慢，一晚上才跑了20W 左右的数据。单线程的代码先贴上来：

/**
 * Entry point of the single-threaded version: calls {@code init()} and then
 * hands the path {@code PATH + "1119.txt"} to {@link #readTxtFile(String)}.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
		init();
		readTxtFile(PATH + "1119.txt");
	}

/**
 * Reads the GBK-encoded file at {@code filePath} one line at a time, treating
 * every line as a bill code, and processes each bill code sequentially:
 * <ul>
 *   <li>{@code getTrace} returned {@code null}: the bill code is written to
 *       {@code 请求超时1.txt};</li>
 *   <li>{@code getTrace} returned an empty list: the bill code is written to
 *       {@code 无物流信息2.txt};</li>
 *   <li>otherwise, for every returned map, the {@code "traces"} entry is parsed
 *       into {@code CommonTrace} objects; those whose scan type is
 *       {@code 签收} are passed to {@code buildTaoBaoData}, and if there are
 *       none the bill code is written to {@code 未签收单号.txt}.</li>
 * </ul>
 * Any exception stops the run; the line being processed is printed together
 * with the stack trace. The reader is always closed, including on failure.
 *
 * @param filePath path of the text file holding one bill code per line
 */
public static void readTxtFile(String filePath) {
		String lineTxt = null;
		try {
			File file = new File(filePath);
			if (!file.isFile()) { // isFile() is already false for a missing path
				System.out.println("找不到指定的文件");
				return;
			}
			// try-with-resources closes the reader (and the underlying stream)
			// even when processing a line throws.
			try (BufferedReader bufferedReader = new BufferedReader(
					new InputStreamReader(new FileInputStream(file), "GBK"))) {
				while ((lineTxt = bufferedReader.readLine()) != null) {
					String billCode = lineTxt;

					List<Map<String, Object>> list = getTrace(billCode);
					if (list == null) {
						write(JsonUtil.toJSON(billCode), "请求超时1.txt");
						continue;
					}
					if (list.isEmpty()) {
						write(JsonUtil.toJSON(billCode), "无物流信息2.txt");
						continue;
					}

					for (Map<String, Object> map : list) {
						// A missing "traces" value must count as empty; concatenating
						// it with "" would yield the literal text "null" and slip
						// past the emptiness check below.
						Object rawTraces = map.get("traces");
						String str = rawTraces == null ? "" : rawTraces.toString();
						if ("".equals(str)) {
							System.out.println("出现异常   billCode : " + lineTxt
									+ "  data :" + map.toString());
							continue;
						}
						List<CommonTrace> traces = JsonUtil.parseArray(str,
								CommonTrace.class);
						// Keep only the "signed for" scans.
						List<CommonTrace> rec = new ArrayList<CommonTrace>();
						for (CommonTrace trace : traces) {
							if ("签收".equals(trace.getScanType())) {
								rec.add(trace);
							}
						}
						if (rec.isEmpty()) {
							write(billCode, "未签收单号.txt");
						} else {
							buildTaoBaoData(rec, billCode, billCode);
						}
					}
				}
			}
		} catch (Exception e) {
			System.out.println("读取文件内容出错," + lineTxt);
			e.printStackTrace();
		}
	}

所以开始考虑使用多线程进行处理。初步想法是一次性读取10条左右，再把每个流水号丢给一个线程去处理。

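比较重要的 API 是 bufferedReader.lines().limit(30)：它返回一个最多包含 30 行的流，效果相当于连续调用最多 30 次 bufferedReader.readLine()。需要注意：文件剩余不足 30 行时得到的行数会更少，读到文件末尾时 toArray() 返回的是空数组而不是 null；另外 JDK 文档并不保证终止操作执行完之后 reader 停留在确定的位置，所以反复调用 lines() 来分批读取并不可靠。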

/**
 * Entry point of the multi-threaded version: invokes {@code init()} first and
 * then passes {@code PATH + "1119.txt"} to {@link #readTxtFile(String)}.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
		init();
		readTxtFile(PATH + "1119.txt");
	}

/**
 * Reads the GBK-encoded file at {@code filePath} line by line and submits one
 * {@link Task} per line (bill code) to a fixed pool of worker threads, then
 * waits until every submitted task has finished.
 *
 * <p>The pool has a bounded hand-off queue and uses
 * {@code CallerRunsPolicy}: when all workers are busy and the queue is full,
 * the reading thread runs the task itself. This throttles reading to the
 * speed of the workers, so neither the number of threads nor the number of
 * queued tasks grows with the size of the file.
 *
 * <p>Lines are fetched with {@code readLine()}, so the loop ends cleanly at
 * end of file and a file of any length is handled.
 *
 * @param filePath path of the text file holding one bill code per line
 */
public static void readTxtFile(String filePath) {
		// Number of worker threads; also used to size the hand-off queue.
		final int workerCount = 30;
		String lineTxt = null;
		java.util.concurrent.ExecutorService pool = null;
		try {
			File file = new File(filePath);
			if (!file.isFile()) { // isFile() is already false for a missing path
				System.out.println("找不到指定的文件");
				return;
			}

			pool = new java.util.concurrent.ThreadPoolExecutor(
					workerCount, workerCount, 0L,
					java.util.concurrent.TimeUnit.MILLISECONDS,
					new java.util.concurrent.ArrayBlockingQueue<Runnable>(workerCount * 10),
					new java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy());

			// try-with-resources closes the reader even if submitting a task fails.
			try (BufferedReader bufferedReader = new BufferedReader(
					new InputStreamReader(new FileInputStream(file), "GBK"))) {
				while ((lineTxt = bufferedReader.readLine()) != null) {
					pool.execute(new Task(lineTxt));
				}
			}
		} catch (Exception e) {
			System.out.println("读取文件内容出错," + lineTxt);
			e.printStackTrace();
		} finally {
			if (pool != null) {
				// Accept no new work, but let everything already submitted finish.
				pool.shutdown();
				try {
					pool.awaitTermination(Long.MAX_VALUE,
							java.util.concurrent.TimeUnit.NANOSECONDS);
				} catch (InterruptedException e) {
					pool.shutdownNow();
					// Restore the interrupt flag for whoever called us.
					Thread.currentThread().interrupt();
				}
			}
		}
	}

/**
 * Unit of work for one bill code: queries its traces through
 * {@code TaoBaoTracePushertest2.getTrace} and routes the result.
 * <ul>
 *   <li>{@code null} result: the bill code is written to
 *       {@code 请求超时1.txt};</li>
 *   <li>empty result: the bill code is written to {@code 无物流信息2.txt};</li>
 *   <li>otherwise, for every returned map, the {@code "traces"} entry is parsed
 *       into {@code CommonTrace} objects; those whose scan type is
 *       {@code 签收} are passed to {@code buildTaoBaoData}, and if there are
 *       none the bill code is written to {@code 未签收单号.txt}.</li>
 * </ul>
 * Exceptions thrown while processing are caught inside {@link #run()} and
 * their stack trace is printed, so they do not propagate to the executing
 * thread.
 */
class Task implements Runnable {

	/** Bill code this task processes. */
	private String billCode;

	/**
	 * Creates a task for the given bill code.
	 *
	 * @param billCode the bill code to look up
	 */
	public Task(String billCode) {
		this.billCode = billCode;
	}

	@Override
	public void run() {
		try {
			List<Map<String, Object>> list = TaoBaoTracePushertest2.getTrace(billCode);
			if (list == null) {
				TaoBaoTracePushertest2.write(JsonUtil.toJSON(billCode), "请求超时1.txt");
				// Nothing to iterate over; falling through would dereference null.
				return;
			}
			if (list.isEmpty()) {
				TaoBaoTracePushertest2.write(JsonUtil.toJSON(billCode), "无物流信息2.txt");
				return;
			}
			for (Map<String, Object> map : list) {
				// A missing "traces" value must count as empty; concatenating it
				// with "" would yield the literal text "null" and slip past the
				// emptiness check below.
				Object rawTraces = map.get("traces");
				String str = rawTraces == null ? "" : rawTraces.toString();
				if ("".equals(str)) {
					System.out.println("出现异常   billCode : " + billCode
							+ "  data :" + map.toString());
					continue;
				}
				List<CommonTrace> traces = JsonUtil.parseArray(str,
						CommonTrace.class);
				// Keep only the "signed for" scans.
				List<CommonTrace> rec = new ArrayList<CommonTrace>();
				for (CommonTrace trace : traces) {
					if ("签收".equals(trace.getScanType())) {
						rec.add(trace);
					}
				}
				if (rec.isEmpty()) {
					TaoBaoTracePushertest2.write(billCode, "未签收单号.txt");
				} else {
					TaoBaoTracePushertest2.buildTaoBaoData(rec, billCode, billCode);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * @return the bill code this task processes
	 */
	public String getBillCode() {
		return billCode;
	}

	/**
	 * @param billCode the bill code this task should process
	 */
	public void setBillCode(String billCode) {
		this.billCode = billCode;
	}

}

忽略中间的业务逻辑。。。