Main.java
package cleandata; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.util.concurrent.LinkedBlockingQueue; public class Main { public static void main(String args[]) throws Exception{ LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<String>(); //线程数量 int threadnum = 10; for(int i = 0; i < threadnum; i++) { CleandataRunnable r = new CleandataRunnable(queue, "r" + i); r.start(); } findfile(queue); } public static void findfile(LinkedBlockingQueue<String> queue) throws Exception { final String dirPath = "/home/tqhy/json-data"; File f = new File(dirPath); File[] fs = f.listFiles(); int count = 0; BufferedReader reader = null; String str = null; try { for(File file:fs) { //file: D:\test\3f008a67753f44496647c30d46a635cf.json if(file.getName().endsWith(".json")) { reader = new BufferedReader(new FileReader(file)); while((str=reader.readLine()) != null) { queue.put(str); if (queue.size() > 500) { System.out.println("queue is full,wait 1000 ms"); Thread.sleep(1000); } count++; System.out.println("all count: " + count); //System.out.println("all count:" + count + " queue's number:" + queue.size()); } System.out.println("read over" + file.getName()); reader.close(); } } } catch (Exception e) {} } /* public static void findfile1 (LinkedBlockingQueue queue) throws Exception { CleandataRunnable r1 = new CleandataRunnable(queue,"r1"); CleandataRunnable r2 = new CleandataRunnable(queue,"r2"); CleandataRunnable r3 = new CleandataRunnable(queue,"r3"); CleandataRunnable r4 = new CleandataRunnable(queue,"r4"); r1.start(); r2.start(); r3.start(); r4.start(); String dirPath = "D:\\test"; File f = new File(dirPath); File[] fs = f.listFiles(); int count=0; for(File file:fs) { //file: D:\test\3f008a67753f44496647c30d46a635cf.json System.out.println(file); if(file.getName().endsWith(".json")) { queue.put(file); } } }*/ }
package cleandata; import java.io.FileWriter; import java.io.IOException; import java.util.concurrent.LinkedBlockingQueue; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import redis.clients.jedis.Jedis; public final class CleandataRunnable extends Thread { private String threadName; LinkedBlockingQueue<?> queue; //write category file path static final String FILEPATH = "/home/tqhy/yl/category/"; //write image path static final String IMAPATH = "/home/tqhy/yl/img/"; Jedis jedis = new Jedis("127.0.0.1", 6379); CleandataRunnable(LinkedBlockingQueue<?> queue, String name) { this.queue = queue; threadName = name; System.out.println("Creating " + threadName); } @Override public void run() { System.out.println("Running " + threadName); FileUtil.createFile(FILEPATH); //Jedis jedis = new Jedis("127.0.0.1", 6379); try { while (true) { long startTime = System.currentTimeMillis();// current time //long startMem = Runtime.getRuntime().freeMemory(); JSONObject jsondata = JSONObject.parseObject((String) queue.take()); /** * 排除分类3空项 */ String category3 = jsondata.get("category3").toString(); if(category3.length() <= 0) { System.out.println("category3为空 id: " + jsondata.get("id")); continue; } /** * 验证图片 */ JSONArray images = JSONArray.parseArray(jsondata.get("images").toString()); JSONArray newimages = new JSONArray(); for (int i = 0, len=images.size();i < len; i++) { String base64 = images.getJSONObject(i).getString("base64"); String id = images.getJSONObject(i).getString("id"); String tmpPath = ImgUtil.GenerateImage(base64, id, IMAPATH + jsondata.getString("id"));// 合成图片 if(tmpPath != null) { JSONObject imageinfo = new JSONObject(); imageinfo.put("tmpPath", tmpPath); imageinfo.put("id", id); newimages.add(imageinfo); } } /** * 排除图片不合格项 */ if(newimages.size() == 0) { System.out.println("没有图片 id: " + jsondata.get("id")); continue; } jsondata.put("images", newimages); /** * 新json写进分类文件 */ FileWriter writer = new FileWriter(FILEPATH + category3 + 
".json", true); writer.write(jsondata.toJSONString()); writer.write(System.getProperty("line.separator")); writer.close(); /** * 分类放redis里 */ setObject(category3, jedis); long endTime = System.currentTimeMillis(); //long endMem = Runtime.getRuntime().freeMemory(); System.out.println("程序运行时间:" + (endTime - startTime) + "ms " + threadName);// + " Use memory: "+ (startMem - endMem));// + " nowTime" + new Date()); } } catch (InterruptedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } /** * 分类存入redis,计数器 * @param category * @param jedis */ public void setObject(String category,Jedis jedis) { if (jedis.get(category) == null) { jedis.set(category, "1"); } else { int count = Integer.parseInt(jedis.get(category)); count++; jedis.set(category, String.valueOf(count)); } } /* public void run() { System.out.println("Running " + threadName); try { while (queue.isEmpty()) { Thread.currentThread().sleep(10); } } catch (InterruptedException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } // 写入文件地址 //final String filePath = "D:\\test1\\category\\"; File sf = new File(FILEPATH); if (!sf.exists()) { sf.mkdirs(); } // 存入图片地址 //final String imgPath = "D:\\test1\\img\\"; Jedis jedis = new Jedis("127.0.0.1", 6379); String category3=null; //String Eid=null; String base64=null; String id=null; //String imagePath=null; String tmpPath=null; //String catePath=null; JSONObject jsondata=null; long startTime=0; long endTime=0; JSONArray images=null; //boolean verify = true; JSONArray newimages=null; JSONObject imageinfo=null; String content=null; FileWriter writer=null; //File tmpimage=null; FileInputStream fi=null; //BufferedImage sourceImg=null; //int picWidth=0; while (!queue.isEmpty()) { try { startTime = System.currentTimeMillis();// 获取当前时间 jsondata = (JSONObject) queue.take(); images = JSONArray.parseArray(jsondata.get("images").toString()); newimages = new JSONArray(); //Eid = jsondata.getString("id"); // 商品ID //verify = true; // 
图片合成、转存 for (int i = 0; i < images.size(); i++) { base64 = images.getJSONObject(i).getString("base64"); id = images.getJSONObject(i).getString("id"); //imagePath = IMAPATH + Eid; tmpPath = GenerateImage(base64, id, IMAPATH + jsondata.getString("id"));// 合成图片 // 验证图片 //tmpimage = new File(tmpPath); fi = new FileInputStream(new File(tmpPath)); //sourceImg = ImageIO.read(fi); //picWidth = ImageIO.read(fi).getWidth(); if (ImageIO.read(fi).getWidth() < 0) { //verify = false; System.out.println("图片损坏:" + tmpPath); continue; } fi.close(); imageinfo = new JSONObject(); imageinfo.put("tmpPath", tmpPath); imageinfo.put("id", id); newimages.add(imageinfo); } category3 = jsondata.get("category3").toString(); //if (verify == true) { jsondata.put("images", newimages); content = jsondata.toJSONString(); // 当前分类文件地址 //catePath = FILEPATH + category3 + ".json"; writer = new FileWriter(FILEPATH + category3 + ".json", true); writer.write(content); writer.write(System.getProperty("line.separator")); writer.close(); setObject(category3,jedis);// 分类放redis里 //} endTime = System.currentTimeMillis(); System.out.println( "程序运行时间:" + (endTime - startTime) + "ms " + threadName + " 处理category3:" + category3); } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }*/ /** * 放文件进队列,所以处理的是文件 * * @param category */ // @Override /* * public void run1(){ System.out.println("Running "+threadName); while(true) { * try { File file = (File) queue.take(); BufferedReader reader = new * BufferedReader(new FileReader(file)); String str; int count=0; //存入图片地址 * String imgPath = "D:\\test1\\img\\"; //写入文件地址 String filePath = * "D:\\test1\\category\\"; File sf = new File(filePath); if (!sf.exists()) { * sf.mkdirs(); } while((str=reader.readLine())!=null) { JSONObject jsondata = * JSONObject.parseObject(str); String category3 = * jsondata.get("category3").toString(); if(category3.length()>0) { * * count++; * 
System.out.println("count:"+count+" 当前分类:"+jsondata.get("category3"). * toString()+"threadName:"+threadName); JSONArray images = * JSONArray.parseArray(jsondata.get("images").toString()); JSONArray newimages * = new JSONArray(); String Eid=jsondata.getString("id"); //商品ID boolean * verify=true; //图片合成、转存 for(int i=0;i<images.size();i++) { * * String base64=images.getJSONObject(i).getString("base64"); String * id=images.getJSONObject(i).getString("id"); String imagePath = imgPath+Eid; * * String tmpPath=GenerateImage(base64,id,imagePath);//合成图片 //验证图片 File tmpimage * = new File(tmpPath); FileInputStream fi = new FileInputStream(tmpimage); * BufferedImage sourceImg =ImageIO.read(fi); int picWidth= * sourceImg.getWidth(); if(picWidth<0) { verify=false; * System.out.println("图片损坏:"+tmpPath); } fi.close(); * * JSONObject imageinfo = new JSONObject(); imageinfo.put("tmpPath", tmpPath); * imageinfo.put("id", id); newimages.add(imageinfo); * * } * * if(verify==true) { jsondata.put("images", newimages); String * content=jsondata.toJSONString(); //当前分类文件地址 String catePath = * filePath+category3+".json"; FileWriter writer = new FileWriter(catePath, * true); writer.write(content); * writer.write(System.getProperty("line.separator")); writer.close(); * setObject(category3);//分类放redis里 } } } reader.close(); * * } catch (InterruptedException e) { // TODO Auto-generated catch block * e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } * * } */ }
package cleandata;

import java.io.File;

/**
 * Small filesystem helper.
 */
public final class FileUtil {

    /**
     * Creates the directory {@code path}, including missing parent directories, when
     * nothing exists at that location yet. If something already exists there the call
     * does nothing. The outcome of the directory creation is not checked.
     *
     * @param path directory to create
     */
    public static void createFile(String path) {
        File target = new File(path);
        if (target.exists()) {
            return;
        }
        target.mkdirs();
    }
}
package cleandata; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.Iterator; import javax.imageio.ImageIO; import javax.imageio.ImageReader; import javax.imageio.stream.ImageInputStream; import javax.imageio.stream.MemoryCacheImageInputStream; import sun.misc.BASE64Decoder; public final class ImgUtil { /** * base64字符合成图片 * @param imgStr * @param id * @param path * @return */ public static String GenerateImage(String imgStr, String id, String path) { FileUtil.createFile(path); // 图像数据为空 if (imgStr == null) return null; //BASE64Decoder decoder = new BASE64Decoder(); try { // Base64解码 byte[] b = new BASE64Decoder().decodeBuffer(imgStr); //判断文件大小,格式是否符合要求 if(!checkSize(b)) { System.out.println("图片大小不符合要求"+path+ "/" + id); return null; } if(!verifyJPG(b)) { System.out.println("图片格式不是jpg"); return null; } for (int i = 0,len=b.length; i < len; ++i) { if (b[i] < 0) {// 调整异常数据 b[i] += 256; } } // 生成jpg图片 String imgFilePath = path + "/" + id + ".jpg"; OutputStream out = new FileOutputStream(imgFilePath); out.write(b); out.flush(); out.close(); //判断图片能否打开 FileInputStream fi = new FileInputStream(new File(imgFilePath)); if(ImageIO.read(fi) == null) { fi.close(); System.out.println("图片打不开"); return null; } fi.close(); return imgFilePath; } catch (Exception e) { e.printStackTrace(); return null; } } /** * 校验格式是否为jpeg * @param bytes * @return */ public static boolean verifyJPG(byte[] bytes) { String suffix = null; try { ImageInputStream imageInputstream = new MemoryCacheImageInputStream(new ByteArrayInputStream(bytes)); //不使用磁盘缓存 ImageIO.setUseCache(false); Iterator<ImageReader> it = ImageIO.getImageReaders(imageInputstream); if (it.hasNext()) { ImageReader imageReader = it.next(); suffix = imageReader.getFormatName().trim().toLowerCase(); } }catch (IOException e) { e.printStackTrace(); } //System.out.println("suffix: " + suffix); 
return suffix.equals("jpeg"); } /** * 校验文件大小 * * @param * @return */ public static boolean checkSize(byte[] bytes) { //符合条件的照片大小(可配置) 单位:M double imgSize = 0.002; //图片转base64字符串一般会大,这个变量就是设置偏移量。可配置在文件中,随时修改。目前配的是0。后续看情况适当做修改 double deviation = 0.0; int length = bytes.length; //原照片大小 double size = (double) length / 1024 / 1024 * (1 - deviation); //System.out.println("照片大小为:" + size + "M"); return size > imgSize; } }