Match_ToTerm.java

Match_ToTerm.java:将 match 文件中的商品 ID 全部替换为对应的分词结果。分词之间用空格隔开,可相互替代的商品之间仍用逗号隔开,搭配之间用分号(;)隔开,输出文件为 ToTerms2.txt。
MatchDeleteLine.txt:将 dim_fashion_matches.txt 中的第一列删除,输出文件为 ToTerms3.txt。

package test;




import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import redis.clients.jedis.Jedis;

/**
 * Converts a file of item match sets into a file of segmented terms.
 *
 * <p>The program runs in two phases:
 * <ol>
 *   <li>{@link #getStrings(int)} reads the item file (one space-separated
 *       {@code item_id cat_id terms} record per line) and stores each record
 *       in Redis as a serialized map keyed by the item id.</li>
 *   <li>{@link #main(String[])} reads the match-set file, replaces every item
 *       id found in the second space-separated column with that item's terms,
 *       and appends one output line per input line to the output file.</li>
 * </ol>
 *
 * <p>Output format of one line: the commas inside an item's terms become
 * spaces, alternative items inside a group are separated by {@code ,} and
 * every group is terminated by {@code ;}.
 */
public class Match_ToTerm {

    /** Address of the Redis server used as the item lookup table. */
    private static final String REDIS_HOST = "10.20.100.5";
    private static final int REDIS_PORT = 6379;

    /** Charset used for every file and for the Redis keys. */
    private static final String ENCODING = "UTF-8";

    private static final String RESOURCE_DIR = "/public/home/dsj/Public/zfy/resources/";
    private static final String ITEMS_FILE = RESOURCE_DIR + "dim_items.txt";
    private static final String MATCHSETS_FILE = RESOURCE_DIR + "dim_fashion_matchsets.txt";
    private static final String OUTPUT_FILE = RESOURCE_DIR + "ToTerms2.txt";

    /**
     * Loads every record of the item file into Redis.
     *
     * <p>Each well-formed line is stored once under its item id as a
     * serialized map with the keys {@code item_id}, {@code cat_id} and
     * {@code terms}. Lines with fewer than three fields are reported on
     * standard error and skipped. I/O failures are reported and end the load.
     *
     * @param count starting value of the line counter; it is only used to
     *              number lines in diagnostics, and because {@code int} is
     *              passed by value the caller's variable is never changed
     */
    public static void getStrings(int count) {
        Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT);
        BufferedReader reader = null;
        try {
            reader = openReader(ITEMS_FILE);
            String line;
            while ((line = reader.readLine()) != null) {
                count++;
                String[] fields = line.split(" ");
                if (fields.length < 3) {
                    System.err.println("Skipping malformed item line " + count + ": " + line);
                    continue;
                }
                Map<String, Object> record = new HashMap<String, Object>();
                record.put("item_id", fields[0]);
                record.put("cat_id", fields[1]);
                record.put("terms", fields[2]);

                byte[] value = writeObject(record);
                if (value != null) {
                    // One write per record is enough; the key is the item id.
                    jedis.set(fields[0].getBytes(ENCODING), value);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(reader);
            jedis.disconnect();
        }
    }

    /**
     * Serializes an object with Java serialization.
     *
     * @param obj the object to serialize
     * @return the serialized bytes, or {@code null} if serialization failed
     *         (the failure is printed to standard error)
     */
    private static byte[] writeObject(Object obj) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ObjectOutputStream out = null;
        try {
            out = new ObjectOutputStream(buffer);
            out.writeObject(obj);
            // Flush so that everything has reached the byte buffer before it is copied.
            out.flush();
            return buffer.toByteArray();
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        } finally {
            close(out);
        }
    }

    /**
     * Deserializes an object previously produced by {@link #writeObject(Object)}.
     *
     * <p>NOTE(review): this uses Java-native deserialization on bytes fetched
     * from Redis. That is only safe while this program is the sole writer of
     * those keys; do not point it at data from an untrusted source.
     *
     * @param array the serialized bytes; may be {@code null}
     * @return the deserialized object, or {@code null} if {@code array} is
     *         {@code null} or cannot be deserialized
     */
    private static Object readObject(byte[] array) {
        if (array == null) {
            return null;
        }
        ObjectInputStream in = null;
        try {
            in = new ObjectInputStream(new ByteArrayInputStream(array));
            return in.readObject();
        } catch (IOException ex) {
            ex.printStackTrace();
        } catch (ClassNotFoundException ex) {
            ex.printStackTrace();
        } finally {
            close(in);
        }
        return null;
    }

    /**
     * Appends text to a file, creating the file if necessary.
     *
     * <p>The text is written as UTF-8, the same charset the input files are
     * read with. Failures are printed to standard error.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        BufferedWriter writer = null;
        try {
            writer = openAppendWriter(fileName);
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(writer);
        }
    }

    /**
     * Entry point: loads the items into Redis, then translates every line of
     * the match-set file into a line of terms appended to the output file.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        int count = 0;
        getStrings(count);

        Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT);
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            reader = openReader(MATCHSETS_FILE);
            // A single writer in append mode replaces reopening the file for every fragment.
            writer = openAppendWriter(OUTPUT_FILE);
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split(" ");
                if (columns.length < 2) {
                    System.err.println("Skipping malformed match-set line: " + line);
                    continue;
                }
                writer.write(toTermsLine(jedis, columns[1]));
                writer.write("\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            close(writer);
            close(reader);
            jedis.disconnect();
        }
    }

    /**
     * Translates one match set ({@code ;}-separated groups of
     * {@code ,}-separated item ids) into its output representation.
     *
     * <p>Whether a group holds alternatives is decided by actually splitting
     * on {@code ,} rather than by the group's length, so long single ids and
     * short multi-id groups are both handled. Item ids that are empty or not
     * present in Redis are skipped (the latter with a warning); the group's
     * terminating {@code ;} is always written so group positions are kept.
     */
    private static String toTermsLine(Jedis jedis, String matchSet) throws IOException {
        StringBuilder out = new StringBuilder();
        String[] groups = matchSet.split(";");
        for (String group : groups) {
            boolean first = true;
            String[] itemIds = group.split(",");
            for (String itemId : itemIds) {
                if (itemId.length() == 0) {
                    continue;
                }
                String terms = lookupTerms(jedis, itemId);
                if (terms == null) {
                    System.err.println("No terms found for item id: " + itemId);
                    continue;
                }
                if (!first) {
                    out.append(',');
                }
                // Terms are comma-separated in the item file; the output separates them by spaces.
                out.append(terms.replace(',', ' '));
                first = false;
            }
            out.append(';');
        }
        return out.toString();
    }

    /** Returns the {@code terms} string stored in Redis for an item id, or {@code null} if there is none. */
    private static String lookupTerms(Jedis jedis, String itemId) throws IOException {
        Object record = readObject(jedis.get(itemId.getBytes(ENCODING)));
        if (!(record instanceof Map)) {
            return null;
        }
        Object terms = ((Map<?, ?>) record).get("terms");
        return (terms instanceof String) ? (String) terms : null;
    }

    /** Opens a UTF-8 buffered reader on a file. */
    private static BufferedReader openReader(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), ENCODING));
    }

    /** Opens a UTF-8 buffered writer that appends to a file. */
    private static BufferedWriter openAppendWriter(String path) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path, true), ENCODING));
    }

    /** Closes a resource if it was opened, reporting (not hiding) a failure to close. */
    private static void close(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值