Java分析淘宝搜索页数据

Fenxi.java

package com.code;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

import jxl.Workbook;
import jxl.write.Label;
import jxl.write.Number;
import jxl.write.WritableCellFormat;
import jxl.write.WritableFont;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;

/**
 * Parses saved Taobao search-result pages and exports the listed products to an Excel workbook.
 *
 * <p>For a run named {@code X}, {@link #shengcheng(String)} reads {@code X-1.txt} and
 * {@code X-2.txt} from {@link #BASE_DIR} and writes them as two sheets of {@code X.xls}.
 * Parsing is purely marker/offset based (no HTML parser), so it depends on the exact markup
 * of the saved pages.
 */
public class Fenxi {

	/** Directory that holds the saved pages and receives the generated workbook. */
	private static final String BASE_DIR = "E:/taobao/first/";

	/** Text fragment used to locate the start of every product item in the page. */
	private static final String ITEM_MARKER = "MouserOnverReq";

	/**
	 * Number of characters an item starts before {@link #ITEM_MARKER}.
	 * NOTE(review): presumably the length of {@code <div class="item J_} - confirm against a page.
	 */
	private static final int ITEM_MARKER_OFFSET = 19;

	/** Characters skipped after an item start before searching for the next marker. */
	private static final int ITEM_MIN_LENGTH = 100;

	/**
	 * Entry point. Uses {@code args[0]} as the run name when given, otherwise {@code "1115pm"}.
	 *
	 * @param args optional command-line arguments; only the first one is read
	 */
	public static void main(String args[]) {
		String str = (args != null && args.length > 0) ? args[0] : "1115pm";
		shengcheng(str);
	}

	/**
	 * Generates the workbook for one run: {@code str-1.txt} becomes sheet 0 and
	 * {@code str-2.txt} becomes sheet 1 of {@code str.xls}.
	 *
	 * @param str run name shared by the input files and the output workbook
	 */
	public static void shengcheng(String str) {
		File file1 = new File(BASE_DIR + str + "-1.txt");
		writeExcel(getShangpins(getItem(txt2String(file1))), str, 0);

		File file2 = new File(BASE_DIR + str + "-2.txt");
		writeExcel(getShangpins(getItem(txt2String(file2))), str, 1);
	}

	/**
	 * Writes the products into a new sheet of {@code BASE_DIR + str + ".xls"}.
	 *
	 * <p>If the workbook file already exists it is opened and copied so the new sheet is added
	 * to it; otherwise a new workbook is created. Failures are reported on standard output and
	 * do not propagate. Both workbooks are always closed, even when writing fails.
	 *
	 * <p>NOTE(review): when the workbook already contains a sheet with the same name (for
	 * example from an earlier run), another sheet is still created - confirm whether stale
	 * sheets should be removed first.
	 *
	 * @param records products to write, one row each, in list order
	 * @param str     run name; determines the workbook file name
	 * @param f       sheet index; 0 names the sheet "综合排序", anything else "销量排序"
	 */
	public static void writeExcel(List<Shangpin> records, String str, int f) {
		String[] headers = {"序号", "ID", "店名", "标题", "价格", "发货地", "数量"};
		int[] columnWidth = {10, 20, 25, 65, 12, 20, 20};
		String path = BASE_DIR + str + ".xls";

		Workbook existing = null;
		WritableWorkbook book = null;
		try {
			File target = new File(path);
			if (!target.exists() || target.isDirectory()) {
				book = Workbook.createWorkbook(target);
			} else {
				// Open the existing file and create a writable copy that is written back to it.
				existing = Workbook.getWorkbook(target);
				book = Workbook.createWorkbook(target, existing);
			}

			WritableSheet sheet = book.createSheet(f == 0 ? "综合排序" : "销量排序", f);

			// Header row: bold, horizontally centred, with a fixed width per column.
			WritableFont bold = new WritableFont(WritableFont.ARIAL, 14, WritableFont.BOLD);
			WritableCellFormat headerFormat = new WritableCellFormat(bold);
			headerFormat.setAlignment(jxl.format.Alignment.CENTRE);
			for (int col = 0; col < headers.length; col++) {
				sheet.setColumnView(col, columnWidth[col]);
				sheet.addCell(new Label(col, 0, headers[col], headerFormat));
			}

			// Data rows start at row 1; column 0 carries the 1-based running number.
			int row = 1;
			for (Shangpin record : records) {
				sheet.addCell(new Number(0, row, row));
				sheet.addCell(new Label(1, row, record.getSpId()));
				sheet.addCell(new Label(2, row, record.getSpShop()));
				sheet.addCell(new Label(3, row, record.getSpName()));
				System.out.println(row + "." + record.getSpName());
				sheet.addCell(new Label(4, row, record.getSpprice()));
				sheet.addCell(new Label(5, row, record.getSpLocation()));
				sheet.addCell(new Label(6, row, record.getSpCnt()));
				row++;
			}

			book.write();
		} catch (Exception e) {
			System.out.println(e);
		} finally {
			// Release both workbooks regardless of success so no file handle stays open.
			if (book != null) {
				try {
					book.close();
				} catch (Exception e) {
					System.out.println(e);
				}
			}
			if (existing != null) {
				existing.close();
			}
		}
	}

	/**
	 * Parses every item fragment into a product.
	 *
	 * @param itemlist item fragments as returned by {@link #getItem(String)}
	 * @return one product per fragment, in the same order
	 */
	public static List<Shangpin> getShangpins(List<String> itemlist) {
		List<Shangpin> spList = new ArrayList<Shangpin>();
		for (String item : itemlist) {
			spList.add(getShangpin(item));
		}
		return spList;
	}

	/**
	 * Extracts the product fields from one item fragment.
	 *
	 * <p>Each field is located by a fixed text marker and offset. A field whose marker or
	 * terminator is not present is set to the empty string instead of raising an exception,
	 * so one incomplete listing cannot abort the whole export.
	 *
	 * @param item markup of a single product item
	 * @return the parsed product; never {@code null}
	 */
	public static Shangpin getShangpin(String item) {
		Shangpin sp = new Shangpin();

		// data-nid="<id>"
		sp.setSpId(sliceAfter(item, item.indexOf("data-nid="), 10, "\""));

		// <span>¥</span><strong>89.90</strong>
		sp.setSpprice(sliceAfter(item, item.indexOf("¥"), 16, "</strong>"));

		// alt="<title>" - searched from the "ItemPic img" anchor, or from the start of the
		// fragment when that anchor is missing (indexOf treats a negative start as 0).
		int altPos = item.indexOf("alt=", item.indexOf("ItemPic img"));
		sp.setSpName(sliceAfter(item, altPos, 5, "\""));

		// <div class="deal-cnt">2804人付款</div>
		sp.setSpCnt(sliceAfter(item, item.indexOf("deal-cnt"), 10, "人"));

		// <div class="location">上海</div>
		sp.setSpLocation(sliceAfter(item, item.indexOf("location"), 10, "</div>"));

		// Shop name: first <span> found at least 120 characters after the "dsrs" marker.
		int dsrsPos = item.indexOf("dsrs");
		int shopSpanPos = dsrsPos < 0 ? -1 : item.indexOf("<span>", dsrsPos + 120);
		sp.setSpShop(sliceAfter(item, shopSpanPos, 6, "</span>"));

		return sp;
	}

	/**
	 * Returns the text that starts {@code valueOffset} characters after {@code markerPos} and
	 * ends right before the next occurrence of {@code terminator}.
	 *
	 * @return the extracted text, or {@code ""} when the marker position is negative or the
	 *         terminator does not occur after the value start
	 */
	private static String sliceAfter(String text, int markerPos, int valueOffset, String terminator) {
		if (markerPos < 0) {
			return "";
		}
		int start = markerPos + valueOffset;
		// Searching from the value start guarantees end >= start whenever a terminator is found.
		int end = text.indexOf(terminator, start);
		if (end < 0) {
			return "";
		}
		return text.substring(start, end);
	}

	/**
	 * Splits a saved result page into one fragment per product item.
	 *
	 * <p>Every item starts {@link #ITEM_MARKER_OFFSET} characters before an occurrence of
	 * {@link #ITEM_MARKER} and runs up to the start of the next item; the last item runs to
	 * the end of the page. The search for the next marker begins {@link #ITEM_MIN_LENGTH}
	 * characters after the current item start.
	 *
	 * @param listString full page text; may be {@code null} or empty
	 * @return the item fragments in page order; empty when the page contains no marker
	 */
	public static List<String> getItem(String listString) {
		List<String> itemlist = new ArrayList<String>();
		if (listString == null) {
			return itemlist;
		}

		int markerPos = listString.indexOf(ITEM_MARKER);
		while (markerPos >= 0) {
			// Clamp so a marker near the very beginning cannot yield a negative start index.
			int begin = Math.max(0, markerPos - ITEM_MARKER_OFFSET);
			int nextMarker = listString.indexOf(ITEM_MARKER, begin + ITEM_MIN_LENGTH);
			if (nextMarker < 0) {
				// Last (or only) item: take everything up to the end of the page.
				itemlist.add(listString.substring(begin));
				break;
			}
			itemlist.add(listString.substring(begin, nextMarker - ITEM_MARKER_OFFSET));
			markerPos = nextMarker;
		}
		return itemlist;
	}

	/**
	 * Reads a text file into a single string using the platform default charset.
	 *
	 * <p>Every line is preceded by the system line separator, so a non-empty result starts
	 * with a separator. On a read error the stack trace is printed and whatever was read so
	 * far (possibly the empty string) is returned.
	 *
	 * @param file file to read
	 * @return the file content, never {@code null}
	 */
	public static String txt2String(File file) {
		StringBuilder result = new StringBuilder();
		// try-with-resources closes the reader even when readLine fails part-way through.
		try (BufferedReader br = new BufferedReader(new FileReader(file))) {
			String line;
			while ((line = br.readLine()) != null) {
				result.append(System.lineSeparator()).append(line);
			}
		} catch (Exception e) {
			// Deliberately best-effort: report the problem and return what was read.
			e.printStackTrace();
		}
		return result.toString();
	}

}

 

 

Shangpin.java

package com.code;

/**
 * Plain data holder for one product listing taken from a search-result page.
 * All values are kept as the raw strings extracted from the page.
 */
public class Shangpin {

	/** Product id. */
	String spId;
	/** Product title. */
	String spName;
	/** Price text. */
	String spprice;
	/** Shop name. */
	String spShop;
	/** Shipping location. */
	String spLocation;
	/** Number of buyers (the figure shown in front of "人付款"). */
	String spCnt;

	// ---- accessors -------------------------------------------------------

	/** @return the product id */
	public String getSpId() {
		return this.spId;
	}

	/** @return the product title */
	public String getSpName() {
		return this.spName;
	}

	/** @return the price text */
	public String getSpprice() {
		return this.spprice;
	}

	/** @return the shop name */
	public String getSpShop() {
		return this.spShop;
	}

	/** @return the shipping location */
	public String getSpLocation() {
		return this.spLocation;
	}

	/** @return the buyer count text */
	public String getSpCnt() {
		return this.spCnt;
	}

	// ---- mutators --------------------------------------------------------

	/** @param spId the product id to store */
	public void setSpId(String spId) {
		this.spId = spId;
	}

	/** @param spName the product title to store */
	public void setSpName(String spName) {
		this.spName = spName;
	}

	/** @param spprice the price text to store */
	public void setSpprice(String spprice) {
		this.spprice = spprice;
	}

	/** @param spShop the shop name to store */
	public void setSpShop(String spShop) {
		this.spShop = spShop;
	}

	/** @param spLocation the shipping location to store */
	public void setSpLocation(String spLocation) {
		this.spLocation = spLocation;
	}

	/** @param spCnt the buyer count text to store */
	public void setSpCnt(String spCnt) {
		this.spCnt = spCnt;
	}
}

开源资料:Java分析淘宝搜索页数据

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值