抓取网页数据

import com.mvc.bean.Product;
import com.mvc.util.ExcelUtils;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;


/**
 * 抓取网页数据
 */
public class TestMain {

    // Class-level scratch integers (an end offset and a running counter, judging by the names).
    // NOTE(review): as static fields they keep their values between calls and nothing in
    // this file resets them; method-local variables would be safer.
    private static int endIx = 0;
    private static int index = 0;
    /**
     * Entry point: passes a hard-coded HTML snippet to {@link #captureHtml(String, int)}
     * in string mode (second argument 2).
     *
     * <p>Any exception thrown by the call is caught here and its stack trace is printed.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        try {
            // Saved page markup used as the offline input for the parser.
            // NOTE(review): in this copy the literal wraps across physical lines inside the
            // quotes in several places; Java string literals cannot contain raw line breaks,
            // so confirm against the original file that these are paste artifacts.
            String buf = "<body data-spm=\"7874262\" style=\"font-size: 12px;\"><script type=\"text/javascript\" src=\"//g.alicdn.com/alilog/??s/7.5.8/plugin/aplus_windvane2.js,s/7.6.8/plugin/aplus_client.js,aplus_cplugin/0.1.2/monitor.js,s/7.6.8/aplus_wap.js,aplus_cplugin/0.1.2/aol.js,s/7.6.8/plugin/aplus_spmact.js\" async=\"\" defer=\"\"></script><script type=\"text/javascript\" id=\"aplus-sufei\" src=\"//g.alicdn.com/secdev/sufei_data/3.3.5/index.js\" async=\"\" defer=\"\"></script> <div data-v-15130f34=\"\" class=\"itemHeader\"><div data-v-15130f34=\"\" class=\"it-flex it-box\"><div data-v-15130f34=\"\">关于</div> <div data-v-15130f34=\"\" class=\"it-icon\"></div> <div data-v-15130f34=\"\" class=\"it-name-box\"><div data-v-15130f34=\"\" class=\"it-name\">破洞牛仔裤女春秋2018新款韩版显瘦社会复古港味高腰秋装九分裤夏</div></div>的<div data-v-15130f34=\"\" class=\"it-count\"><span data-v-15130f34=\"\" class=\"count\">199</span>个问题</div></div></div> <div data-v-9da5c5fc=\"\" class=\"container\"><div data-v-0ba1ee29=\"\" data-v-9da5c5fc=\"\" id=\"wdj\" class=\"c_scroll c_scroll_touch\"><div data-v-4e8890f6=\"\" data-v-9da5c5fc=\"\" class=\"c_tab\" data-v-0ba1ee29=\"\"><!----><!----><div data-v-4e8890f6=\"\" class=\"tab-body\" style=\"width: 100%; min-height: 4.41333rem;\"><div data-v-4e8890f6=\"\" class=\"tab-main\" style=\"transform: translateX(0%);\"><div data-v-9da5c5fc=\"\" data-v-4e8890f6=\"\"><div data-v-9da5c5fc=\"\" mode=\"out-in\" class=\"body\" data-v-4e8890f6=\"\"><div data-v-fa698328=\"\" data-v-9da5c5fc=\"\" class=\"card tags show\"><div data-v-fa698328=\"\" class=\"box\"><div data-v-fa698328=\"\" class=\"tag selected\">\n" +
                    "      全部\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      裤子\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      弹力\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      身高\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      质量\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      弹性\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      褪色\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      显瘦\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      尺码\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      夏天\n" +
                    "    </div><div data-v-fa698328=\"\" class=\"tag\">\n" +
                    "      体重\n" +
                    "    </div></div></div><div data-v-853baab2=\"\" data-v-9da5c5fc=\"\" class=\"top-anchor card\" card=\"[object Object]\"></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">弹性怎么样</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">不是那种弹性很大的,修身的</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-24</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">怎么样弹性</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">说实话很好看,弹性也好,好显瘦</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 2 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-6</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 10 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div 
data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">质量怎么样</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">挺好的</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 3 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-21</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">质量怎么样</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">还是可以的,夏天穿也很舒服</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-12</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 15 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">穿着显瘦吗?有弹性吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div 
data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">可以滴</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 9 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-25</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">布料舒服吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">舒服的,很有弹性</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 2 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-28</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 39 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">掉色吗裤子</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">还没洗呢</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 4 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div 
data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-10</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">质量好么,褪色么</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">不褪色  比图片颜色浅</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 3 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-5</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 29 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">色差大吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">不大很漂亮,我一开始也怕不好看邮回来一点也没有色差还很合身</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 5 天前</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title 
text\">尺码标准吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">准的</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 3 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-10</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 14 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">腰到肚脐了吗?</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">到了,在肚脐处</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-16</div></div></div></div> <!----></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">165  110斤穿多大码?  
显瘦吗?</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">28码应该可以,个人穿着感觉不错!</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-21</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">160 100斤 穿什么码</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">我167,110斤,穿29腿刚好,腰太大</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-8</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 15 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">裤子弹性怎么样夏天穿热吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">弹性还行,夏天穿有一点</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" 
class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 3 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-27</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">夏天穿热吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">阴天,下雨天刚刚好</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 2 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-8</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 8 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">穿上舒服吗?</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">舒服,比较薄,很凉快</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 7 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-23</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" 
data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">请问跟着平时穿的尺码买吗</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">嗯嗯,是的,尺码很平时的一样</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 1 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-5</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 31 个相似问题</span></div></div></div><div data-v-39986688=\"\" data-v-9da5c5fc=\"\" class=\"card group ios8\"><div data-v-39986688=\"\" class=\"card-box\"><div data-v-39986688=\"\"><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">弹力怎么样(-o-)/</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p data-v-72a4ccd1=\"\" class=\"title text\">可以的</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 2 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-7-9</div></div></div><div data-v-39986688=\"\" class=\"qa-card\"><div data-v-3543fb8e=\"\" data-v-39986688=\"\" class=\"question mgb16\"><div data-v-3543fb8e=\"\" class=\"icon\">问</div> <div data-v-3543fb8e=\"\" class=\"title text\">1米65    98斤  穿多大?</div></div> <div data-v-72a4ccd1=\"\" data-v-39986688=\"\" class=\"answer mgb22\"><div data-v-72a4ccd1=\"\" class=\"icon\">答</div> <p 
data-v-72a4ccd1=\"\" class=\"title text\">26 27吧</p></div> <div data-v-39986688=\"\" class=\"count\"><div data-v-39986688=\"\" class=\"info\">全部<span data-v-39986688=\"\" class=\"num\"> 4 </span>个回答</div> <div data-v-39986688=\"\" style=\"flex: 1 1 0%;\"></div> <div data-v-3374e320=\"\" data-v-39986688=\"\" class=\"time\">更新于 2018-6-18</div></div></div></div> <div data-v-39986688=\"\" class=\"last-buttion\"><span data-v-39986688=\"\">还有 16 个相似问题</span></div></div></div></div></div></div></div></div><!----><div data-v-3b1c7dd0=\"\" data-v-0ba1ee29=\"\" class=\"c_end\"><div data-v-3b1c7dd0=\"\" class=\"text\">你看到我的底线了</div></div><div data-v-0ba1ee29=\"\" style=\"height: 1.52rem; width: 10rem;\"></div></div><!----><div data-v-208f898a=\"\" data-v-9da5c5fc=\"\" mode=\"in-out\" class=\"c_operation\"><div data-v-208f898a=\"\" class=\"opr\" style=\"display: none;\"><div data-v-208f898a=\"\" mode=\"out-in\"><div data-v-208f898a=\"\" class=\"box\" style=\"display: none;\"><div data-v-208f898a=\"\" class=\"options cancel\">取消</div></div></div></div></div><div data-v-b29b1ca4=\"\" data-v-9da5c5fc=\"\"></div><div class=\"x\"><!----> <div data-v-5af913e8=\"\" class=\"publish\"><!----> <!----> <div data-v-5af913e8=\"\" class=\"input-box\" style=\"height: 1.52rem; bottom: 0px;\"><div data-v-5af913e8=\"\" class=\"move-bug-div\"></div> <div data-v-5af913e8=\"\" class=\"i-box\"><textarea data-v-5af913e8=\"\" id=\"JInput\" type=\"text\" rows=\"1\" placeholder=\"向 2455 位已买过的人提问,4~40字\" class=\"input\"></textarea></div> <div data-v-5af913e8=\"\" class=\"bt\">提问</div></div></div></div></div> <script keeplive=\"\" src=\"//g.alicdn.com/mtb/??vue/2.4.0/vue.min.js,lib-windvane/2.1.8/windvane.js,lib-promise/3.1.3/polyfillB.js,lib-env/1.5.15/env.js,lib-mtop/2.3.6/mtop.js,lib-login/1.5.8/login.js,lib-httpurl/1.3.2/httpurl.js?v=2433438933_121885\"></script> <script>window.navigator.userAgent.indexOf(\"iPhone\")>-1&&(function(d,f){if(!f||!f.mtop||f.mtop.ERROR){throw new Error(\"Mtop 
初始化失败!请参考Mtop文档(emoji)\");return}function e(c){var b=this;var i=this.options;var a=this.params;var j=false;if(typeof a.data===\"string\"){a.data=a.data.replace(/(\\ud83c[\\udf00-\\udfff]|\\ud83d[\\udc00-\\ude4f]|\\ud83d[\\ude80-\\udeff])/g,function(g){j=true;return\"\\\\u\"+g.charCodeAt(0).toString(16)+\"\\\\u\"+g.charCodeAt(1).toString(16)})}return c().then(function(){var g=i.retJson;var h=g.ret;if(h instanceof Array){h=h.join(\",\")}if(j&&h.indexOf(\"FAIL_SYS_ILLEGAL_ACCESS\")>-1){return b.__sequence([b.__processToken,b.__processRequestUrl,b.__processUnitPrefix,b.middlewares,b.__processRequest])}})}f.mtop.middlewares.push(e)})(window,window.lib||(window.lib={}));</script> <script src=\"//g.alicdn.com/mtb/app-ask-2017/2.0.2/main/index-min.js?v=2389285280_135500\"></script>  <script type=\"text/javascript\" src=\"//astyle-src.alicdn.com/app/searchweb/products/zhaohuoshenqi/lib/jquery.js\"></script><script type=\"text/javascript\" src=\"//astyle-src.alicdn.com/app/searchweb/products/zhaohuoshenqi/entry/js/entry.js\"></script><link href=\"//astyle-src.alicdn.com/app/searchweb/products/zhaohuoshenqi/imagesearch/css/imagesearch.css\" rel=\"stylesheet\"><script src=\"//astyle-src.alicdn.com/app/searchweb/products/zhaohuoshenqi/imagesearch/js/imagesearch.js\" type=\"text/javascript\"></script><link href=\"//astyle-src.alicdn.com/app/searchweb/products/zhaohuoshenqi/rankboard/css/rankboard.css\" rel=\"stylesheet\"><script src=\"//astyle-src.alicdn.com/app/searchweb/products/zhaohuoshenqi/rankboard/js/rankboard.js\" type=\"text/javascript\"></script><div class=\"zh-rankboard-toolbar\"><a href=\"//shen.1688.com/?navType=rankboard&amp;tab=zhaohuoshenqi\" target=\"_blank\" class=\"toolbar-logo rect\"></a><span class=\"icon-right\"></span><div class=\"toolbar-rankboard\" style=\"display: none;\"><a class=\"tool-rank\" href=\"//shen.1688.com/?navType=rankboard&amp;tab=zhaohuoshenqi\" target=\"_blank\"><span class=\"icon rank-icon\">\uE607</span><span 
class=\"rank-txt\">热销榜单</span><span class=\"rank-txt-hover\">查看榜单</span></a><span class=\"icon-left\"></span></div></div><span id=\"dzt-installed\" version=\"1.0.1.5\"></span></body>";
            TestMain.captureHtml(buf,2);
            //TestMain.captureHtml("www.baidu.com",1);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    /**
     * Extracts the buyer questions from a Q&amp;A page and prints one line per question.
     *
     * <p>The page text is cut at every question marker. In each piece, the text up to the
     * closing marker is stored as the question via {@code Product.setProAmount}. The
     * answer field is set to the empty string: answers are not extracted yet.
     *
     * <p>All parsing state is local to the call, so the method can be invoked repeatedly.
     * A piece that lacks the closing marker is skipped instead of aborting the whole run.
     *
     * @param addrOrStr the page URL when {@code i == 1}; otherwise the HTML text itself
     * @param i  1 = download {@code addrOrStr} as a URL; any other value (2 by
     *           convention) = treat {@code addrOrStr} as the HTML to parse
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static void captureHtml(String addrOrStr,int i) throws Exception {
        String html = (i == 1) ? fetchPage(addrOrStr) : addrOrStr;

        // Markers are tied to the scoped-style attribute hashes of the saved page.
        final String questionStart = "问</div> <div data-v-3543fb8e=\"\" class=\"title text\">";
        final String questionEnd = "</div></div> <div data-v-72a4ccd1";

        // String.split interprets its argument as a regex; this marker contains no regex
        // metacharacters, so it behaves as a literal separator.
        String[] pieces = html.split(questionStart);
        List<Product> list = new ArrayList<Product>();

        // pieces[0] is whatever precedes the first question marker, so start at 1.
        // A local index (rather than a static counter) keeps repeated calls correct.
        for (int k = 1; k < pieces.length; k++) {
            int end = pieces[k].indexOf(questionEnd);
            if (end < 0) {
                // No closing marker in this piece: substring(0, -1) would throw, so skip it.
                continue;
            }
            Product p = new Product();
            // Question text.
            p.setProAmount(pieces[k].substring(0, end));
            // Answer text: not extracted yet, kept empty.
            p.setProStyle("");
            list.add(p);
        }

        for (Product item : list) {
            System.out.println(item.toString());
        }
        // Excel export is currently disabled:
        //ExcelUtils.myWrite(list);
    }

    /**
     * Downloads {@code addr} and returns the body decoded as UTF-8, with the lines
     * concatenated (line terminators are dropped). The reader is closed and the
     * connection disconnected even when reading fails.
     */
    private static String fetchPage(String addr) throws Exception {
        HttpURLConnection httpConn = (HttpURLConnection) new URL(addr).openConnection();
        BufferedReader bufReader = null;
        try {
            bufReader = new BufferedReader(
                    new InputStreamReader(httpConn.getInputStream(), "utf-8"));
            StringBuilder contentBuf = new StringBuilder();
            String line;
            while ((line = bufReader.readLine()) != null) {
                contentBuf.append(line);
            }
            return contentBuf.toString();
        } finally {
            try {
                if (bufReader != null) {
                    bufReader.close();
                }
            } finally {
                httpConn.disconnect();
            }
        }
    }
}
/**
 * Simple mutable holder for one question/answer pair.
 *
 * <p>{@code proAmount} carries the question text and {@code proStyle} the answer text.
 */
public class Product {
    /** Question text. */
    private String proAmount;
    /** Answer text. */
    private String proStyle;

    /** Creates an instance with both fields unset ({@code null}). */
    public Product() {
    }

    /** Returns the question text. */
    public String getProAmount() {
        return this.proAmount;
    }

    /** Stores the question text. */
    public void setProAmount(String proAmount) {
        this.proAmount = proAmount;
    }

    /** Returns the answer text. */
    public String getProStyle() {
        return this.proStyle;
    }

    /** Stores the answer text. */
    public void setProStyle(String proStyle) {
        this.proStyle = proStyle;
    }

    /** Renders the pair on one line: question label and text, then answer label and text. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("买家问:").append(this.proAmount);
        text.append(", 卖家答:").append(this.proStyle);
        return text.toString();
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值