Http爬取第三方接口,特此记录

1. Http方法,内置内部类,监测方法执行时间

    /**
     * Posts {@code word} to the Youdao demo translation endpoint through the given HTTP proxy
     * and returns the raw response body.
     *
     * <p>The request runs on a dedicated worker thread so that the caller never waits longer
     * than 10 seconds. The request is issued exactly once; its result is the value returned
     * here.
     *
     * @param word the text to translate; sent as the {@code q} form field
     * @param host proxy host name or IP address
     * @param port proxy port, as a decimal string
     * @return the response body decoded as UTF-8, or {@code null} when the request fails or
     *         does not finish within the time limit
     */
    public static String httpPost(final String word, final String host, final String port) {
        // Upper bound for the whole call; also applied to the socket connect/read timeouts
        // so that the worker thread cannot outlive the caller's wait indefinitely.
        final int timeoutMillis = 10 * 1000;
        final ExecutorService exec = Executors.newFixedThreadPool(1);
        try {
            Future<String> future = exec.submit(new Callable<String>() {
                @Override
                public String call() {
                    return doPost(word, host, port, timeoutMillis);
                }
            });
            return future.get(timeoutMillis, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            System.out.println("处理超时啦...."+e);
            return null;
        } catch (InterruptedException e) {
            // Preserve the interrupt status for the caller.
            Thread.currentThread().interrupt();
            System.out.println("处理失败."+e);
            return null;
        } catch (Exception e) {
            System.out.println("处理失败."+e);
            return null;
        } finally {
            // shutdownNow() also interrupts a worker that is still running after a timeout.
            exec.shutdownNow();
        }
    }

    /**
     * Performs the form-encoded POST and reads the whole response.
     *
     * @return the response body, or {@code null} on any encoding or I/O failure
     */
    private static String doPost(String word, String host, String port, int timeoutMillis) {
        String agent = randomUserAgent();

        // NOTE(review): these are JVM-wide properties, so concurrent calls with different
        // proxies overwrite each other. Kept as-is because other code may rely on them.
        System.getProperties().setProperty("proxySet", "true");
        System.getProperties().setProperty("proxyHost", host);
        System.getProperties().setProperty("proxyPort", port);
        System.out.println("所用ip为"+host+":"+port);

        // POST form fields, in the order they are sent.
        Map<String,Object> params = new LinkedHashMap<>();
        params.put("q", word);
        params.put("from", "auto");
        params.put("to", "auto");

        byte[] postDataBytes;
        try {
            StringBuilder postData = new StringBuilder();
            for (Map.Entry<String,Object> param : params.entrySet()) {
                if (postData.length() != 0) {
                    postData.append('&');
                }
                postData.append(URLEncoder.encode(param.getKey(), "UTF-8"));
                postData.append('=');
                postData.append(URLEncoder.encode(String.valueOf(param.getValue()), "UTF-8"));
            }
            postDataBytes = postData.toString().getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            System.out.println(e);
            return null;
        }

        HttpURLConnection conn = null;
        try {
            try {
                URL url = new URL("https://aidemo.youdao.com/trans");
                conn = (HttpURLConnection) url.openConnection();
                conn.setRequestMethod("POST");
            } catch (IOException e) {
                // conn may still be null here; the outer finally handles the cleanup.
                System.out.println(e);
                return null;
            }
            conn.setConnectTimeout(timeoutMillis);
            conn.setReadTimeout(timeoutMillis);
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            conn.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length));
            conn.setRequestProperty("User-Agent", agent);
            System.out.println("所用USERAGENT为"+agent);
            conn.setDoOutput(true);

            // Write the form body; the stream is closed before the response is read.
            try (java.io.OutputStream out = conn.getOutputStream()) {
                out.write(postDataBytes);
            } catch (IOException e) {
                System.out.println("写入流异常"+e);
                return null;
            }

            // Read the whole response body as UTF-8 text.
            try (Reader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                StringBuilder sb = new StringBuilder();
                char[] buffer = new char[4096];
                for (int n; (n = in.read(buffer)) >= 0;) {
                    sb.append(buffer, 0, n);
                }
                return sb.toString();
            } catch (IOException e) {
                System.out.println("读取流异常"+e);
                return null;
            }
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /** Picks one of the built-in browser User-Agent strings at random to disguise the request. */
    private static String randomUserAgent() {
        List<String> userAgent = new ArrayList<>();
        userAgent.add("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");
        userAgent.add("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1");
        userAgent.add("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362");
        userAgent.add("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36");
        userAgent.add("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:65.0) Gecko/20100101 Firefox/65.0");
        userAgent.add("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15");
        userAgent.add("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");
        userAgent.add("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763");
        userAgent.add("Mozilla/5.0 (iPhone; CPU iPhone OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) CriOS/31.0.1650.18 Mobile/11B554a Safari/8536.25 ");
        userAgent.add("Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4");
        userAgent.add("Mozilla/5.0 (Linux; Android 4.2.1; M040 Build/JOP40D) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.59 Mobile Safari/537.36");
        userAgent.add("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11");
        userAgent.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36");
        userAgent.add("Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5");
        return userAgent.get(new Random().nextInt(userAgent.size()));
    }

2. 解析返回的JSON数据

/**
 * Parses a translation response into dictionary entries.
 *
 * <p>The entry for the queried term itself is always stored under key {@code 0}. Each item of
 * the optional {@code web} array that is not the queried term becomes an extra entry flagged
 * as a phrase ({@code isword = 1}) and is stored under its array index; if the first web item
 * is not the queried term, it is stored under key {@code web.length()} so that it does not
 * collide with key {@code 0}.
 *
 * <p>Lists are joined directly from the array elements, so a translation that happens to
 * contain the text {@code "null,"} is preserved and empty arrays yield an empty string.
 *
 * @param jsonData the raw JSON response; may be {@code null}
 * @return a map from integer key to {@code Commondic} (the raw {@code Map} return type is kept
 *         for existing callers), or {@code null} when {@code jsonData} is {@code null} or has
 *         no {@code query} field
 * @throws JSONException if {@code jsonData} is not valid JSON, if {@code isWord} is missing,
 *         or if a field required for the detected kind ({@code basic}/{@code explains} for a
 *         word, {@code translation} for a phrase) is missing
 */
public static Map parseJsonString(String jsonData) {
    // The HTTP helper reports failure as null; treat that as "nothing to parse".
    if (jsonData == null) {
        return null;
    }
    Map<Integer,Commondic> commondicMap = new HashMap<>();
    // Entry describing the queried term itself.
    Commondic sourceEntry = new Commondic();
    JSONObject jsonObject = new JSONObject(jsonData);
    String query;
    try {
        query = jsonObject.get("query").toString();
    } catch (JSONException e) {
        System.out.println("query未发现"+e);
        return null;
    }
    sourceEntry.setVcVocabulary(query);
    // true: the query is a single word; false: it is a phrase.
    Object isWord = jsonObject.get("isWord");

    // Web definitions are optional, so a missing or malformed "web" field must not abort
    // the parse; entries collected before the failure are kept.
    try {
        JSONArray web = jsonObject.getJSONArray("web");
        for (int i = 0; i < web.length(); i++) {
            JSONObject entry = web.getJSONObject(i);
            String translations = joinJsonArray(entry.getJSONArray("value"), ",");
            String key = entry.get("key").toString();
            // The first web item normally repeats the queried term (in any letter case);
            // its values are the term's own web translation rather than a new entry.
            if (i == 0 && key.equalsIgnoreCase(query)) {
                sourceEntry.setVcTranslationNet(translations);
                continue;
            }
            Commondic phrase = new Commondic();
            phrase.setVcTranslation(translations);
            phrase.setVcVocabulary(key);
            phrase.setIsword(1);
            // Key 0 is reserved for the queried term, so a first item that is a different
            // term is stored past the end of the index range instead.
            commondicMap.put(i == 0 ? web.length() : i, phrase);
        }
    } catch (JSONException e) {
        System.out.println("web未发现"+e);
    }

    if (Boolean.TRUE.equals(isWord)) {
        // isword flag: 0 = single word, 1 = phrase.
        sourceEntry.setIsword(0);
        JSONObject basic = jsonObject.getJSONObject("basic");

        // "wfs" (word forms) is optional.
        try {
            JSONArray wfs = basic.getJSONArray("wfs");
            StringBuilder forms = new StringBuilder();
            for (int i = 0; i < wfs.length(); i++) {
                JSONObject wf = wfs.getJSONObject(i).getJSONObject("wf");
                if (i > 0) {
                    forms.append('/');
                }
                forms.append(wf.get("name")).append(':').append(wf.get("value"));
            }
            sourceEntry.setExchange(forms.toString().trim());
        } catch (JSONException e) {
            System.out.println("wfs未发现"+e);
        }

        // "exam_type" and the phonetics are optional; when any of them is missing, fall
        // back to storing only the definitions.
        try {
            JSONArray examTypes = basic.getJSONArray("exam_type");
            JSONArray explains = basic.getJSONArray("explains");
            sourceEntry.setTag(joinJsonArray(examTypes, "/"));
            sourceEntry.setVcSpeechUs("["+basic.get("us-phonetic").toString()+"]");
            sourceEntry.setVcSpeechUk("["+basic.get("uk-phonetic").toString()+"]");
            sourceEntry.setVcTranslation(joinJsonArray(explains, ","));
        } catch (JSONException e) {
            sourceEntry.setVcTranslation(joinJsonArray(basic.getJSONArray("explains"), ","));
        }
    } else if (Boolean.FALSE.equals(isWord)) {
        sourceEntry.setVcTranslation(joinJsonArray(jsonObject.getJSONArray("translation"), ","));
    }
    commondicMap.put(0, sourceEntry);
    return commondicMap;
}

/** Joins the non-null elements of {@code array} with {@code separator} and trims the result. */
private static String joinJsonArray(JSONArray array, String separator) {
    StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (int i = 0; i < array.length(); i++) {
        if (array.isNull(i)) {
            continue;
        }
        if (!first) {
            joined.append(separator);
        }
        joined.append(array.get(i).toString());
        first = false;
    }
    return joined.toString().trim();
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值