搜索引擎接口测试-大数据统计测试

搜索重要指标:召回率、准确率。

核心指标:relevance相关性、freshness时效性、quality质量、ctr点击率、confidence权威度、cold_start冷启动。最终的排序依赖这几个字段计算分数

 

 

业务接口返回6000-9000个字段

搜索接口查了7个搜索链路

PM需求:

选取某一天的人物query,标识出同时召回type=12和type=3的query,计算二者同时出现的概率,并列出这些同时召回的query,供分析使用。

query选取:人物top1000 、随机1000


引擎接口传参加上调试信息cmd=xx后的接口返回43846个字段

 

引擎接口不加调试信息cmd=xx的接口返回23944个字段

 

如果从业务接口去请求,不可,因为业务接口过滤了引擎返回的大量字段

所以只能测引擎接口

难点:如何从上万个返回字段里取到这个“特殊的字段”。查了很多个query,直接看JSON看到想吐,粘贴到在线JSON解析工具里则直接无响应。。。

最终:取resultList这个jsonArray下的jsonObject的元素即可

 


QA测试设计:

数据源准备:去FBI捞取或去ODPS读取top1000个人物query、任意1000个人物query

读取引擎接口(对内),判断接口返回,取标志性字段(类型字段,代码中读取的key为doc_source),用for循环遍历resultList这个JSONArray下的每个JSONObject。通过分支判断,组合出各种场景,实现需求所要的数据统计

 


发现开发代码的bug:引擎接口召回不稳定,同一个query请求2次,时而返回3 和12类型,时而仅返回12类型

 

写给自己的bug:跑完数据才发现,召回比率应该改为百分比

 

修改后:

 



代码实现:

模块划分:

 

1. HTTPCommonMethod为拼接http请求的工具类

package com.xx.searchRecall.utils;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.util.EncodingUtil;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class HTTPCommonMethod {

    /** Full URL (base URL plus encoded query string) of the most recent request built by {@link #doGet}. */
    public static String requestURL;

    /**
     * Sends an HTTP GET request and returns the response body decoded as UTF-8.
     *
     * <p>The entries of {@code params} are form-URL-encoded and appended to {@code url_pre}, joined
     * with {@code ?} when the URL has no query string yet and with {@code &} otherwise. The final URL
     * is also stored in {@link #requestURL}.
     *
     * @param url_pre base URL, optionally already carrying fixed query parameters
     * @param params  per-request parameters to append; may be {@code null} or empty
     * @param count   index of the record being processed; only used in the error log line
     * @return the response body; an empty string when {@code url_pre} is {@code null}, when the
     *         status code is not 200 or when the response has no body; {@code null} when building
     *         or executing the request throws
     */
    public static String doGet(String url_pre, Map<String, String> params, int count) {
        if (url_pre == null) {
            return "";
        }

        GetMethod getMethod = null;
        try {
            List<NameValuePair> queryParams = getParamsList_pre(params);
            if (!queryParams.isEmpty()) {
                String formatParams = EncodingUtil.formUrlEncode(
                        queryParams.toArray(new NameValuePair[0]), "utf-8");
                url_pre = url_pre + (url_pre.indexOf("?") < 0 ? "?" : "&") + formatParams;
            }
            requestURL = url_pre;

            getMethod = new GetMethod(url_pre);
            getMethod.addRequestHeader(new Header("Content-type", "application/json"));

            int statusCode = new HttpClient().executeMethod(getMethod);
            if (statusCode != 200) {
                // Report the record index and the failing status code.
                System.out.println("第【" + count + "】条日志,预发环境请求出错,错误码为=======" + statusCode);
                return "";
            }

            byte[] body = getMethod.getResponseBody();
            return body == null ? "" : new String(body, "utf-8");
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Always hand the connection back, whether the request succeeded or not.
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
    }


    /**
     * Converts a parameter map into the name/value pairs expected by {@code EncodingUtil}.
     *
     * @param paramsMap request parameters; may be {@code null}
     * @return one pair per map entry, or an empty list when the map is {@code null} or empty
     */
    private static List<NameValuePair> getParamsList_pre(Map<String, String> paramsMap) {
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        if (paramsMap == null) {
            return params;
        }
        for (Map.Entry<String, String> entry : paramsMap.entrySet()) {
            // Concatenating with "" keeps the original behaviour of sending the text "null"
            // for a null key or value.
            params.add(new NameValuePair(entry.getKey() + "", entry.getValue() + ""));
        }
        return params;
    }
}


2. OdpsUtil为连接数据库的工具类

package com.xx.searchRecall.utils;

import com.aliyun.odps.Instance;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.account.Account;
import com.aliyun.odps.account.AliyunAccount;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.task.SQLTask;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class OdpsUtil {
    // Connection settings for the ODPS project. Per the original note they are obtained from the
    // data-platform site (https://xx.dw.xx-inc.com/) by clicking the user avatar.

    private static String accessId = "xx";
    private static String accessKey = "xx";

    private static String odpsUrl = "http://xx.com/api";
    // The original note distinguishes a dev (test) project from the online one; both names are
    // placeholders in this listing.
//    private static String project = "xx";
    private static String project = "xx";

    /**
     * Runs {@code sql} with the credentials configured in this class.
     *
     * @param sql statement to execute
     * @return the result records, or an empty list when an {@code OdpsException} is thrown
     */
    public static List<Record> getSQLResult(String sql){
        return getSQLResult(sql, accessId, accessKey);
    }

    /**
     * Runs {@code sql} against the configured endpoint and default project with the given
     * credentials, waiting for the task to succeed before fetching its result.
     *
     * @param sql           statement to execute
     * @param accessSelfId  access id to authenticate with
     * @param accessSelfKey access key to authenticate with
     * @return the result records, or an empty list when an {@code OdpsException} is thrown
     *         (the stack trace is printed)
     */
    public static List<Record> getSQLResult(String sql,String accessSelfId,String accessSelfKey){
        Account credentials = new AliyunAccount(accessSelfId, accessSelfKey);
        Odps client = new Odps(credentials);
        client.setEndpoint(odpsUrl);
        client.setDefaultProject(project);

        try {
            Instance task = SQLTask.run(client, sql);
            task.waitForSuccess();
            return SQLTask.getResult(task);
        } catch (OdpsException e) {
            e.printStackTrace();
            return new ArrayList<Record>();
        }
    }

    /**
     * Extracts the first column of every record as a string.
     *
     * @param list records to convert; may be {@code null}
     * @return the column-0 string of each record, in order; empty when {@code list} is
     *         {@code null} or empty
     */
    public static List<String> record2wordList(List<Record> list)
    {
        List<String> words = new ArrayList<>();
        if (list == null) {
            return words;
        }
        for (Record row : list) {
            words.add(row.getString(0));
        }
        return words;
    }
}


3. RunProcess为程序入口方法,传入要查询的SQL,调用odps工具类按行读取数据源(top1000的query)

package com.xx.searchRecall;

import com.xx.searchRecall.utils.OdpsUtil;
import com.xx.searchRecall.utils.TimeTransfer;
import com.xx.searchRecall.utils.logOnlineReadODPS;
import com.xx.odps.data.Record;

import java.text.SimpleDateFormat;
import java.util.List;


public class RunProcess {
    // Credentials passed to OdpsUtil.getSQLResult (placeholder values in this listing).
    private static String accessId = "xx";
    private static String accessKey = "xx";

    /** Pattern used when printing the start and end wall-clock times. */
    private static final String TIME_PATTERN = "yyyy年MM月dd日 HH:mm:ss";

    /**
     * Selects the {@code query} column (aliased {@code f1}) of rows whose type is the person card
     * and whose {@code ds} lies in [20200419, 20200519), newest first, capped at 1000 rows.
     * NOTE(review): the original comment calls this the "random 1000" data set, but the statement
     * orders by date rather than sampling; confirm that is intended.
     */
    private static final String PERSON_QUERY_SQL = "SELECT t0t.query AS f1 FROM(  \n" +
            "\n" +
            "SELECT ftbl_1t.type AS type\n" +
            "  , ftbl_1t.xx AS xx\n" +
            "  , ftbl_1t.xx AS xx\n" +
            "  , ftbl_1t.ctr AS ctr\n" +
            "  , ftbl_1t.xx AS xx\n" +
            "  , ftbl_1t.xx AS xx\n" +
            "  , ftbl_1t.xx AS xx\n" +
            "  , ftbl_1t.vv AS vv\n" +
            "  , ftbl_1t.ts AS ts\n" +
            "  , ftbl_1t.xx AS xx\n" +
            "  , ftbl_1t.query AS query\n" +
            "  , ftbl_1t.ds AS ds\n" +
            "FROM xx.xx ftbl_1t\n" +
            "\n" +
            "\n" +
            "  )t0t WHERE ((t0t.ds  >=  '20200419')  AND(t0t.ds  <  '20200519'))  AND(t0t.type  =  '人物卡片') ORDER BY TO_DATE(t0t.ds,'yyyymmdd')  DESC LIMIT 1000;";

    /**
     * Entry point: runs the person-query SQL on ODPS, hands the rows to
     * {@code logOnlineReadODPS.startSearch}, and prints start time, end time, elapsed seconds and
     * the number of rows read.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The value is not used below; the call is kept because TimeTransfer is not visible here
        // and may have side effects.
        String currentDay = TimeTransfer.getCurrentTime();

        // The top-1000 variant of the query is elided ("xx") in the original listing.
        String sql = PERSON_QUERY_SQL;
        SimpleDateFormat dateFormat = new SimpleDateFormat(TIME_PATTERN);

        long startTime = System.currentTimeMillis();
        String startTimeRun = dateFormat.format(startTime);
        System.out.println("===============查询客户端传参日志SQL开始执行了,startTimeRun为=================【" + startTimeRun + "】");

        List<Record> list = OdpsUtil.getSQLResult(sql, accessId, accessKey);
        logOnlineReadODPS.startSearch(list);

        long endTime = System.currentTimeMillis();
        String endTimeRun = dateFormat.format(endTime);
        System.out.println("==========读取odps当前日期的传参日志完毕,endTimeRun为===========【" + endTimeRun + "】");

        // Elapsed time covers both the ODPS query and the per-query engine requests.
        long ReadTime = (endTime - startTime) / 1000;
        System.out.println("==========从连接到读取数据库日志的时长,ReadTime为===========【" + ReadTime + "】秒");

        System.err.println("list.size=" + list.size());
    }

}

 

4. ReadFiles为读取本地数据的方法(百度的一段代码),本地文件格式:以英文逗号","分隔,例如:

赵露思,周星驰,陈芊芊,林正英,迪丽热巴,杨烁,刘德华,吴亦凡
package com.alibaba.searchRecall.utils;

import java.io.*;
import java.util.Arrays;

public class ReadFiles {

    /**
     * Reads a UTF-8 text file and splits its whole content on commas.
     *
     * <p>Every line is echoed to standard output and a comma is appended after each line, so a
     * line break acts as a separator as well. Trailing empty tokens are dropped by
     * {@link String#split(String)}.
     *
     * @param filePath path of the file to read
     * @return the comma-separated tokens in file order; an empty array when the file is missing,
     *         unreadable before any content was read, or has no content
     */
    public static String[] readTxt(String filePath) {
        StringBuilder builder = new StringBuilder();
        try {
            File file = new File(filePath);
            if (file.isFile()) {
                // try-with-resources closes the stream even when readLine() throws.
                try (FileInputStream in = new FileInputStream(file);
                     BufferedReader br = new BufferedReader(new InputStreamReader(in, "utf-8"))) {
                    String lineTxt;
                    while ((lineTxt = br.readLine()) != null) {
                        System.out.println(lineTxt);
                        builder.append(lineTxt).append(",");
                    }
                }
            } else {
                System.out.println("文件不存在!");
            }
        } catch (Exception e) {
            System.out.println("文件读取错误!");
            // Surface the cause instead of discarding it.
            e.printStackTrace();
        }

        // "".split(",") would yield a single empty token, which callers would treat as a keyword.
        if (builder.length() == 0) {
            return new String[0];
        }
        return builder.toString().split(",");
    }

    /**
     * Manual check: prints the configured path and the tokens read from it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String filePath = "/Users/xx/searchRecall/utils/person.txt";
        System.out.println(filePath);
        String[] strings = readTxt(filePath);
        System.out.println("strings:"+Arrays.toString(strings));
    }
}

 

5. logOnlineReadODPS为从数据库类取到源数据后请求接口,接口返回解析

package com.xx.searchRecall.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.odps.data.Record;

import java.util.*;


public class logOnlineReadODPS {

    /**
     * No-op entry point; {@link #startSearch(List)} needs records supplied by a caller.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
//        startSearch();
    }

    // Not read or written anywhere in this class; kept because it is public.
    public static List<String> list = new ArrayList<>();

    /**
     * Requests the engine interface once per record and prints how many queries recalled only
     * doc_source 3, only doc_source 12, both, or neither, with the matching queries and the
     * share of each bucket as a percentage.
     *
     * @param list records whose {@code f1} column holds the search keyword
     */
    public static void startSearch(List<Record> list) {

        int only3 = 0;
        StringBuilder query3 = new StringBuilder();

        int only12 = 0;
        StringBuilder query12 = new StringBuilder();

        int both3_12 = 0;
        StringBuilder query3_12 = new StringBuilder();

        int no3_12 = 0;
        StringBuilder queryNO3_12 = new StringBuilder();

        int totalCount = list.size();

        for (int i = 0; i < totalCount; i++) {
            String keywords = list.get(i).getString("f1");

            Map<String, String> query = new HashMap<>();
            query.put("keyword", keywords);
            // Fixed parameters are part of the URL; only the keyword varies per request.
            String url_pre = "http://xx/query?noqc=0&xx=xx&pg=1&nocache=1&sdkver=xx";

            System.out.println("第" + (i + 1) + "条数据==" + query);
            String response = HTTPCommonMethod.doGet(url_pre, query, i);

            int type = responseToParse(i, keywords, parseResponse(response));

            switch (type) {
                case 1:
                    only3++;
                    query3.append(keywords).append(",");
                    break;
                case 2:
                    only12++;
                    query12.append(keywords).append(",");
                    break;
                case 3:
                    both3_12++;
                    query3_12.append(keywords).append(",");
                    break;
                default:
                    no3_12++;
                    queryNO3_12.append(keywords).append(",");
                    break;
            }
        }

        System.out.println("totalCount==" + totalCount);

        String rate3 = percent(only3, totalCount);
        String rate12 = percent(only12, totalCount);
        String rate3_12 = percent(both3_12, totalCount);
        String rateNO3_12 = percent(no3_12, totalCount);

        System.out.println("------------------------------------------------------------------------------------------------");
        System.out.println("------------------------------------------------------------------------------------------------g");
        System.out.println("only3---只召回自频道==【" + only3 + "】---比率为==【" + rate3 + "】---query3==【" + query3 + "】");
        System.out.println("only12---只召回人物==【" + only12 + "】---比率为==【" + rate12 + "】---query12==【" + query12 + "】");
        System.out.println("both3-12---同时召回自频道和人物==【" + both3_12 + "】---比率为==【" + rate3_12 + "】---query3_12==【" + query3_12 + "】");
        System.out.println("no3-12---均未召回自频道和人物==【" + no3_12 + "】---比率为==【" + rateNO3_12 + "】---queryNO3_12==【" + queryNO3_12 + "】");

    }

    /** Parses a response body, returning null instead of throwing when it is not valid JSON. */
    private static JSONObject parseResponse(String response) {
        try {
            return JSONObject.parseObject(response);
        } catch (RuntimeException e) {
            // One malformed response must not abort the whole run.
            e.printStackTrace();
            return null;
        }
    }

    /** Formats count/total as a percentage string; "0.0%" when total is zero. */
    private static String percent(int count, int total) {
        if (total == 0) {
            return "0.0%";
        }
        return ((float) count / (float) total) * 100 + "%";
    }


    /**
     * Classifies a response by the {@code doc_source} values found in its {@code resultList}.
     *
     * <p>Items that are missing, lack {@code doc_source} or cannot be read are skipped, so they do
     * not discard matches found in other items.
     *
     * @param count    index of the record, used only in the log line
     * @param query    search keyword, used only in the log line
     * @param response parsed response; may be {@code null}
     * @return 1 when only doc_source 3 is present, 2 when only doc_source 12 is present, 3 when
     *         both are present, 0 otherwise (including a null or empty response, or one without
     *         a readable {@code resultList})
     */

    public static int responseToParse(int count, String query, JSONObject response) {
        if (response == null || response.isEmpty()) {
            System.err.println("第【" + count + "】条日志,搜索query为==【" + query + "】,接口返回为空");
            return 0;
        }

        JSONArray results;
        try {
            results = response.getJSONArray("resultList");
        } catch (RuntimeException e) {
            e.printStackTrace();
            return 0;
        }
        if (results == null) {
            return 0;
        }

        boolean docSource3 = false;
        boolean docSource12 = false;
        // Stop as soon as both types were seen; further items cannot change the outcome.
        for (int i = 0; i < results.size() && !(docSource3 && docSource12); i++) {
            try {
                JSONObject item = results.getJSONObject(i);
                Integer docSource = item == null ? null : item.getInteger("doc_source");
                if (docSource == null) {
                    continue;
                }
                if (docSource.intValue() == 3) {
                    docSource3 = true;
                } else if (docSource.intValue() == 12) {
                    docSource12 = true;
                }
            } catch (RuntimeException e) {
                // Skip an unreadable item instead of abandoning the whole response.
                e.printStackTrace();
            }
        }

        if (docSource3 && docSource12) {
            return 3;
        }
        if (docSource3) {
            return 1;
        }
        if (docSource12) {
            return 2;
        }
        return 0;
    }

    // Not used by this class; kept because it is public.
    public static JSONObject jsonObject = new JSONObject();

}

 

6. logOnlineReadFiles为读取本地数据源,请求接口,接口返回解析

package com.alibaba.searchRecall.utils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

import java.util.*;

import static com.alibaba.searchRecall.utils.ReadFiles.readTxt;

public class logOnlineReadFiles {

    /** Keyword file used by {@link #startSearch()} when no path is supplied. */
    private static final String DEFAULT_FILE_PATH =
            "/Users/lishan/Desktop/xx/xx/src/main/java/com/xx/searchRecall/person.txt";

    /**
     * Runs the statistics against the default keyword file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        startSearch();
    }

    // Not read or written anywhere in this class; kept because it is public.
    public static List<String> list = new ArrayList<>();

    /** Runs {@link #startSearch(String)} with the default keyword file. */
    public static void startSearch() {
        startSearch(DEFAULT_FILE_PATH);
    }

    /**
     * Reads comma-separated keywords from a local file, requests the engine interface once per
     * keyword and prints how many queries recalled only doc_source 3, only doc_source 12, both,
     * or neither, with the matching queries and the share of each bucket as a percentage.
     *
     * <p>Blank keywords are skipped and not counted in the total. The alternative data source,
     * reading the keywords from ODPS, is implemented in {@code logOnlineReadODPS}.
     *
     * @param filePath path of the keyword file
     */
    public static void startSearch(String filePath) {
        System.out.println(filePath);
        String[] keywords = readTxt(filePath);
        System.out.println("strings:" + Arrays.toString(keywords));

        int only3 = 0;
        StringBuilder query3 = new StringBuilder();

        int only12 = 0;
        StringBuilder query12 = new StringBuilder();

        int both3_12 = 0;
        StringBuilder query3_12 = new StringBuilder();

        int no3_12 = 0;
        StringBuilder queryNO3_12 = new StringBuilder();

        int totalCount = 0;

        for (int i = 0; i < keywords.length; i++) {
            String keyword = keywords[i];
            // An empty token (e.g. from "a,,b" or an empty file) is not a query.
            if (keyword == null || keyword.trim().isEmpty()) {
                continue;
            }
            totalCount++;

            Map<String, String> query = new HashMap<>();
            query.put("keyword", keyword);
            // Fixed parameters are part of the URL; only the keyword varies per request.
            String url_pre = "http://xx/query?noqc=0&pg=1&nocache=1&xx=308";

            System.out.println("第" + (i + 1) + "条数据==" + query);
            String response = HTTPCommonMethod.doGet(url_pre, query, i);

            int type = responseToParse(i, keyword, parseResponse(response));

            switch (type) {
                case 1:
                    only3++;
                    query3.append(keyword).append(",");
                    break;
                case 2:
                    only12++;
                    query12.append(keyword).append(",");
                    break;
                case 3:
                    both3_12++;
                    query3_12.append(keyword).append(",");
                    break;
                default:
                    no3_12++;
                    queryNO3_12.append(keyword).append(",");
                    break;
            }
        }

        System.out.println("totalCount==" + totalCount);

        // Rates are reported as percentages, matching the ODPS-based variant.
        String rate3 = percent(only3, totalCount);
        String rate12 = percent(only12, totalCount);
        String rate3_12 = percent(both3_12, totalCount);
        String rateNO3_12 = percent(no3_12, totalCount);

        System.out.println("------------------------------------------------------------------------------------------------");
        System.out.println("------------------------------------------------------------------------------------------------g");
        System.out.println("only3---只召回自频道==【" + only3 + "】---比率为==【" + rate3 + "】---query3==【" + query3 + "】");
        System.out.println("only12---只召回人物==【" + only12 + "】---比率为==【" + rate12 + "】---query12==【" + query12 + "】");
        System.out.println("both3-12---同时召回自频道和人物==【" + both3_12 + "】---比率为==【" + rate3_12 + "】---query3_12==【" + query3_12 + "】");
        System.out.println("no3-12---均未召回自频道和人物==【" + no3_12 + "】---比率为==【" + rateNO3_12 + "】---queryNO3_12==【" + queryNO3_12 + "】");

    }

    /** Parses a response body, returning null instead of throwing when it is not valid JSON. */
    private static JSONObject parseResponse(String response) {
        try {
            return JSONObject.parseObject(response);
        } catch (RuntimeException e) {
            // One malformed response must not abort the whole run.
            e.printStackTrace();
            return null;
        }
    }

    /** Formats count/total as a percentage string; "0.0%" when total is zero. */
    private static String percent(int count, int total) {
        if (total == 0) {
            return "0.0%";
        }
        return ((float) count / (float) total) * 100 + "%";
    }


    /**
     * Classifies a response by the {@code doc_source} values found in its {@code resultList}.
     *
     * <p>Items that are missing, lack {@code doc_source} or cannot be read are skipped, so they do
     * not discard matches found in other items.
     *
     * @param count    index of the keyword, used only in the log line
     * @param query    search keyword, used only in the log line
     * @param response parsed response; may be {@code null}
     * @return 1 when only doc_source 3 is present, 2 when only doc_source 12 is present, 3 when
     *         both are present, 0 otherwise (including a null or empty response, or one without
     *         a readable {@code resultList})
     */

    public static int responseToParse(int count, String query, JSONObject response) {
        if (response == null || response.isEmpty()) {
            System.err.println("第【" + count + "】条日志,搜索query为==【" + query + "】,接口返回为空");
            return 0;
        }

        JSONArray results;
        try {
            results = response.getJSONArray("resultList");
        } catch (RuntimeException e) {
            e.printStackTrace();
            return 0;
        }
        if (results == null) {
            return 0;
        }

        boolean docSource3 = false;
        boolean docSource12 = false;
        // Stop as soon as both types were seen; further items cannot change the outcome.
        for (int i = 0; i < results.size() && !(docSource3 && docSource12); i++) {
            try {
                JSONObject item = results.getJSONObject(i);
                Integer docSource = item == null ? null : item.getInteger("doc_source");
                if (docSource == null) {
                    continue;
                }
                if (docSource.intValue() == 3) {
                    docSource3 = true;
                } else if (docSource.intValue() == 12) {
                    docSource12 = true;
                }
            } catch (RuntimeException e) {
                // Skip an unreadable item instead of abandoning the whole response.
                e.printStackTrace();
            }
        }

        if (docSource3 && docSource12) {
            return 3;
        }
        if (docSource3) {
            return 1;
        }
        if (docSource12) {
            return 2;
        }
        return 0;
    }

    // Not used by this class; kept because it is public.
    public static JSONObject jsonObject = new JSONObject();

}

 

最终提供给pm的样子 

。。。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

东方狱兔

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值