百度自然语言接口调用

最新推荐文章于 2022-09-29 16:26:47 发布

风zi

最新推荐文章于 2022-09-29 16:26:47 发布

阅读量3.9k

点赞数 2

分类专栏：中文解析工具调用文章标签：中文分词

本文链接：https://blog.csdn.net/qq_35241080/article/details/82999551

版权

中文解析工具调用专栏收录该内容

4 篇文章 0 订阅

订阅专栏

java调用百度自然语言中文分词接口

一、应用创建

百度自然语言处理平台提供词法分析（包含分词、词性标注、命名实体识别三大功能）等一系列功能。

下面记录一下百度自然语言接口调用：

1.首先需要登录自己的百度云账号，进入百度云管理中心

2.调用百度自然语言接口之前，需要根据自己的需求创建一个应用，再使用该应用的 key 来调用接口
在这里插入图片描述

点击“创建应用”，进入应用创建界面，填写必填项并选择需要调用的服务。我这里用到了分词，因此选择自然语言处理的默认接口；输入完成后点击“立即创建”。

回到概览页面可以看到已建应用，如上图显示的“已建应用 1 个”，点击进入管理应用

在这里插入图片描述

上面是我之前创建的应用，其中最重要的信息是 API Key 和 Secret Key：需要先用这两个值创建接口调用令牌（access token），再凭令牌调用接口。下面开始创建个人令牌。

二、接口调用

官方文档地址：http://ai.baidu.com/docs#/NLP-API/top

查看官方API文档会发现调用接口需要个人令牌

令牌创建：http://ai.baidu.com/docs#/Auth/top

1.创建令牌，官方代码记录，需要修改自己应用分配的API Key、Secret Key

package com.zhjy.yyfx;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;

import org.json.JSONObject;

/**
 * 获取token类
 */
/**
 * Obtains an OAuth access token for the Baidu AI open platform from an
 * application's API Key and Secret Key.
 */
public class AuthService {

    /** Token endpoint; the query string is appended directly after the trailing '?'. */
    private static final String TOKEN_ENDPOINT = "https://aip.baidubce.com/oauth/2.0/token?";

    /** Upper bounds so that a stalled network cannot block the caller forever. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Fetches a token with the built-in placeholder credentials and prints it to
     * standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String auth = getAuth();
        System.out.println(auth);
    }

    /**
     * Requests an access token using the credentials hard-coded below.
     *
     * <p>The token endpoint answers with JSON of the form
     * <pre>
     * {
     *   "access_token": "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567",
     *   "expires_in": 2592000
     * }
     * </pre>
     * of which only the {@code access_token} value is returned.
     *
     * @return the access token, or {@code null} if it could not be obtained
     */
    public static String getAuth() {
        // API Key from the Baidu console - replace with the one issued to your application.
        String clientId = "FkmuhNDy2an1Ki5Q9kxxxxx";
        // Secret Key from the Baidu console - replace with the one issued to your application.
        String clientSecret = "K1vXp5iviNNGxLv15MMdIiKixxxxx";
        return getAuth(clientId, clientSecret);
    }

    /**
     * Requests an API access token for the given credentials.
     *
     * <p>The token is only valid for a limited time; callers are responsible for
     * caching it and requesting a new one once it has expired.
     *
     * <p>Response headers and the raw response body are echoed to standard error
     * for debugging. Any failure (network error, non-success HTTP status,
     * malformed JSON, missing {@code access_token} field) is reported on standard
     * error and turned into a {@code null} return value.
     *
     * @param ak API Key obtained from the Baidu cloud console
     * @param sk Secret Key obtained from the Baidu cloud console
     * @return the access token, for example
     *         {@code "24.460da4889caad24cccdb1fea17221975.2592000.1491995545.282335-1234567"},
     *         or {@code null} on failure
     */
    public static String getAuth(String ak, String sk) {
        HttpURLConnection connection = null;
        try {
            String utf8 = StandardCharsets.UTF_8.name();
            // Credentials are URL-encoded so that reserved characters cannot corrupt the query string.
            String getAccessTokenUrl = TOKEN_ENDPOINT
                    // 1. grant_type is a fixed parameter
                    + "grant_type=client_credentials"
                    // 2. API Key
                    + "&client_id=" + URLEncoder.encode(ak, utf8)
                    // 3. Secret Key
                    + "&client_secret=" + URLEncoder.encode(sk, utf8);

            URL realUrl = new URL(getAccessTokenUrl);
            connection = (HttpURLConnection) realUrl.openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT_MS);
            connection.setReadTimeout(READ_TIMEOUT_MS);
            connection.setRequestMethod("GET");
            connection.connect();

            // Dump every response header for debugging.
            for (Map.Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
                System.err.println(header.getKey() + "--->" + header.getValue());
            }

            // Read the whole body; the reader is closed even if parsing fails later.
            StringBuilder body = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    body.append(line);
                }
            }
            String result = body.toString();
            System.err.println("result:" + result);

            JSONObject jsonObject = new JSONObject(result);
            return jsonObject.getString("access_token");
        } catch (Exception e) {
            // Boundary catch: this method's contract is "null on any failure".
            System.err.println("获取token失败！");
            e.printStackTrace(System.err);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return null;
    }

}

2.根据生成的令牌调用词法分析接口

（通用版）词法分析接口：向用户提供分词、词性标注、专名识别三大功能；能够识别出文本串中的基本词汇（分词），对这些词汇进行重组、标注组合后词汇的词性，并进一步识别出命名实体。（定制版）词法分析接口：向用户提供分词、词性标注、专名识别三大功能；用户在控制台中进行个性化配置，支持自定义词表与规则，通过定制版可有效识别应用场景中的小众词汇与类别。定制版接口的使用教程请看链接：http://ai.baidu.com/forum/topic/show/496975

（通用版）请求URL: https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer
（定制版）请求URL: https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom
URL参数：access_token=获取的token令牌值

请求参数：
text：（string）待分析文本（目前仅支持GBK编码），长度不超过20000字节

代码调用：亲测可用

需要注意：1.请求text参数需要是GBK格式；2.接口传回结果的乱码处理

package com.zhjy.yyfx;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;


/**
 * Small HTTP helper used to call the Baidu lexical-analysis endpoint, plus
 * string utilities for dealing with mis-decoded (garbled) responses.
 */
public class HttpClienUtil {

    /** Encoding used for the request body and as the fallback for decoding the response body. */
    private static final Charset GBK = Charset.forName("GBK");

    /** Single-byte charset used to recover the raw bytes of a mis-decoded string. */
    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

    /**
     * Sends {@code data} as the body of an HTTP POST to {@code url} and returns
     * the response body.
     *
     * <p>The request body is encoded as GBK. The response body is decoded with
     * the charset declared by the response, falling back to GBK when none is
     * declared, so the returned string is readable text rather than
     * ISO-8859-1 mojibake.
     *
     * <p>The URL and payload are echoed to standard output. An {@link IOException}
     * is reported on standard output / standard error and results in an empty
     * return value.
     *
     * @param url  full request URL, including the {@code access_token} query parameter
     * @param data JSON request body
     * @return the response body when the server answers 200; otherwise an empty
     *         string (also when {@code url} or {@code data} is null or blank)
     */
    public static String httpPostMethod(String url, String data) {
        String licenseStr = "";
        if (isEmpty(url) || isEmpty(data)) {
            return licenseStr;
        }
        System.out.println("url==>" + url);
        System.out.println("data==>" + data);

        // One client per call: no state is shared between concurrent callers.
        HttpClient httpClient = new DefaultHttpClient();
        try {
            HttpPost method = new HttpPost(url);
            // NOTE(review): the header advertises utf-8 while the entity below is GBK-encoded;
            // left unchanged because this combination is what the article reports as working
            // against the service - confirm against the current API documentation.
            method.addHeader("Content-type", "application/json; charset=utf-8");
            method.setHeader("Accept", "application/json");
            method.setEntity(new StringEntity(data, GBK));

            HttpResponse response = httpClient.execute(method);
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == HttpStatus.SC_OK) {
                licenseStr = EntityUtils.toString(response.getEntity(), GBK);
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("http调用失败");
        } finally {
            // Release the pooled connections held by this client.
            httpClient.getConnectionManager().shutdown();
        }
        return licenseStr;
    }

    /**
     * Calls the general-purpose lexer endpoint with a sample sentence and prints
     * the result.
     *
     * @param args unused
     * @throws UnsupportedEncodingException kept for signature compatibility
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // General endpoint:    https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer
        // Customised endpoint: https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer_custom
        String url = "https://aip.baidubce.com/rpc/2.0/nlp/v1/lexer?access_token=24.b89362e9c8f8218417975271405b8e00.2592000.1541730494.282335-14384615";
        // Text to analyse.
        String data = "{\"text\": \"百度是一家高科技公司\"}";

        String httpPostMethod = httpPostMethod(url, data);
        // toUTF8 leaves an already correctly decoded response untouched.
        System.out.println(toUTF8(httpPostMethod));
    }

    /**
     * Repairs a string that was produced by decoding GBK bytes as ISO-8859-1.
     *
     * <p>A Java {@code String} has no encoding of its own, so the only thing that
     * can be fixed here is text that was decoded with the wrong charset. A string
     * is treated as garbled when every character is at most U+00FF and at least
     * one is outside ASCII; its original bytes are then recovered and decoded as
     * GBK. If that decoding produces replacement characters the input was not GBK
     * after all and is returned unchanged. Strings that already contain
     * characters above U+00FF (for example correctly decoded Chinese text) and
     * pure-ASCII strings are returned unchanged.
     *
     * @param str text to repair; may be {@code null}
     * @return the repaired text, the input itself when no repair applies, or an
     *         empty string when the input is null or blank
     */
    public static String toUTF8(String str) {
        if (isEmpty(str)) {
            return "";
        }
        if (!looksLikeLatin1Mojibake(str)) {
            return str;
        }
        String repaired = new String(str.getBytes(ISO_8859_1), GBK);
        // U+FFFD marks bytes that were not valid GBK; keep the original in that case.
        return repaired.indexOf('\uFFFD') >= 0 ? str : repaired;
    }

    /** Returns true when all chars fit in one byte and at least one of them is non-ASCII. */
    private static boolean looksLikeLatin1Mojibake(String str) {
        boolean sawHighByte = false;
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c > 0xFF) {
                return false;
            }
            if (c >= 0x80) {
                sawHighByte = true;
            }
        }
        return sawHighByte;
    }

    /**
     * Tells whether a string has no meaningful content.
     *
     * @param str string to test; may be {@code null}
     * @return {@code true} if {@code str} is {@code null}, empty, or consists only
     *         of characters removed by {@link String#trim()}
     */
    public static boolean isEmpty(String str) {
        return str == null || str.trim().isEmpty();
    }
}