java中从高德地图爬取数据

    最近我一个人负责公司的一个App项目开发,需要从高德地图爬取杭州市全部的超市信息,存入MongoDB数据库中,用来做地理位置查询。(MongoDB这部分有时间再补上)


   首先去高德地图创建一个开发者账号,申请一个用于Web服务开发的高德key。这个key是必须要有的,也可以先用我这个从百度上搜到的key试一下。


 废话不多说,直接上代码:

package com.pingogo.visit.service;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.pingogo.api.common.HttpUtils;
import com.pingogo.visit.domain.Shop;
import jxl.Cell;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
//import org.apache.poi.ss.usermodel.Workbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Created by cw on 2017/8/29.
 *
 */
public class AddressLngLatExchange {
    private static final String KEY = "389880a06e3f893ea46036f030c94700";
    private static final String OUTPUT = "JSON";
    private static final String GET_LNG_LAT_URL = "http://restapi.amap.com/v3/geocode/geo";

    private static final String GET_LNG_PIO_URL = "http://restapi.amap.com/v3/place/polygon";

    private static final Logger LOGGER = LoggerFactory.getLogger(AddressLngLatExchange.class);


    /**
     * Looks up the coordinates of a single address by POSTing it to the geocoding
     * endpoint {@code GET_LNG_LAT_URL}.
     *
     * <p>When the response lists several geocodes, the location of the last usable
     * one is returned (same as the previous behaviour of this method).
     *
     * @param address the address to geocode
     * @return {@code null} when {@code address} is blank; otherwise the {@code location}
     *         field of the response split on {@code ","} into two strings (presumably
     *         longitude then latitude, as the method name suggests). If the request
     *         fails, the response is empty, or no usable location is present, a
     *         two-element array whose entries are both {@code null} is returned.
     */
    public static String[] getLngLatFromOneAddr(String address){
        if (StringUtils.isBlank(address)) {
            LOGGER.error("地址(" + address + ")为null或者空");
            return null;
        }

        Map<String, String> params = new HashMap<String, String>();
        params.put("address", address);
        params.put("output", OUTPUT);
        params.put("key", KEY);

        // Default result for every failure path below: {null, null}.
        String[] lngLatArr = new String[2];

        String result = HttpUtils.URLPost(GET_LNG_LAT_URL, params, "");
        // The HTTP helper returns an empty string on transport errors / non-200 responses;
        // parsing that would yield null and crash on the first field access.
        JSONObject jsonObject = StringUtils.isBlank(result) ? null : JSONObject.parseObject(result);
        if (jsonObject == null) {
            LOGGER.error("地址(" + address + ")" + "empty response from geocoding service");
            return lngLatArr;
        }

        // status: "0" = request failed, "1" = request succeeded.
        // Compared as a string so that a missing field cannot raise NumberFormatException.
        if (!"1".equals(jsonObject.getString("status"))) {
            String errorMsg = jsonObject.getString("info");
            LOGGER.error("地址(" + address + ")" + errorMsg);
            return lngLatArr;
        }

        JSONArray jsonArray = jsonObject.getJSONArray("geocodes");
        if (jsonArray == null) {
            return lngLatArr;
        }
        for (int i = 0; i < jsonArray.size(); i++) {
            JSONObject json = jsonArray.getJSONObject(i);
            String lngLat = (json == null) ? null : json.getString("location");
            if (StringUtils.isBlank(lngLat)) {
                continue;
            }
            String[] parts = lngLat.split(",");
            // Only accept a well-formed "x,y" pair so callers can index [0] and [1] safely.
            if (parts.length == 2) {
                lngLatArr = parts;
            }
        }
        return lngLatArr;
    }

    /**
     * Collects every POI matching {@code keyword} inside the given polygon from the
     * endpoint {@code GET_LNG_PIO_URL} and appends one {@link Shop} per POI to
     * {@code shopListSon}.
     *
     * <p>The method sleeps five seconds before the first request, then walks through
     * all result pages (20 entries per page, page count derived from the {@code count}
     * field of the first response). The first response is reused as page 1 instead of
     * being requested a second time.
     *
     * @param lonLat      polygon to search, e.g. {@code "118.21,29.11;120.30,30.33"}
     * @param keyword     search keyword; when blank nothing is requested
     * @param shopListSon list to append to; a new list is created when {@code null}
     * @return the list that was appended to (the same instance as {@code shopListSon}
     *         unless that was {@code null})
     */
    public static List<Shop> initialData(String lonLat, String keyword, List<Shop> shopListSon){
        List<Shop> target = (shopListSon != null) ? shopListSon : new ArrayList<Shop>();
        if (StringUtils.isBlank(keyword)) {
            LOGGER.error("地址(" + keyword + ")为null或者空");
            return target;
        }

        try {
            // Pause before each polygon query (same delay as before).
            Thread.sleep(5000);
        } catch (InterruptedException e1) {
            // Keep the interrupt visible to the caller and stop instead of swallowing it.
            Thread.currentThread().interrupt();
            LOGGER.error("地址(" + keyword + ")" + "interrupted before the request was sent", e1);
            return target;
        }

        final int pageSize = 20;
        Map<String, String> params = new HashMap<String, String>();
        params.put("polygon", lonLat);//"118.21,29.11;120.30,30.33"
        params.put("output", OUTPUT);
        params.put("keywords", keyword);
        params.put("offset", String.valueOf(pageSize));
        params.put("page", "1");
        params.put("key", KEY);

        JSONObject firstPage = requestPoiPage(params);
        if (!isPoiSuccess(firstPage)) {
            logPoiFailure(keyword, firstPage);
            return target;
        }

        int count;
        try {
            count = Integer.parseInt(firstPage.getString("count"));
        } catch (NumberFormatException e) {
            LOGGER.error("地址(" + keyword + ")" + "unreadable count: " + firstPage.getString("count"));
            return target;
        }
        // Ceiling division: a partially filled last page still counts as a page.
        int pageNumber = (count + pageSize - 1) / pageSize;

        for (int i = 1; i <= pageNumber; i++) {
            JSONObject page;
            if (i == 1) {
                page = firstPage;
            } else {
                params.put("page", String.valueOf(i));
                page = requestPoiPage(params);
            }
            if (isPoiSuccess(page)) {
                collectShops(page, target);
            } else {
                // Report the message of the page that failed, not of the first response.
                logPoiFailure(keyword, page);
            }
        }
        return target;
    }

    /** Sends one polygon-search request; returns the parsed body, or {@code null} when the body is blank. */
    private static JSONObject requestPoiPage(Map<String, String> params) {
        String result = HttpUtils.URLGet(GET_LNG_PIO_URL, params, "UTF-8");
        LOGGER.debug("+++++++++" + result);
        if (StringUtils.isBlank(result)) {
            return null;
        }
        return JSONObject.parseObject(result);
    }

    /** Tells whether a response is present and carries status "1" (0 = failed, 1 = succeeded). */
    private static boolean isPoiSuccess(JSONObject response) {
        return response != null && "1".equals(response.getString("status"));
    }

    /** Logs the {@code info} message of a failed (or missing) polygon-search response. */
    private static void logPoiFailure(String keyword, JSONObject response) {
        String errorMsg = (response == null) ? "empty response" : response.getString("info");
        LOGGER.error("地址(" + keyword + ")" + errorMsg);
    }

    /** Converts every entry of the page's {@code pois} array into a {@link Shop} and appends it to {@code target}. */
    private static void collectShops(JSONObject page, List<Shop> target) {
        JSONArray pois = page.getJSONArray("pois");
        if (pois == null) {
            return;
        }
        for (int j = 0; j < pois.size(); j++) {
            JSONObject poi = pois.getJSONObject(j);
            if (poi == null) {
                continue;
            }
            String location = poi.getString("location");
            String[] lonLatParts = (location == null) ? new String[0] : location.split(",");
            if (lonLatParts.length < 2) {
                // Without both coordinates the entry cannot be stored as a located shop.
                LOGGER.error("POI(" + poi.getString("id") + ") skipped, unusable location: " + location);
                continue;
            }
            Shop shop = new Shop();
            shop.setShopName(poi.getString("name"));
            shop.setSpecificAddress(poi.getString("address"));
            shop.setId(poi.getString("id"));
            shop.setLongitude(lonLatParts[0]);
            shop.setLatitude(lonLatParts[1]);
            target.add(shop);
        }
    }

    //从高德地图上取数据
//    public static void main(String[] args) {
//        List<Shop> listShop =new ArrayList<>();
//        //东经118°21′-120°30′,北纬29°11′-30°33′。杭州位置
//        for(double i=118.20;i<=120.31;i=i+0.1){
//            for(double j=29.10;j<=30.33;j=j+0.1){
//                List<Shop> listShopSon =new ArrayList<>();
//                double lonHead=i;
//                double latHead=j;
//                double lonTail=i+0.1;
//                double latTail=j+0.1;
//                String LonLat=lonHead+","+latHead+";"+lonTail+","+latTail;
//                listShopSon =initialData(LonLat,"便利店",listShopSon);
//                for(int n=0;n<listShopSon.size();n++){
//                    System.out.println("店铺地址:"+listShopSon.get(n).getSpecificAddress());
//                }
//                if(listShopSon.size()>0){
//                    listShop.addAll(listShopSon);
//                }
//                System.out.println("ListShop的大小:"+listShop.size());
//                double d =Distance(lonHead,latHead,lonTail,latTail);
//                System.out.println("两点距离"+d);
//
//            }
//
//        }
//
//        System.out.println("ListShop的大小:"+listShop.size());
//        creatExcel(listShop);
//    }

    /**
     * Entry point: reads the sample workbook through {@code readFile}; the returned
     * list is not used.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String workbookPath = "D:\\geode\\222.xls";
        readFile(workbookPath);
    }


    /**
     * Writes the shops to the default workbook location
     * {@code D:\geode\高德便利店地图数据.xls}.
     *
     * @param shopList shops to export; {@code null} is treated as an empty list
     */
    public static void creatExcel(List<Shop> shopList){
        creatExcel(shopList, "D:\\geode\\高德便利店地图数据.xls");
    }

    /**
     * Writes the shops to an .xls workbook with one sheet: a header row followed by one
     * row per shop (id, name, address, longitude, latitude).
     *
     * <p>I/O failures are reported on standard error and do not propagate.
     *
     * @param shopList   shops to export; {@code null} is treated as an empty list
     * @param outputPath path of the file to create or overwrite
     */
    public static void creatExcel(List<Shop> shopList, String outputPath){
        HSSFWorkbook workbook = new HSSFWorkbook();
        HSSFSheet sheet = workbook.createSheet("高德地图数据");

        // Header row (row 0).
        String[] titles = {"店铺id", "店铺名称", "店铺地址", "经度", "纬度"};
        HSSFRow header = sheet.createRow(0);
        for (int col = 0; col < titles.length; col++) {
            header.createCell(col).setCellValue(titles[col]);
        }

        // Data rows start at row 1, directly below the header.
        if (shopList != null) {
            for (int i = 0; i < shopList.size(); i++) {
                Shop shop = shopList.get(i);
                HSSFRow dataRow = sheet.createRow(i + 1);
                dataRow.createCell(0).setCellValue(shop.getId());
                dataRow.createCell(1).setCellValue(shop.getShopName());
                dataRow.createCell(2).setCellValue(shop.getSpecificAddress());
                dataRow.createCell(3).setCellValue(shop.getLongitude());
                dataRow.createCell(4).setCellValue(shop.getLatitude());
            }
        }

        // try-with-resources closes the stream even when write() throws.
        try (FileOutputStream fos = new FileOutputStream(outputPath)) {
            workbook.write(fos);
            System.out.println("写入成功");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes the great-circle distance between two coordinates with the haversine
     * formula, using an Earth radius of 6371 km.
     *
     * @param long1 longitude of the first point, in degrees
     * @param lat1  latitude of the first point, in degrees
     * @param long2 longitude of the second point, in degrees
     * @param lat2  latitude of the second point, in degrees
     * @return the distance in metres, rounded half-up to two decimal places
     */
    public static double Distance(double long1, double lat1, double long2, double lat2) {
        final double earthRadiusKm = 6371;

        // Degrees -> radians, written out so the arithmetic order stays fixed.
        final double phi1 = lat1 * Math.PI / 180.0;
        final double phi2 = lat2 * Math.PI / 180.0;
        final double deltaPhi = phi1 - phi2;
        final double deltaLambda = (long1 - long2) * Math.PI / 180.0;

        final double sinHalfPhi = Math.sin(deltaPhi / 2.0);
        final double sinHalfLambda = Math.sin(deltaLambda / 2.0);
        final double haversine = sinHalfPhi * sinHalfPhi
                + Math.cos(phi1) * Math.cos(phi2) * sinHalfLambda * sinHalfLambda;

        final double kilometres = 2 * earthRadiusKm * Math.asin(Math.sqrt(haversine));

        // Kilometres -> metres, then round to centimetre precision.
        return new BigDecimal(kilometres * 1000)
                .setScale(2, java.math.RoundingMode.HALF_UP)
                .doubleValue();
    }

    /**
     * Reads shops from the first sheet of an .xls workbook (jxl API).
     *
     * <p>Row 0 is treated as the header and skipped. For every following row the first
     * five columns are mapped, in order, to id, shop name, address, longitude and
     * latitude. Read failures are reported on standard error; the rows read so far are
     * still returned.
     *
     * @param filename path of the workbook to read
     * @return the shops read from the sheet, never {@code null}
     */
    public static List<Shop> readFile(String filename){
        List<Shop> shopList = new ArrayList<>();
        Workbook wb = null;
        // try-with-resources releases the file handle on every path.
        try (InputStream in = new FileInputStream(filename)) {
            wb = Workbook.getWorkbook(in);
            jxl.Sheet s = wb.getSheet(0);
            // Only the first five columns carry shop fields.
            int columns = Math.min(s.getColumns(), 5);
            for (int i = 1; i < s.getRows(); i++) {
                Shop shop = new Shop();
                for (int j = 0; j < columns; j++) {
                    String value = s.getCell(j, i).getContents();
                    switch (j) {
                        case 0:
                            shop.setId(value);
                            break;
                        case 1:
                            shop.setShopName(value);
                            break;
                        case 2:
                            shop.setSpecificAddress(value);
                            break;
                        case 3:
                            shop.setLongitude(value);
                            break;
                        default:
                            shop.setLatitude(value);
                            break;
                    }
                }
                shopList.add(shop);
            }
        } catch (BiffException | IOException e) {
            e.printStackTrace();
        } finally {
            // The jxl workbook keeps its own resources and must be closed explicitly.
            if (wb != null) {
                wb.close();
            }
        }
        return shopList;
    }
}

由于高德地图对它的数据做了保护,我这边采用的是矩形搜索:先百度查到杭州的经纬度范围,把它划分成多个小矩形,然后对每个小矩形调用高德地图的API服务。我这边先将爬取的数据写入Excel表格中,一是为了展示、验证数据是否准确,二是担心直接写入会有内存泄漏问题。上面的代码里既有写入Excel的代码,也有读取Excel的代码。不过要注意一下,两者用的jar包不同:写入用的是poi,读取用的是jxl。


这边发起HTTP请求用的客户端工具类代码如下。它是在网上找到的,首先谢谢分享的人;因为已经过了一段时间,原文地址忘记了。一开始我用的是下面这种写法:

// First attempt: open the connection with the JDK's own URL/URLConnection classes.
URL myURL = null;
URLConnection httpsConn = null;
try {
    myURL = new URL(url);
} catch (MalformedURLException e) {
    // A malformed URL is only printed; myURL stays null and the
    // openConnection() call below would then throw a NullPointerException.
    e.printStackTrace();
}
// Declared for reading the response; not used within this fragment.
InputStreamReader insr = null;
BufferedReader br = null;
httpsConn = (URLConnection) myURL.openConnection();// no proxy is used
但是在Tomcat项目中调用时报错了,原因现在还没有弄清楚,知道原因的朋友请告诉我一二。

package com.pingogo.api.common;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * HTTP工具类
 *
 * @author lixiangyang
 *
 */
public class HttpUtils {

    private static Log log = LogFactory.getLog(HttpUtils.class);

    /**
     * 定义编码格式 UTF-8
     */
    public static final String URL_PARAM_DECODECHARSET_UTF8 = "UTF-8";

    /**
     * 定义编码格式 GBK
     */
    public static final String URL_PARAM_DECODECHARSET_GBK = "GBK";

    private static final String URL_PARAM_CONNECT_FLAG = "&";

    private static final String EMPTY = "";

    private static MultiThreadedHttpConnectionManager connectionManager = null;

    private static int connectionTimeOut = 25000;

    private static int socketTimeOut = 25000;

    private static int maxConnectionPerHost = 20;

    private static int maxTotalConnections = 20;

    private static HttpClient client;

    // Builds the shared connection manager from the timeout / pool-size settings
    // declared above, then creates the single HttpClient that uses it.
    static{
        MultiThreadedHttpConnectionManager manager = new MultiThreadedHttpConnectionManager();
        manager.getParams().setConnectionTimeout(connectionTimeOut);
        manager.getParams().setSoTimeout(socketTimeOut);
        manager.getParams().setDefaultMaxConnectionsPerHost(maxConnectionPerHost);
        manager.getParams().setMaxTotalConnections(maxTotalConnections);
        connectionManager = manager;
        client = new HttpClient(manager);
    }

    /**
     * Submits form data with an HTTP POST.
     *
     * <p>Transport and protocol errors are logged and do not propagate.
     *
     * @param url
     *          the URL to request
     * @param params
     *          the form fields to submit; {@code null} sends no fields, and a
     *          {@code null} value is sent as an empty string
     * @param enc
     *          charset announced in the Content-Type header; falls back to UTF-8
     *          when {@code null} or blank
     * @return
     *          the response body when the server answers 200, otherwise an empty string
     */
    public static String URLPost(String url, Map<String, String> params, String enc){
        // Honour the caller's charset; only a missing/blank value defaults to UTF-8.
        if (enc == null || enc.trim().length() == 0) {
            enc = URL_PARAM_DECODECHARSET_UTF8;
        }
        String response = EMPTY;
        PostMethod postMethod = null;
        try {
            postMethod = new PostMethod(url);
            postMethod.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=" + enc);
            // Copy the form fields into the request.
            if (params != null) {
                for (Map.Entry<String, String> entry : params.entrySet()) {
                    String value = entry.getValue();
                    // addParameter rejects null arguments, so null values are sent as "".
                    postMethod.addParameter(entry.getKey(), value != null ? value : EMPTY);
                }
            }
            // Execute the request.
            int statusCode = client.executeMethod(postMethod);
            if (statusCode == HttpStatus.SC_OK) {
                response = postMethod.getResponseBodyAsString();
            } else {
                log.error("响应状态码 = " + postMethod.getStatusCode());
            }
        } catch (HttpException e) {
            log.error("发生致命的异常,可能是协议不对或者返回的内容有问题", e);
        } catch (IOException e) {
            log.error("发生网络异常", e);
        } finally {
            // Always hand the connection back to the shared pool.
            if (postMethod != null) {
                postMethod.releaseConnection();
            }
        }

        return response;
    }

    /**
     * Sends an HTTP GET with the given parameters appended as a query string.
     *
     * <p>The query string is joined to {@code url} with {@code "?"}, or with
     * {@code "&"} when {@code url} already contains a query part; nothing is appended
     * when there are no parameters. Transport and protocol errors are logged and do
     * not propagate.
     *
     * @param url
     *          the URL to request
     * @param params
     *          the query parameters to append
     * @param enc
     *          charset used to URL-encode the parameter values
     * @return
     *          the response body when the server answers 200, otherwise an empty string
     */
    public static String URLGet(String url, Map<String, String> params, String enc){

        String response = EMPTY;
        if (url == null) {
            log.error("GET request URL is null");
            return response;
        }

        StringBuffer strtTotalURL = new StringBuffer(url);
        String query = getUrl(params, enc);
        if (query.length() > 0) {
            // Inspect the caller's URL (not the still-empty buffer) to choose the separator.
            if (url.indexOf("?") == -1) {
                strtTotalURL.append("?");
            } else if (!url.endsWith("?") && !url.endsWith("&")) {
                strtTotalURL.append("&");
            }
            strtTotalURL.append(query);
        }
        log.debug("GET请求URL = \n" + strtTotalURL.toString());

        GetMethod getMethod = null;
        try {
            getMethod = new GetMethod(strtTotalURL.toString());
            getMethod.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=" + enc);
            // Execute the request.
            int statusCode = client.executeMethod(getMethod);
            if (statusCode == HttpStatus.SC_OK) {
                response = getMethod.getResponseBodyAsString();
            } else {
                // Same level as URLPost: a non-200 answer means the caller gets no data.
                log.error("响应状态码 = " + getMethod.getStatusCode());
            }
        } catch (HttpException e) {
            log.error("发生致命的异常,可能是协议不对或者返回的内容有问题", e);
        } catch (IOException e) {
            log.error("发生网络异常", e);
        } finally {
            // Always hand the connection back to the shared pool.
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }

        return response;
    }

    /**
     * Builds a URL query string ({@code k1=v1&k2=v2...}) from a map.
     *
     * <p>Values are URL-encoded with {@code valueEnc}; a {@code null} value is written
     * as an empty string. Keys are written as they are. If the charset is not
     * supported, the stack trace is printed and the value is appended unencoded.
     *
     * @param map
     *          parameters to serialise
     * @param valueEnc
     *          charset name used to encode the values
     * @return
     *          the query string without a leading {@code "?"}; empty when the map is
     *          {@code null} or has no entries
     */
    private static String getUrl(Map<String, String> map, String valueEnc) {

        if (map == null || map.keySet().size() == 0) {
            return EMPTY;
        }

        StringBuffer query = new StringBuffer();
        for (String name : map.keySet()) {
            if (!map.containsKey(name)) {
                continue;
            }
            String raw = map.get(name);
            String encoded = (raw == null) ? EMPTY : raw;
            try {
                encoded = URLEncoder.encode(encoded, valueEnc);
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            }
            // Put the separator in front of every pair except the first, so that
            // no trailing separator has to be trimmed afterwards.
            if (query.length() > 0) {
                query.append(URL_PARAM_CONNECT_FLAG);
            }
            query.append(name).append("=").append(encoded);
        }

        return query.toString();
    }
}

maven里的配置

<dependency>
    <groupId>commons-httpclient</groupId>
    <artifactId>commons-httpclient</artifactId>
    <version>3.1</version>
</dependency>

最后给大家看一下我爬取出来的数据



如果有什么不对的地方,希望大家指点。


试一下付款二维码



评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Java_wucao

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值