头歌-旅游网站大数据分析 - 数据存储

第1关:保存酒店和城市数据

package com.savedata;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import com.alibaba.fastjson.JSONObject;
import com.entity.Hotel;
import com.entity.HotelComment;
import com.util.HBaseUtil;
public class SaveData {
    /**
     * Reads a classpath resource fully into a string, decoding it as UTF-8.
     * The underlying stream is always closed before this method returns.
     *
     * @param name resource name passed to the class loader
     * @return the complete resource content
     * @throws java.io.IOException if the resource is not on the classpath or cannot be read
     */
    private static String readResource(String name) throws java.io.IOException {
        try (InputStream in = SaveData.class.getClassLoader().getResourceAsStream(name)) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            if (in == null) {
                throw new java.io.FileNotFoundException("Classpath resource not found: " + name);
            }
            return IOUtils.toString(in, "UTF-8");
        }
    }

    /**
     * Loads the hotel records from the classpath resources {@code aomen.txt} and
     * {@code hongkong.txt}, parses each as a JSON array of {@link Hotel}, and writes
     * one row per hotel to the {@code t_city_hotels_info} table. The row key is
     * {@code <cityId>_<hotelId>}; city fields go to the {@code cityInfo} family and
     * hotel fields to the {@code hotel_info} family.
     *
     * <p>Failures (table creation, reading, parsing, writing) are printed to stderr
     * and not propagated to the caller.
     */
    public static void saveCityAndHotelInfo() {
        /**********   Begin   **********/
        try {
            HBaseUtil.createTable("t_city_hotels_info", new String[] { "cityInfo", "hotel_info" });
        } catch (Exception e) {
            // Table creation failed; report it and still attempt the load below.
            e.printStackTrace();
        }
        List<Put> puts = new ArrayList<>();
        try {
            // Read both city files; readResource closes each stream after use.
            List<Hotel> hotels = JSONObject.parseArray(readResource("aomen.txt"), Hotel.class);
            List<Hotel> hongkongHotels = JSONObject.parseArray(readResource("hongkong.txt"), Hotel.class);
            hotels.addAll(hongkongHotels);

            byte[] cityFamily = Bytes.toBytes("cityInfo");
            byte[] hotelFamily = Bytes.toBytes("hotel_info");
            for (Hotel hotel : hotels) {
                String cityId = hotel.getCity_id();
                String hotelId = hotel.getId();
                Put put = new Put(Bytes.toBytes(cityId + "_" + hotelId));
                // City columns.
                put.addColumn(cityFamily, Bytes.toBytes("cityId"), Bytes.toBytes(cityId));
                put.addColumn(cityFamily, Bytes.toBytes("cityName"), Bytes.toBytes(hotel.getCity_name()));
                put.addColumn(cityFamily, Bytes.toBytes("pinyin"), Bytes.toBytes(hotel.getPinyin()));
                put.addColumn(cityFamily, Bytes.toBytes("collectionTime"),
                        Bytes.toBytes(hotel.getCollectionTime()));
                // Hotel columns; numeric-looking fields are stored via their String form.
                put.addColumn(hotelFamily, Bytes.toBytes("id"), Bytes.toBytes(hotel.getId()));
                put.addColumn(hotelFamily, Bytes.toBytes("name"), Bytes.toBytes(hotel.getName()));
                put.addColumn(hotelFamily, Bytes.toBytes("price"),
                        Bytes.toBytes(String.valueOf(hotel.getPrice())));
                put.addColumn(hotelFamily, Bytes.toBytes("lon"), Bytes.toBytes(String.valueOf(hotel.getLon())));
                put.addColumn(hotelFamily, Bytes.toBytes("url"), Bytes.toBytes(hotel.getUrl()));
                put.addColumn(hotelFamily, Bytes.toBytes("img"), Bytes.toBytes(hotel.getImg()));
                put.addColumn(hotelFamily, Bytes.toBytes("address"), Bytes.toBytes(hotel.getAddress()));
                put.addColumn(hotelFamily, Bytes.toBytes("score"),
                        Bytes.toBytes(String.valueOf(hotel.getScore())));
                put.addColumn(hotelFamily, Bytes.toBytes("dpscore"),
                        Bytes.toBytes(String.valueOf(hotel.getDpscore())));
                put.addColumn(hotelFamily, Bytes.toBytes("dpcount"),
                        Bytes.toBytes(String.valueOf(hotel.getDpcount())));
                put.addColumn(hotelFamily, Bytes.toBytes("star"), Bytes.toBytes(hotel.getStar()));
                put.addColumn(hotelFamily, Bytes.toBytes("stardesc"), Bytes.toBytes(hotel.getStardesc()));
                put.addColumn(hotelFamily, Bytes.toBytes("shortName"), Bytes.toBytes(hotel.getShortName()));
                put.addColumn(hotelFamily, Bytes.toBytes("isSingleRec"),
                        Bytes.toBytes(hotel.getIsSingleRec()));
                puts.add(put);
            }
            // Write all rows in one batch.
            HBaseUtil.putByTable("t_city_hotels_info", puts);
        } catch (Exception e) {
            e.printStackTrace();
        }
        /**********   End   **********/
    }

    /**
     * Fetches and stores hotel comment data. Intentionally left empty in this
     * stage of the exercise.
     */
    public static void saveCommentInfo() {
        /**********   Begin   **********/




        /**********   End   **********/
    }
}

第2关:保存酒店评论信息

package com.savedata;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import com.alibaba.fastjson.JSONObject;
import com.entity.Hotel;
import com.entity.HotelComment;
import com.util.HBaseUtil;
public class SaveData {
    /**
     * Reads a classpath resource fully into a string, decoding it as UTF-8.
     * The underlying stream is always closed before this method returns.
     *
     * @param name resource name passed to the class loader
     * @return the complete resource content
     * @throws java.io.IOException if the resource is not on the classpath or cannot be read
     */
    private static String readResource(String name) throws java.io.IOException {
        try (InputStream in = SaveData.class.getClassLoader().getResourceAsStream(name)) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            if (in == null) {
                throw new java.io.FileNotFoundException("Classpath resource not found: " + name);
            }
            return IOUtils.toString(in, "UTF-8");
        }
    }

    /**
     * Loads the hotel records from the classpath resources {@code aomen.txt} and
     * {@code hongkong.txt}, parses each as a JSON array of {@link Hotel}, and writes
     * one row per hotel to the {@code t_city_hotels_info} table. The row key is
     * {@code <cityId>_<hotelId>}; city fields go to the {@code cityInfo} family and
     * hotel fields to the {@code hotel_info} family.
     *
     * <p>Failures (table creation, reading, parsing, writing) are printed to stderr
     * and not propagated to the caller.
     */
    public static void saveCityAndHotelInfo() {
        /**********   Begin   **********/
        try {
            HBaseUtil.createTable("t_city_hotels_info", new String[] { "cityInfo", "hotel_info" });
        } catch (Exception e) {
            // Table creation failed; report it and still attempt the load below.
            e.printStackTrace();
        }
        List<Put> puts = new ArrayList<>();
        try {
            // Read both city files; readResource closes each stream after use.
            List<Hotel> hotels = JSONObject.parseArray(readResource("aomen.txt"), Hotel.class);
            List<Hotel> hongkongHotels = JSONObject.parseArray(readResource("hongkong.txt"), Hotel.class);
            hotels.addAll(hongkongHotels);

            byte[] cityFamily = Bytes.toBytes("cityInfo");
            byte[] hotelFamily = Bytes.toBytes("hotel_info");
            for (Hotel hotel : hotels) {
                String cityId = hotel.getCity_id();
                String hotelId = hotel.getId();
                Put put = new Put(Bytes.toBytes(cityId + "_" + hotelId));
                // City columns.
                put.addColumn(cityFamily, Bytes.toBytes("cityId"), Bytes.toBytes(cityId));
                put.addColumn(cityFamily, Bytes.toBytes("cityName"), Bytes.toBytes(hotel.getCity_name()));
                put.addColumn(cityFamily, Bytes.toBytes("pinyin"), Bytes.toBytes(hotel.getPinyin()));
                put.addColumn(cityFamily, Bytes.toBytes("collectionTime"),
                        Bytes.toBytes(hotel.getCollectionTime()));
                // Hotel columns; numeric-looking fields are stored via their String form.
                put.addColumn(hotelFamily, Bytes.toBytes("id"), Bytes.toBytes(hotel.getId()));
                put.addColumn(hotelFamily, Bytes.toBytes("name"), Bytes.toBytes(hotel.getName()));
                put.addColumn(hotelFamily, Bytes.toBytes("price"),
                        Bytes.toBytes(String.valueOf(hotel.getPrice())));
                put.addColumn(hotelFamily, Bytes.toBytes("lon"), Bytes.toBytes(String.valueOf(hotel.getLon())));
                put.addColumn(hotelFamily, Bytes.toBytes("url"), Bytes.toBytes(hotel.getUrl()));
                put.addColumn(hotelFamily, Bytes.toBytes("img"), Bytes.toBytes(hotel.getImg()));
                put.addColumn(hotelFamily, Bytes.toBytes("address"), Bytes.toBytes(hotel.getAddress()));
                put.addColumn(hotelFamily, Bytes.toBytes("score"),
                        Bytes.toBytes(String.valueOf(hotel.getScore())));
                put.addColumn(hotelFamily, Bytes.toBytes("dpscore"),
                        Bytes.toBytes(String.valueOf(hotel.getDpscore())));
                put.addColumn(hotelFamily, Bytes.toBytes("dpcount"),
                        Bytes.toBytes(String.valueOf(hotel.getDpcount())));
                put.addColumn(hotelFamily, Bytes.toBytes("star"), Bytes.toBytes(hotel.getStar()));
                put.addColumn(hotelFamily, Bytes.toBytes("stardesc"), Bytes.toBytes(hotel.getStardesc()));
                put.addColumn(hotelFamily, Bytes.toBytes("shortName"), Bytes.toBytes(hotel.getShortName()));
                put.addColumn(hotelFamily, Bytes.toBytes("isSingleRec"),
                        Bytes.toBytes(hotel.getIsSingleRec()));
                puts.add(put);
            }
            // Write all rows in one batch.
            HBaseUtil.putByTable("t_city_hotels_info", puts);
        } catch (Exception e) {
            e.printStackTrace();
        }
        /**********   End   **********/
    }

    /**
     * Loads hotel comments from the classpath resource {@code comment.txt}, parses it
     * as a JSON array of {@link HotelComment}, and writes one row per comment to the
     * {@code t_hotel_comment} table. The row key is {@code <hotelId>_<commentId>};
     * hotel fields go to the {@code hotel_info} family and comment fields to the
     * {@code comment_info} family.
     *
     * <p>All strings are encoded with {@code Bytes.toBytes} so that row keys and cell
     * values use one encoding regardless of the JVM's default charset.
     *
     * <p>Failures (table creation, reading, parsing, writing) are printed to stderr
     * and not propagated to the caller.
     */
    public static void saveCommentInfo() {
        /**********   Begin   **********/
        // Create the comment table.
        try {
            HBaseUtil.createTable("t_hotel_comment", new String[] { "hotel_info", "comment_info" });
        } catch (Exception e) {
            // Table creation failed; report it and still attempt the load below.
            e.printStackTrace();
        }
        try {
            List<HotelComment> comments = JSONObject.parseArray(readResource("comment.txt"), HotelComment.class);
            List<Put> puts = new ArrayList<>();

            byte[] hotelFamily = Bytes.toBytes("hotel_info");
            byte[] commentFamily = Bytes.toBytes("comment_info");
            for (HotelComment comment : comments) {
                Put put = new Put(Bytes.toBytes(comment.getHotel_id() + "_" + comment.getId()));
                put.addColumn(hotelFamily, Bytes.toBytes("hotel_name"), Bytes.toBytes(comment.getHotel_name()));
                put.addColumn(hotelFamily, Bytes.toBytes("hotel_id"), Bytes.toBytes(comment.getHotel_id()));
                // The data set is large, so only the fields used for analysis are stored.
                put.addColumn(commentFamily, Bytes.toBytes("id"), Bytes.toBytes(String.valueOf(comment.getId())));
                put.addColumn(commentFamily, Bytes.toBytes("baseRoomId"),
                        Bytes.toBytes(String.valueOf(comment.getBaseRoomId())));
                // The room name is written only when a room id is set (not -1) and a name is present.
                if (comment.getBaseRoomId() != -1 && comment.getBaseRoomName() != null) {
                    put.addColumn(commentFamily, Bytes.toBytes("baseRoomName"),
                            Bytes.toBytes(comment.getBaseRoomName()));
                }
                put.addColumn(commentFamily, Bytes.toBytes("checkInDate"), Bytes.toBytes(comment.getCheckInDate()));
                put.addColumn(commentFamily, Bytes.toBytes("postDate"), Bytes.toBytes(comment.getPostDate()));
                put.addColumn(commentFamily, Bytes.toBytes("content"), Bytes.toBytes(comment.getContent()));
                put.addColumn(commentFamily, Bytes.toBytes("highlightPosition"),
                        Bytes.toBytes(comment.getHighlightPosition()));
                put.addColumn(commentFamily, Bytes.toBytes("hasHotelFeedback"),
                        Bytes.toBytes(String.valueOf(comment.getHasHotelFeedback())));
                put.addColumn(commentFamily, Bytes.toBytes("userNickName"),
                        Bytes.toBytes(comment.getUserNickName()));
                puts.add(put);
            }
            // Write all rows in one batch.
            HBaseUtil.putByTable("t_hotel_comment", puts);
        } catch (Exception e) {
            e.printStackTrace();
        }
        /**********   End   **********/
    }
}

  • 4
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值