1 引言
本文记录百度街景图片的存储过程,主要逻辑是:读取本地图片,存mysql,代码直接下载稍作修改就可运行,存储相关使用的是Java原生接口。更多代码以及百度街景相关背景参考另一篇博客-----百度地图街景图片爬取并存库。
2 建表
表字段主要分为两部分:当前街景部分和历史街景部分,每部分都包含4大属性:id、拍摄日期、图片url(4张)、图片二进制流。为了更好的搜索,建立相关索引
CREATE TABLE `test`.`baidu_pano_pics` (
`id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`cur_pano_id` char(30) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '当前街景位置ID',
`cur_pano_date` varchar(6) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '当前街景的拍摄日期',
`cur_pic_url_1` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '图片1url',
`cur_pic_url_2` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '图片2url',
`cur_pic_url_3` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '图片3url',
`cur_pic_url_4` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '图片4url',
`cur_pic_1` mediumblob NULL COMMENT '图片1(从左往右)',
`cur_pic_2` mediumblob NULL COMMENT '图片2',
`cur_pic_3` mediumblob NULL COMMENT '图片3',
`cur_pic_4` mediumblob NULL COMMENT '图片4',
`his_pano_id` char(30) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '历史街景位置ID',
`his_pano_date` varchar(6) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '历史街景的拍摄日期',
`his_pic_url_1` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '历史图片1url',
`his_pic_url_2` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '历史图片2url',
`his_pic_url_3` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '历史图片3url',
`his_pic_url_4` char(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '历史图片4url',
`his_pic_1` mediumblob NULL COMMENT '历史街景位置图片1',
`his_pic_2` mediumblob NULL COMMENT '历史街景位置图片2',
`his_pic_3` mediumblob NULL COMMENT '历史街景位置图片3',
`his_pic_4` mediumblob NULL COMMENT '历史街景位置图片4',
`timestamp` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP COMMENT '入库时间戳',
PRIMARY KEY USING BTREE (`id`),
UNIQUE INDEX `pano_index` USING BTREE(`cur_pano_id`)
) ENGINE = InnoDB AUTO_INCREMENT = 1 CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '百度街景图片存储表' ROW_FORMAT = Compact;
3 代码
先封装一个数据库连接的工具类DbUtil,ConfigParser类源码移步至作者之前的爬虫相关文章:
/**
* @author zhanghuigen
* @since 0.1.0
**/
public class DbUtil {
private final static Logger LOGGER = Logger.getLogger(DbUtil.class);
private DbUtil(){}
private static class DbUtilHolder{
private static DbUtil singleton = new DbUtil();
}
public static DbUtil getInstance(){
return DbUtilHolder.singleton;
}
// 获取连接
public Connection getConn() throws SQLException {
ConfigParser parser = ConfigParser.getInstance();
String dbAlias = "mysql-data";
Map<String, Object> dbConfig = parser.getModuleConfig("database");
Map<String, Object> mysqlConfig = (Map)parser.assertKey(dbConfig, dbAlias, "database");
String url = (String)parser.assertKey(mysqlConfig, "url", "database." + dbAlias);
String username = (String)parser.assertKey(mysqlConfig, "username", "database." + dbAlias);
String password = (String)parser.assertKey(mysqlConfig, "password", "database." + dbAlias);
return DriverManager.getConnection(url, username, password);
}
// 关闭连接
public void closeConn(Connection conn) {
if (null != conn) {
try {
conn.close();
} catch (SQLException e) {
LOGGER.error("关闭连接失败!");
e.printStackTrace();
}
}
}
}
建立百度街景图片的java bean,通过该类实现相关属性的封装或存取,PanoPic的属性填充详见前文,这里不再赘述。
import java.util.List;
/**
* @author zhanghuigen
* @since 0.1.0
**/
public class PanoPic {
/**
* 当前街景ID
*/
private String curPanoId;
/**
*当前街景拍摄时间
*/
private String curShootDate;
/**
* 当前街景本地存储路径
*/
private List<String> curPicPath;
/**
* 历史街景ID
*/
private String hisPanoId;
/**
* 历史街景拍摄时间
*/
private String hisShootDate;
/**
* 历史街景本地存储路径
*/
private List<String> hisPicPath;
public PanoPic(String curPanoId,String curShootDate,List<String> curPicPath){
this.curPanoId = curPanoId;
this.curShootDate = curShootDate;
this.curPicPath = curPicPath;
}
public String getCurPanoId() {
return curPanoId;
}
public void setCurPanoId(String curPanoId) {
this.curPanoId = curPanoId;
}
public String getCurShootDate() {
return curShootDate;
}
public void setCurShootDate(String shootDate) {
this.curShootDate = shootDate;
}
public List<String> getCurPicPath() {
return curPicPath;
}
public void setCurPicPath(List<String> curPicPath) {
this.curPicPath = curPicPath;
}
public String getHisPanoId() {
return hisPanoId;
}
public void setHisPanoId(String hisPanoId) {
this.hisPanoId = hisPanoId;
}
public String getHisShootDate() {
return hisShootDate;
}
public void setHisShootDate(String hisShootDate) {
this.hisShootDate = hisShootDate;
}
public List<String> getHisPicPath() {
return hisPicPath;
}
public void setHisPicPath(List<String> hisPicPath) {
this.hisPicPath = hisPicPath;
}
}
街景图片存储的工具类封装如下:
/**
* @author zhanghuigen
* @since 0.1.0
**/
public class BlobInsertUtils {
private final static Logger LOGGER = Logger.getLogger(BlobInsertUtils.class);
private final String SQL_NO_HIS;
private final String SQL_WITH_HIS;
public BlobInsertUtils(String tableName){
SQL_NO_HIS = getSQLWithoutHis(tableName);
SQL_WITH_HIS = getSqlWithHis(tableName);
}
/**
* 将当前街景图片插入数据库
*/
public void readImage2DB(String tableName,String panoId, List<String> picPath) {
Connection conn = null;
PreparedStatement ps = null;
ImagePersistence imagePersistence = new ImagePersistence();
List<FileInputStream> fileInputStreamsList = new ArrayList<>(4);
FileInputStream fileInputStream;
try {
for (String pic:picPath){
fileInputStreamsList.add(imagePersistence.readImage(pic));
}
conn = DbUtil.getInstance().getConn();
String sql = "insert into "+ tableName +
"(pano_id,pic_url_1,pic_url_2,pic_url_3,pic_url_4,pic_1,pic_2,pic_3,pic_4,timestamp)values(?,?,?,?,?,?,?,?,?,?)";
ps = conn.prepareStatement(sql);
ps.setString(1, panoId);
for (int i = 0; i < picPath.size(); i++){
fileInputStream = fileInputStreamsList.get(i);
ps.setString(2+i, picPath.get(i));
ps.setBinaryStream(6+i, fileInputStream, fileInputStream.available());
}
ps.setTimestamp(10,new Timestamp(System.currentTimeMillis()));
int count = ps.executeUpdate();
if (count > 0) {
LOGGER.info(panoId + "插入成功!");
} else {
LOGGER.info(panoId + "插入失败!");
}
} catch (Exception e) {
e.printStackTrace();
} finally {
DbUtil.getInstance().closeConn(conn);
if (null != ps) {
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
private String getSql(String tableName){
return "insert into " + tableName +
"(cur_pano_id,cur_pano_date,cur_pic_url_1,cur_pic_url_2,cur_pic_url_3,cur_pic_url_4,cur_pic_1,cur_pic_2,cur_pic_3,cur_pic_4,"+
"(his_pano_id,his_pano_date,his_pic_url_1,his_pic_url_2,his_pic_url_3,his_pic_url_4,his_pic_1,his_pic_2,his_pic_3,his_pic_4,"+
"timestamp)"+
"values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"+
"on duplicate key update"+
"cur_pano_id=?,cur_pano_date=?,cur_pic_url_1=?,cur_pic_url_2=?,cur_pic_url_3=?,cur_pic_url_4=?,cur_pic_1=?,cur_pic_2=?,cur_pic_3=?,cur_pic_4=?"+
"his_pano_id=?,his_pano_date=?,his_pic_url_1=?,his_pic_url_2=?,his_pic_url_3=?,his_pic_url_4=?,his_pic_1=?,his_pic_2=?,his_pic_3=?,his_pic_4=?";
}
private String getSqlWithHis(String tableName){
return "insert into " + tableName +
"(cur_pano_id,cur_pano_date,cur_pic_url_1,cur_pic_url_2,cur_pic_url_3,cur_pic_url_4,cur_pic_1,cur_pic_2,cur_pic_3,cur_pic_4,"+
"his_pano_id,his_pano_date,his_pic_url_1,his_pic_url_2,his_pic_url_3,his_pic_url_4,his_pic_1,his_pic_2,his_pic_3,his_pic_4,"+
"timestamp)"+
"values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
}
private String getSQLWithoutHis(String tableName){
return "insert into " + tableName +
"(cur_pano_id,cur_pano_date,cur_pic_url_1,cur_pic_url_2,cur_pic_url_3,cur_pic_url_4,cur_pic_1,cur_pic_2,cur_pic_3,cur_pic_4,"+
"timestamp)"+
"values(?,?,?,?,?,?,?,?,?,?,?)";
}
/**
* 将当前及历史街景图片插入数据库
*
* @param panoPic bean
*/
public void insertAllImage2DBWithNoCheck(PanoPic panoPic) {
Connection conn = null;
PreparedStatement ps = null;
ImagePersistence imagePersistence = new ImagePersistence();
int allPicNum;
boolean isHisEmpty = true;
List<String> curPicPath = panoPic.getCurPicPath();
List<String> hisPicPath = null;
if (null==panoPic.getHisPicPath()){
allPicNum = curPicPath.size();
}else {
isHisEmpty = false;
hisPicPath = panoPic.getHisPicPath();
allPicNum = curPicPath.size() + panoPic.getHisPicPath().size();
}
List<FileInputStream> fileInputStreamsList = new ArrayList<>(allPicNum);
FileInputStream fileInputStream;
try {
//加载当前街景输入流
for (String curPic:curPicPath){
fileInputStreamsList.add(imagePersistence.readImage(curPic));
}
conn = DbUtil.getInstance().getConn();
//加载历史街景输入流
if (!isHisEmpty){
for (String hisPic:panoPic.getHisPicPath()){
fileInputStreamsList.add(imagePersistence.readImage(hisPic));
}
ps = conn.prepareStatement(SQL_WITH_HIS);
}else {
ps = conn.prepareStatement(SQL_NO_HIS);
}
ps.setString(1, panoPic.getCurPanoId());
ps.setString(2, panoPic.getCurShootDate());
Timestamp timestamp = new Timestamp(System.currentTimeMillis());
for (int i = 0; i < curPicPath.size(); i++){
fileInputStream = fileInputStreamsList.get(i);
ps.setString(3+i, curPicPath.get(i));
ps.setBinaryStream(7+i, fileInputStream, fileInputStream.available());
}
if (!isHisEmpty){
ps.setString(11, panoPic.getHisPanoId());
ps.setString(12, panoPic.getHisShootDate());
for (int j = 0; j < hisPicPath.size(); j++){
fileInputStream = fileInputStreamsList.get(4+j);
ps.setString(13+j, hisPicPath.get(j));
ps.setBinaryStream(17+j, fileInputStream, fileInputStream.available());
}
ps.setTimestamp(21,timestamp);
}else {
ps.setTimestamp(11,timestamp);
}
int count = ps.executeUpdate();
if (count > 0) {
LOGGER.info(panoPic.getCurPanoId() + "插入成功!");
} else {
LOGGER.info(panoPic.getCurPanoId() + "插入失败!");
}
} catch (Exception e) {
e.printStackTrace();
} finally {
DbUtil.getInstance().closeConn(conn);
if (null != ps) {
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
/**
* 将当前及历史街景图片插入数据库
* 已有则更新
* @param panoPic bean
*/
public void readAllImage2DB(PanoPic panoPic) {
Connection conn = null;
PreparedStatement ps = null;
ImagePersistence imagePersistence = new ImagePersistence();
int allPicNum;
boolean isHisEmpty = true;
List<String> curPicPath = panoPic.getCurPicPath();
List<String> hisPicPath = null;
if (null==panoPic.getHisPicPath()){
allPicNum = panoPic.getCurPicPath().size();
}else {
isHisEmpty = false;
hisPicPath = panoPic.getHisPicPath();
allPicNum = panoPic.getCurPicPath().size() + panoPic.getHisPicPath().size();
}
List<FileInputStream> fileInputStreamsList = new ArrayList<>(allPicNum);
FileInputStream fileInputStream;
try {
//加载当前街景输入流
for (String curPic:curPicPath){
fileInputStreamsList.add(imagePersistence.readImage(curPic));
}
//加载历史街景输入流
if (!isHisEmpty){
for (String hisPic:panoPic.getHisPicPath()){
fileInputStreamsList.add(imagePersistence.readImage(hisPic));
}
}
conn = DbUtil.getInstance().getConn();
ps.setString(1, panoPic.getCurPanoId());
ps.setString(2, panoPic.getCurShootDate());
Timestamp timestamp = new Timestamp(System.currentTimeMillis());
for (int i = 0; i < curPicPath.size(); i++){
fileInputStream = fileInputStreamsList.get(i);
ps.setString(3+i, curPicPath.get(i));
ps.setBinaryStream(7+i, fileInputStream, fileInputStream.available());
}
if (!isHisEmpty){
ps.setString(11, panoPic.getHisPanoId());
ps.setString(12, panoPic.getHisShootDate());
for (int j = 0; j < hisPicPath.size(); j++){
fileInputStream = fileInputStreamsList.get(4+j);
ps.setString(13+j, hisPicPath.get(j));
ps.setBinaryStream(17+j, fileInputStream, fileInputStream.available());
}
ps.setTimestamp(21,timestamp);
//on duplicate key update 后续参数拼接
ps.setString(22, panoPic.getCurPanoId());
ps.setString(23, panoPic.getCurShootDate());
for (int i = 0; i < curPicPath.size(); i++){
fileInputStream = fileInputStreamsList.get(i);
ps.setString(24+i, curPicPath.get(i));
ps.setBinaryStream(28+i, fileInputStream, fileInputStream.available());
}
ps.setString(32, panoPic.getHisPanoId());
ps.setString(33, panoPic.getHisShootDate());
for (int j = 0; j < hisPicPath.size(); j++){
fileInputStream = fileInputStreamsList.get(4+j);
ps.setString(34+j, hisPicPath.get(j));
ps.setBinaryStream(38+j, fileInputStream, fileInputStream.available());
}
}else {
ps.setTimestamp(21,timestamp);
//on duplicate key update 后续参数拼接
ps.setString(22, panoPic.getCurPanoId());
ps.setString(23, panoPic.getCurShootDate());
for (int i = 0; i < curPicPath.size(); i++){
fileInputStream = fileInputStreamsList.get(i);
ps.setString(24+i, curPicPath.get(i));
ps.setBinaryStream(28+i, fileInputStream, fileInputStream.available());
}
}
ps.setTimestamp(42,timestamp);
int count = ps.executeUpdate();
if (count > 0) {
LOGGER.info(panoPic.getCurPanoId() + "插入成功!");
} else {
LOGGER.info(panoPic.getCurPanoId() + "插入失败!");
}
} catch (Exception e) {
e.printStackTrace();
} finally {
DbUtil.getInstance().closeConn(conn);
if (null != ps) {
try {
ps.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
}
}
这里有个输入流的封装(线程安全),传入文件路径参数,用于读取本地图片,源码如下:
/**
* @author zhanghuigen
* @since 0.1.0
**/
public class ImagePersistence {
// 读取本地图片获取输入流
public FileInputStream readImage(String path) throws IOException {
return new FileInputStream(new File(path));
}
// 读取表中图片获取输出流
public void readBin2Image(InputStream in, String targetPath) {
File file = new File(targetPath);
String path = targetPath.substring(0, targetPath.lastIndexOf("/"));
if (!file.exists()) {
new File(path).mkdir();
}
FileOutputStream fos = null;
try {
fos = new FileOutputStream(file);
int len = 0;
byte[] buf = new byte[1024];
while ((len = in.read(buf)) != -1) {
fos.write(buf, 0, len);
}
fos.flush();
} catch (Exception e) {
e.printStackTrace();
} finally {
if (null != fos) {
try {
fos.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}
最后封装整个街景图片存储逻辑示例调用如下:
private void storePanoPicsToDb(PanoPic panoPic,String tableName){
//读本地图片存数据库
BlobInsertUtils blobInsertUtils = new BlobInsertUtils(tableName);
blobInsertUtils.insertAllImage2DBWithNoCheck(panoPic);
}
3.1 注意
BlobInsertUtils类中有些插入方法是没有用到的,本文只调用了insertAllImage2DBWithNoCheck方法。readAllImage2DB方法和insertAllImage2DBWithNoCheck方法区别是考虑到爬虫逻辑一般为周期性的计划任务(街景可能每周或每个月爬取1次),为了实现插入时判断数据库表中是否已有记录,readAllImage2DB采用on duplicate key update关键字建立插入语句(无则插入,有则更新),但本方法由于字段过多(超过了41个),调用该方法插入会报错,解决办法:重新建表,删除不必要的字段(如:图片路径),重新封装sql参数即可。
参考
blob类型数据存储参考:
https://blog.csdn.net/qq_38785658/article/details/75009700
https://www.cnblogs.com/warrior4236/p/5682830.html