此工具类的第一种用法:传入网址和字符编码两个参数,
将该网址的网页源码抓取到内存,并以 String 类型返回。
/**
 * Downloads the resource at the given URL and returns its content as text.
 *
 * <p>The content is read line by line, and every line (including the last one) is
 * terminated with {@link System#lineSeparator()}, so the source's own line endings
 * are replaced by the platform's.
 *
 * <p>This method is best-effort and never throws: any failure (malformed URL,
 * null or unsupported encoding, I/O error) has its stack trace printed, and whatever
 * was read before the failure (possibly the empty string) is returned.
 *
 * @param urlStr   the URL to fetch
 * @param encoding the name of the charset used to decode the content, e.g. {@code "UTF-8"}
 * @return the decoded content; never {@code null}
 */
public static String urlGetString(String urlStr, String encoding) {
    // The buffer is confined to this call, so the unsynchronized StringBuilder is enough.
    StringBuilder sb = new StringBuilder();
    try {
        // Resolve the charset BEFORE opening the connection. Previously a null or
        // unsupported encoding made the InputStreamReader constructor throw after the
        // connection's input stream had already been opened, leaking that stream.
        java.nio.charset.Charset charset = java.nio.charset.Charset.forName(encoding);
        URL url = new URL(urlStr);
        URLConnection conn = url.openConnection();
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), charset))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Accumulate the fetched content in memory, one normalized line at a time.
                sb.append(line).append(System.lineSeparator());
            }
        }
    } catch (Exception e) {
        // Deliberately broad: callers rely on this method never throwing.
        e.printStackTrace();
    }
    return sb.toString();
}
第二种用法:传入网址,
将该网址的内容读取为 byte 数组并返回。
/**
 * Downloads the resource at the given URL and returns its raw bytes.
 *
 * <p>This method is best-effort and never throws: on any failure (malformed URL,
 * I/O error) the stack trace is printed and an empty array is returned. Data read
 * before a failure is discarded rather than returned as a truncated result.
 *
 * @param urlStr the URL to fetch
 * @return the complete content, or an empty array if it could not be fully read;
 *         never {@code null}
 */
public static byte[] urlGetByteArray(String urlStr) {
    byte[] byteArray = new byte[0];
    try {
        URL url = new URL(urlStr);
        URLConnection conn = url.openConnection();
        // try-with-resources closes the network stream on every path. The in-memory
        // ByteArrayOutputStream holds no external resource, so it needs no close.
        try (BufferedInputStream bis = new BufferedInputStream(conn.getInputStream())) {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            // Copy in chunks instead of one read() call per byte.
            byte[] chunk = new byte[8192];
            int count;
            while ((count = bis.read(chunk)) != -1) {
                baos.write(chunk, 0, count);
            }
            // Publish the result only after the whole stream was read successfully.
            byteArray = baos.toByteArray();
        }
    } catch (Exception e) {
        // Deliberately broad: callers rely on this method never throwing.
        e.printStackTrace();
    }
    return byteArray;
}
}