本篇介绍缓冲流和转换流的作用,最后结合在一起,实现一个爬取网页内容的小程序。
一、缓冲流
1、概述:Buffered…
- 字节/字符缓冲流在内存中维护一块缓冲区,减少对底层节点流的实际读写次数,从而在很大程度上提高性能;默认缓冲区大小为8K(8192),也可以通过构造器自己指定。
- 不管怎么处理,底层都是节点流:文件节点流、字节数组节点流、网络流…
- 释放资源时直接关闭最外层的缓冲流(如BufferedInputStream)即可,它会同时关闭内部包装的节点流
2、实例:文件字符输出
主程序:
/**
 * Skeleton for the buffered character-output examples: prepares the target
 * file and a {@code BufferedWriter} variable, runs one of the "operation
 * method" snippets inside the {@code try}, and always closes the writer.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args){
    // 1. Create the source: the file the text is written to.
    File src = new File("dest.txt");
    // 2. Choose the character stream; it is assigned by the operation snippet.
    BufferedWriter writer = null;
    try {
        // Operation code goes here (one of the operation methods).
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // 3. Release the resource. Closing the BufferedWriter flushes any text
        //    still held in its buffer and closes the wrapped FileWriter, so
        //    the output is not lost even if the snippet never called flush().
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
操作方法一:
// Method 1: convert the text to a char array and write the whole array.
writer = new BufferedWriter(new FileWriter(src));
String msg = "Study is so easy周百青";
char[] datas = msg.toCharArray(); // string -> char array
writer.write(datas, 0, datas.length);
// The text is far smaller than the writer's buffer, so it stays in memory
// until the buffer is flushed; without this call the file would remain empty.
writer.flush();
操作方法二:
// Method 2: write the strings directly, with no char-array conversion.
writer = new BufferedWriter(new FileWriter(src));
String text = "Study is so easy周百青";
writer.write(text, 0, text.length());
writer.write("add");
// Push the buffered characters out to the file.
writer.flush();
操作方法三:
// Method 3: chain append() calls; the line break is written explicitly as \r\n.
writer = new BufferedWriter(new FileWriter(src));
writer.append("Study is so easy\r\n").append("周百青");
// Without a flush the appended text stays in the writer's buffer and never
// reaches the file unless the writer is closed later.
writer.flush();
操作方法四:
// Method 4: write two pieces of text separated by a platform line separator.
writer = new BufferedWriter(new FileWriter(src));
writer.write("Study is so easy");
// newLine() emits the line separator of the current platform.
writer.newLine();
writer.write("周百青");
// Push the buffered characters out to the file.
writer.flush();
二、转换流
1、概述:InputStreamReader&OutputStreamWriter
- 将字节流(纯文本)转换为字符流,方便处理
- 可以指定字符集,以防出现乱码
- InputStreamReader:字节流到字符流(解码)
- OutputStreamWriter:字符流到字节流(编码)
2、实例:控制台看转换过程
/**
 * Demonstrates the conversion streams by echoing console input back to the
 * console: bytes from {@code System.in} are decoded into characters by an
 * {@code InputStreamReader}, and characters are encoded back into bytes for
 * {@code System.out} by an {@code OutputStreamWriter}. Both sides are buffered.
 */
public class ConvertTest {
    /**
     * Reads lines from standard input and writes each one to standard output.
     * The loop ends after the line {@code exit} has been echoed, or when the
     * input reaches end-of-stream.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The writer wraps System.out but is deliberately NOT a try-with-resources
        // resource: closing it would close System.out itself, and everything
        // printed afterwards (including the error message below) would be lost.
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(System.out));
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
            String line;
            // readLine() returns null at end of input (e.g. Ctrl+D or a closed
            // pipe); stop there instead of passing null to write().
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
                // Flush after every line so the echo appears immediately
                // instead of staying in the buffer.
                writer.flush();
                if (line.equals("exit")) {
                    break;
                }
            }
        } catch (IOException e) {
            System.out.println("操作异常!");
            // Keep the cause visible instead of hiding it behind the message.
            e.printStackTrace();
        }
    }
}
结果
三、爬取网页(百度首页为例)
10行代码
/**
 * Compact version of the crawler: downloads http://www.baidu.com, decoding
 * the response as UTF-8, and saves it line by line to baidu.html (also
 * UTF-8). Any failure prints a fixed message to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    try (BufferedReader pageIn = new BufferedReader(
             new InputStreamReader(new URL("http://www.baidu.com").openStream(), "UTF-8"));
         BufferedWriter fileOut = new BufferedWriter(
             new OutputStreamWriter(new FileOutputStream("baidu.html"), "UTF-8"))) {
        // Copy the page one line at a time until the stream is exhausted.
        for (String line = pageIn.readLine(); line != null; line = pageIn.readLine()) {
            fileOut.write(line);
            fileOut.newLine();
        }
        fileOut.flush();
    } catch (Exception e) {
        System.out.println("操作异常!");
    }
}
源码
/**
 * Minimal page downloader built from conversion and buffered streams: the
 * byte stream of a URL is decoded as UTF-8, read line by line, and written
 * to a UTF-8 encoded file.
 */
public class ConvertTest2 {
    /** Page fetched when no URL argument is given. */
    private static final String DEFAULT_URL = "http://www.baidu.com";
    /** File written when no output argument is given. */
    private static final String DEFAULT_OUTPUT = "baidu.html";

    /**
     * Downloads a page and saves it to a file.
     *
     * @param args optional: {@code args[0]} is the URL to fetch (default
     *             {@code http://www.baidu.com}), {@code args[1]} is the output
     *             file (default {@code baidu.html}); with no arguments the
     *             behavior is the same as the original hard-coded version
     */
    public static void main(String[] args) {
        String pageUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String outputFile = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;
        try (BufferedReader reader =
                 // add buffering
                 new BufferedReader(
                     // convert bytes to characters
                     new InputStreamReader(
                         // network stream; the charset is given explicitly
                         // because relying on the default charset may garble
                         // the text
                         new URL(pageUrl).openStream(), "UTF-8"));
             BufferedWriter writer =
                 new BufferedWriter(
                     new OutputStreamWriter(
                         new FileOutputStream(outputFile), "UTF-8"))) {
            // Copy the page line by line; newLine() writes the platform separator.
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(line);
                writer.newLine();
            }
            // No explicit flush needed: try-with-resources closes the writer,
            // and closing a BufferedWriter flushes it first.
        } catch (IOException e) {
            System.err.println("操作异常!");
            // Show the cause (bad URL, network failure, unwritable file, ...)
            // instead of hiding it behind the fixed message.
            e.printStackTrace();
        }
    }
}
结果
<!DOCTYPE html>
<!--STATUS OK--><html> <head><meta http-equiv=content-type content=text/html;charset=utf-8><meta http-equiv=X-UA-Compatible content=IE=Edge><meta content=always name=referrer><link rel=stylesheet type=text/css href=http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css><title>百度一下,你就知道</title></head> <body link=#0000cc> <div id=wrapper> <div id=head> <div class=head_wrapper> <div class=s_form> <div class=s_form_wrapper> <div id=lg> <img hidefocus=true src=//www.baidu.com/img/bd_logo1.png width=270 height=129> </div> <form id=form name=f action=//www.baidu.com/s class=fm> <input type=hidden name=bdorz_come value=1> <input type=hidden name=ie value=utf-8> <input type=hidden name=f value=8> <input type=hidden name=rsv_bp value=1> <input type=hidden name=rsv_idx value=1> <input type=hidden name=tn value=baidu><span class="bg s_ipt_wr"><input id=kw name=wd class=s_ipt value maxlength=255 autocomplete=off autofocus></span><span class="bg s_btn_wr"><input type=submit id=su value=百度一下 class="bg s_btn"></span> </form> </div> </div> <div id=u1> <a href=http://news.baidu.com name=tj_trnews class=mnav>新闻</a> <a href=http://www.hao123.com name=tj_trhao123 class=mnav>hao123</a> <a href=http://map.baidu.com name=tj_trmap class=mnav>地图</a> <a href=http://v.baidu.com name=tj_trvideo class=mnav>视频</a> <a href=http://tieba.baidu.com name=tj_trtieba class=mnav>贴吧</a> <noscript> <a href=http://www.baidu.com/bdorz/login.gif?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2f%3fbdorz_come%3d1 name=tj_login class=lb>登录</a> </noscript> <script>document.write('<a href="http://www.baidu.com/bdorz/login.gif?login&tpl=mn&u='+ encodeURIComponent(window.location.href+ (window.location.search === "" ? "?" 
: "&")+ "bdorz_come=1")+ '" name="tj_login" class="lb">登录</a>');</script> <a href=//www.baidu.com/more/ name=tj_briicon class=bri style="display: block;">更多产品</a> </div> </div> </div> <div id=ftCon> <div id=ftConw> <p id=lh> <a href=http://home.baidu.com>关于百度</a> <a href=http://ir.baidu.com>About Baidu</a> </p> <p id=cp>©2017 Baidu <a href=http://www.baidu.com/duty/>使用百度前必读</a> <a href=http://jianyi.baidu.com/ class=cp-feedback>意见反馈</a> 京ICP证030173号 <img src=//www.baidu.com/img/gs.gif> </p> </div> </div> </div> </body> </html>