import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * Small demo that downloads the Baidu home page and prints its HTML to standard output.
 *
 * <p>The original file header listed the package {@code com.biz.eisp.taskjob.job} and a
 * creation date of 2018/10/18 11:11.
 *
 * @author ljy
 * @version V1.0
 */
public class pageTest {

    /** Address of the page that {@link #main(String[])} downloads. */
    private static final String PAGE_URL = "http://www.baidu.com";

    /**
     * Downloads {@link #PAGE_URL}, decodes the response as UTF-8 and prints it to
     * standard output as a single line.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the URL cannot be opened or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        // An earlier variant of this demo copied the raw response bytes to d:/data.txt in
        // 1024-byte chunks and printed each chunk with new String(chunk, 0, n, "utf-8").
        // The author observed that the file was intact but the console output was garbled,
        // most likely because a multi-byte UTF-8 character can straddle a chunk boundary.
        // Decoding the whole stream through one Reader, as done here, does not garble.
        System.out.println(fetchPage(new URL(PAGE_URL)));
    }

    /**
     * Reads the whole resource behind {@code source} as UTF-8 text.
     *
     * <p>The text is read line by line and the lines are concatenated without any
     * separator, so line terminators present in the response are not part of the result.
     *
     * @param source the URL to read from
     * @return the concatenated lines of the response
     * @throws IOException if the stream cannot be opened or read
     */
    private static String fetchPage(URL source) throws IOException {
        StringBuilder html = new StringBuilder();
        // try-with-resources closes the reader (and the wrapped network stream) even when
        // readLine() throws part-way through the download.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new BufferedInputStream(source.openStream()), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line);
            }
        }
        return html.toString();
    }
}
Java 爬取百度首页 HTML 源码
最新推荐文章于 2022-07-01 10:36:17 发布