package test; /** * Created by yuantongqin on 2016/12/5. */ import java.io.*; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class getHtml2 { public void getHtmlPicture(String httpUrl) { URL url; BufferedInputStream in; FileOutputStream file; try { System.out.println("取网络图片"); String fileName = httpUrl.substring(httpUrl.lastIndexOf("/")); String filePath = "F:\\FocuSimple\\test\\src\\pic\\"; url = new URL(httpUrl); in = new BufferedInputStream(url.openStream()); file = new FileOutputStream(new File(filePath+fileName)); int t; while ((t = in.read()) != -1) { file.write(t); } file.close(); in.close(); System.out.println("图片获取成功"); } catch (MalformedURLException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public String getHtmlCode(String httpUrl) throws IOException { String content =""; URL uu = new URL(httpUrl); // 创建URL类对象 BufferedReader ii = new BufferedReader(new InputStreamReader(uu .openStream())); // //使用openStream得到一输入流并由此构造一个BufferedReader对象 String input; while ((input = ii.readLine()) != null) { // 建立读取循环,并判断是否有读取值 content += input; } ii.close(); // return "js中内容"; return content; } public void get(String url) throws IOException { String searchImgReg = "(?x)(src|SRC|background|BACKGROUND)=('|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")"; String searchImgReg2 = "http://[/,+&=\\.\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF|jpeg)"; // String searchImgReg2 = "(?x)(http://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF|jpeg)))"; String content = this.getHtmlCode(url); System.out.println(content); Pattern pattern = Pattern.compile(searchImgReg2); Matcher matcher = pattern.matcher(content); // while (matcher.find()) { this.getHtmlPicture(url+matcher.group(3)); // String group = matcher.group(); // System.out.println(group); // mList.add(group); // // } // // pattern = Pattern.compile(searchImgReg2); // matcher = pattern.matcher(content); while (matcher.find()) { String group = matcher.group(); System.out.println(group); mList.add(group); // this.getHtmlPicture(matcher.group(3)); } } static List<String> mList = new ArrayList<String>(); public static void main(String[] args) throws IOException { // String url = "http://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=%B8%DF%C7%E5%B1%DA%D6%BD&fr=ala&ala=1&pos=0&alatpl=wallpaper&oriquery=%E9%AB%98%E6%B8%85%E5%A3%81%E7%BA%B8"; String url = "http://www.daimg.com/photo/list_4_1.html"; getHtml2 gcp = new getHtml2(); gcp.get(url); System.out.println("==长度=="+mList.size()); } }
java 从网页中获取图片路径与从js中获取图片路径
最新推荐文章于 2023-06-13 17:12:53 发布