java正则 链接_java获取url内容及正则匹配链接图片地址 | 学步园

import java.net.*;

import java.util.ArrayList;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import java.io.*;

/**
 * Downloads a web page, saves it to a local file and collects the link
 * ({@code <a href>}) and image ({@code <img src>}) addresses found in it.
 *
 * <p>Two ways of fetching the page are provided: {@link #getHttpUrlContent()}
 * (via {@link URL#openStream()}) and {@link #getHttpSocketContent()} (a
 * hand-written HTTP request over a raw {@link Socket}, which requires
 * {@link #initSocket()} to be called first).
 *
 * <p>Matching is done line by line, so a tag that is split across several
 * lines of the page is not detected. Text is decoded and written with the
 * platform default charset.
 */
public class QuestionResult {

    /**
     * Captures the value of the {@code href} attribute of an {@code <a>} tag.
     * NOTE(review): the pattern in the published source was lost (the string
     * literal was truncated), so this is a reconstruction; confirm it matches
     * the intended behaviour.
     */
    private static final Pattern LINK_PATTERN = Pattern.compile(
            "<a\\b[^>]*?\\shref\\s*=\\s*[\"']?([^\"'\\s>]+)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Captures the value of the {@code src} attribute of an {@code <img>} tag.
     * NOTE(review): reconstructed for the same reason as {@link #LINK_PATTERN}.
     */
    private static final Pattern IMAGE_PATTERN = Pattern.compile(
            "<img\\b[^>]*?\\ssrc\\s*=\\s*[\"']?([^\"'\\s>]+)",
            Pattern.CASE_INSENSITIVE);

    private Socket socket;

    private String host = "www.01hr.com";

    private int port = 80;

    private String filePath = "E://index.txt";

    // Created once so that matches from every line of the page accumulate;
    // re-creating them per call would keep only the last line's matches.
    private final ArrayList<String> urlList = new ArrayList<String>();

    private final ArrayList<String> imgList = new ArrayList<String>();

    /**
     * Opens the socket connection to the configured host and port.
     *
     * @throws Exception if the connection cannot be established
     */
    public void initSocket() throws Exception {
        socket = new Socket(host, port);
    }

    /**
     * Page retrieval, method 1: reads the page through {@link URL#openStream()},
     * writes it to the output file and collects link and image addresses.
     * Previously collected addresses are discarded first.
     *
     * @throws Exception if the page cannot be read or the file cannot be written
     */
    public void getHttpUrlContent() throws Exception {
        URL url = new URL("http://www.01hr.com");
        // Closing the BufferedReader also closes the underlying URL stream.
        BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
        try {
            savePageAndCollect(br);
        } finally {
            br.close();
        }
    }

    /**
     * Page retrieval, method 2: sends a hand-written HTTP request over the
     * socket opened by {@link #initSocket()}, skips the response headers,
     * writes the body to the output file and collects link and image
     * addresses. Previously collected addresses are discarded first. The
     * socket is closed when this method returns.
     *
     * @throws IllegalStateException if {@link #initSocket()} has not been called
     * @throws Exception if the request fails or the file cannot be written
     */
    public void getHttpSocketContent() throws Exception {
        if (socket == null) {
            throw new IllegalStateException(
                    "initSocket() must be called before getHttpSocketContent()");
        }
        // HTTP lines end with CRLF. HTTP/1.0 plus "Connection: close" is used
        // because the body is read as plain text until end of stream: this
        // avoids chunked transfer encoding and a blocked read on a kept-alive
        // connection. No Accept-Encoding is sent, since a gzip/deflate body
        // could not be read line by line.
        String request = "GET " + "/index.html" + " HTTP/1.0\r\n"
                + "Host: " + host + "\r\n"
                + "Accept: */*\r\n"
                + "Accept-Language: zh-cn\r\n"
                + "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)\r\n"
                + "Connection: close\r\n\r\n";
        try {
            OutputStream socketOut = socket.getOutputStream();
            socketOut.write(request.getBytes("US-ASCII"));
            socketOut.flush();

            BufferedReader br = new BufferedReader(
                    new InputStreamReader(socket.getInputStream()));
            // The response headers end at the first empty line; skip them
            // instead of assuming a fixed number of header lines.
            String header = br.readLine();
            while (header != null && header.length() > 0) {
                header = br.readLine();
            }
            savePageAndCollect(br);
        } finally {
            // Closing the socket also closes its input and output streams.
            socket.close();
            socket = null;
        }
    }

    /**
     * Copies every remaining line of {@code br} to the output file and runs
     * the link and image matchers on each line.
     */
    private void savePageAndCollect(BufferedReader br) throws IOException {
        urlList.clear();
        imgList.clear();
        // Closing the BufferedWriter also closes the underlying FileWriter.
        BufferedWriter bw = new BufferedWriter(new FileWriter(filePath));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                matchUrl(line);
                matchImage(line);
                bw.write(line);
                bw.newLine();
            }
        } finally {
            bw.close();
        }
    }

    /**
     * Finds link addresses in {@code str} and appends them to the collected
     * link list.
     *
     * @param str text to scan; {@code null} is ignored
     */
    public void matchUrl(String str) {
        collectMatches(LINK_PATTERN, str, urlList);
    }

    /**
     * Finds image addresses in {@code str} and appends them to the collected
     * image list.
     *
     * @param str text to scan; {@code null} is ignored
     */
    public void matchImage(String str) {
        collectMatches(IMAGE_PATTERN, str, imgList);
    }

    /** Appends capture group 1 of every match of {@code pattern} in {@code text} to {@code target}. */
    private static void collectMatches(Pattern pattern, String text, ArrayList<String> target) {
        if (text == null) {
            return;
        }
        Matcher mat = pattern.matcher(text);
        while (mat.find()) {
            target.add(mat.group(1));
        }
    }

    /**
     * Returns a copy of the link addresses collected so far.
     *
     * @return a new list; changes to it do not affect this object
     */
    public ArrayList<String> getUrlList() {
        return new ArrayList<String>(urlList);
    }

    /**
     * Returns a copy of the image addresses collected so far.
     *
     * @return a new list; changes to it do not affect this object
     */
    public ArrayList<String> getImgList() {
        return new ArrayList<String>(imgList);
    }

    /**
     * Prints a heading followed by every element of the list, one per line.
     *
     * @param contentName heading printed before the elements
     * @param al elements to print; {@code null} prints only the heading
     */
    public void printContent(String contentName, ArrayList<?> al) {
        System.out.println(contentName);
        if (al == null) {
            return;
        }
        for (int i = 0; i < al.size(); i++) {
            System.out.println(al.get(i));
        }
    }

    /**
     * Fetches the page with method 1 and prints the collected link and image
     * addresses.
     *
     * @param args unused
     * @throws Exception if fetching or saving the page fails
     */
    public static void main(String args[]) throws Exception {
        QuestionResult client = new QuestionResult();

        // Page retrieval, method 2 (alternative):
        //   client.initSocket();
        //   client.getHttpSocketContent();

        client.getHttpUrlContent(); // page retrieval, method 1

        client.printContent("---------------链接地址打印---------------", client.urlList);
        client.printContent("---------------图片地址打印---------------", client.imgList);
    }

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值