实例:
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <regex.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
/*
 * Regular expression that matches the src="..." attribute of an <img ...> tag.
 * main() compiles it with cflags 0, i.e. as a POSIX basic regular expression,
 * so \( ... \) is the capture group: sub-match 1 is the attribute value, a run
 * of characters containing no space, '>' or '"'.
 * Matching is case-sensitive (only lowercase "<img " and "src" are matched).
 * NOTE(review): \s is not defined by POSIX for basic regular expressions; it
 * appears to rely on a regex-library extension (e.g. glibc) - confirm before
 * porting to another platform.
 */
static const char * HREF_PATTERN = "<img [^>]*src=\"\\s*\\([^ >\"]*\\)\\s*\"";
/*
 * Print the src URL of every <img> tag found in an HTML file.
 *
 * argv[1] names the file to scan. The whole file is read into a heap buffer
 * that grows as needed, then HREF_PATTERN is applied repeatedly and the text
 * captured by its first sub-expression is written to stdout, one per line.
 *
 * The buffer is scanned as a C string, so scanning stops at the first NUL
 * byte in the file, if there is one.
 *
 * Returns 0 on success, -1 on a usage, open, read, allocation or regex error.
 */
int main(int argc, char** argv){
    enum { NMATCH = 2, READ_CHUNK = 4096, INITIAL_CAP = 64 * 1024 };
    regmatch_t matchptr[NMATCH];   /* [0] = whole match, [1] = captured URL */
    regex_t re;
    char *buf = NULL;
    size_t cap = 0;                /* bytes allocated for buf */
    size_t count = 0;              /* bytes of file data stored in buf */
    const char *p;
    int status;
    int fd;
    int rc = -1;

    if (argc < 2) {
        printf("usage: %s <html-file>\n", argc > 0 ? argv[0] : "test");
        return -1;
    }

    //operate file and read contents
    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        printf("file:%s open error\n", argv[1]);
        return -1;
    }

    for (;;) {
        ssize_t n;

        /* Keep room for one full chunk plus the terminating NUL. */
        if (cap - count <= (size_t)READ_CHUNK) {
            size_t new_cap = cap ? cap * 2 : (size_t)INITIAL_CAP;
            char *grown = realloc(buf, new_cap);
            if (grown == NULL) {
                printf("out of memory\n");
                close(fd);
                goto out_free;
            }
            buf = grown;
            cap = new_cap;
        }

        n = read(fd, buf + count, READ_CHUNK);
        if (n < 0) {
            printf("file read error\n");
            close(fd);
            goto out_free;
        }
        if (n == 0) {
            break;                 /* end of file */
        }
        count += (size_t)n;
    }
    close(fd);
    buf[count] = '\0';             /* regexec() needs a NUL-terminated string */

    printf("\nfile read over! begin URL analyse now...\n");

    if (regcomp(&re, HREF_PATTERN, 0) != 0) {/* compile error */
        printf("compile regex error\n");
        goto out_free;
    }

    p = buf;
    while ((status = regexec(&re, p, NMATCH, matchptr, 0)) == 0) {
        size_t len = (size_t)(matchptr[1].rm_eo - matchptr[1].rm_so);

        /* Write the captured URL straight from the buffer; no copy needed. */
        fwrite(p + matchptr[1].rm_so, 1, len, stdout);
        putchar('\n');

        if (matchptr[0].rm_eo <= 0) {
            break;                 /* defensive: never loop on an empty match */
        }
        p += matchptr[0].rm_eo;    /* resume just past the whole match */
    }

    if (status == 0 || status == REG_NOMATCH) {
        rc = 0;
    } else {
        printf("regex match error\n");
    }

    regfree(&re);
out_free:
    free(buf);
    return rc;
}
运行结果:
gcc test.c -o test
./test www.zol.com.cn_webcenter_map.html
file read over! begin URL analyse now...
https://dg-fd.zol-img.com.cn/t_s2000x2000/g5/M00/08/00/ChMkJ1YYZveITXKkAAADVZen7iIAADfxQO_-UMAAANt785.png
https://dg-fd.zol-img.com.cn/t_s2000x2000/g4/M00/06/07/Cg-4zFUCTDGIbtftAAAB8xYSy2YAAWoVALpfBUAAAIL673.png
https://dg-fd.zol-img.com.cn/t_s2000x2000/g4/M08/06/08/Cg-4zFUCVDuIXpdBAAABlOjlfjUAAWoXAP__gQAAAH8892.png
https://dg-fd.zol-img.com.cn/t_s2000x2000/g5/M00/08/0C/ChMkJ1ez0HiILQTIAAAPmt4wdOoAAUgUAN2Y3IAAA-y501.png