Accelerated C++ Exercise 6-0 (查找URL)

#ifndef GRUAD_urls_h
#define GRUAD_urls_h

#include <vector>
#include <string>

// Scans the given string and returns each URL found in it, in order of
// appearance. A URL here is one or more letters, the separator "://", and
// then a run of characters for which not_url_char() is false.
std::vector<std::string> find_urls(const std::string&);
// Returns true when the character may NOT appear inside a URL, i.e. it is
// neither alphanumeric nor one of the extra punctuation characters accepted
// by the implementation.
bool not_url_char(char );
#endif
#include <algorithm>   
#include <cctype>   
#include <string>   
#include <vector>   
   
#include "urls.h"   
   
using std::find;   
using std::find_if;   
   
using std::isalnum;   
using std::isalpha;   
using std::isdigit;   
   
using std::search;   
using std::string;   
using std::vector;   
   
// Predicate: true when the character cannot be part of a URL.
bool not_url_char(char);   
   
// Given a range [b, e) whose start is the beginning of a URL, returns the
// position of the first character that cannot belong to that URL (or e).
string::const_iterator   
url_end(string::const_iterator, string::const_iterator);   
   
// Returns the position in [b, e) where the first URL starts (the first
// letter of its protocol name), or e when the range contains no URL.
string::const_iterator   
url_beg(string::const_iterator, string::const_iterator);   
// Collects every URL contained in `s`, in the order they occur.
//
// The text is walked left to right: url_beg() locates the start of the next
// URL, url_end() locates the character just past it, and the substring in
// between is stored. An empty vector is returned when `s` holds no URL.
vector<string> find_urls(const string& s)
{
    typedef string::const_iterator pos_t;

    vector<string> urls;
    const pos_t last = s.end();

    // position of the first URL, or `last' if there is none
    pos_t cursor = url_beg(s.begin(), last);

    while (cursor != last) {
        // one past the final character of the URL starting at `cursor'
        const pos_t stop = url_end(cursor, last);

        urls.push_back(string(cursor, stop));

        // continue the search in whatever follows this URL
        cursor = url_beg(stop, last);
    }

    return urls;
}
   
// Returns the position of the first character in [b, e) that is not allowed
// in a URL, or `e' when every character in the range is allowed.
string::const_iterator   
url_end(string::const_iterator b, string::const_iterator e)   
{   
    // step over characters for as long as they may belong to a URL
    while (b != e && !not_url_char(*b))
        ++b;
    return b;
}   
   
// Returns true when `c' may NOT appear inside a URL.
//
// A character is accepted (result false) if it is alphanumeric or one of the
// punctuation characters listed in `url_ch'; everything else is rejected.
bool not_url_char(char c)
{
    // characters, in addition to alphanumerics, that can appear in a URL
    static const std::string url_ch = "~;/?:@=&$-_.+!*'(),";

    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF). A plain `char' holding a non-ASCII byte may be negative, which
    // would be undefined behaviour, so convert before classifying.
    if (std::isalnum(static_cast<unsigned char>(c)))
        return false;

    // not alphanumeric: allowed only if it is one of the extra URL characters
    return std::find(url_ch.begin(), url_ch.end(), c) == url_ch.end();
}
   
// Finds where the first URL in [b, e) begins.
//
// A URL start is recognised by the separator "://" preceded by at least one
// letter (the protocol name) and followed by at least one character that is
// valid in a URL. Returns an iterator to the first letter of the protocol
// name, or `e' when no such position exists.
string::const_iterator
url_beg(string::const_iterator b, string::const_iterator e)
{
    static const string sep = "://";

    typedef string::const_iterator iter;

    // `i' marks where the separator was found
    iter i = b;

    while ((i = search(i, e, sep.begin(), sep.end())) != e) {

        // make sure the separator isn't at the beginning or end of the line
        if (i != b && i + sep.size() != e) {

            // `beg' marks the beginning of the protocol-name; walk backwards
            // over letters. The cast keeps isalpha() well defined for bytes
            // outside the ASCII range, where plain `char' may be negative.
            iter beg = i;
            while (beg != b && isalpha(static_cast<unsigned char>(beg[-1])))
                --beg;

            // is there at least one appropriate character before and after the separator?
            if (beg != i && !not_url_char(i[sep.size()]))
                return beg;
        }

        // the separator we found wasn't part of a URL; advance `i' past this separator
        i += sep.size();
    }
    return e;
}

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
   
#include "urls.h"   
   
using std::cout;   
using std::cin;   
using std::endl;   
using std::find_if;   
using std::getline;   
using std::string;   
using std::vector;   
using std::ifstream;
   
   
int main() {   
        string s;   
		ifstream infile;  
		infile.open("1.txt");   
		if(!infile.is_open()){  
			cout<<"不可以打开文件"<<std::endl;  
			system("pause");  
			exit(1);  
		}  
		while (getline(infile, s)) {   
                vector<string> v = find_urls(s);   
                for (vector<string>::const_iterator i = v.begin();   
                        i != v.end(); ++i)   
                        cout << *i << endl;   
        }   
		system("pause");
        return 0;   
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值