Erlang 从URL中截取尾部文件名

      分析项目edownloader时看到的代码:              

     从 “http://www.aaaa.com.cn/download/bbb/ccc.exe”  中截取ccc.exe

uri.hrl

%% Parsed representation of a URI; each field holds one component.
-record(uri, {scheme,       %% e.g. "http", "ftp"
              user_info="", %% "" when absent, otherwise e.g. "srp"
              host="",      %% e.g. "somewhere.net"
              port="",      %% "" when absent, otherwise an integer such as 80 or 8080
              path="",      %% e.g. "/here/there/everywhere"
              raw_query="", %% e.g. "id=12345&name=fred+johnson"; kept undecoded
              frag="",      %% e.g. "some anchor"
              raw           %% the URI as a single raw string
             }).

uri.erl

%% Parse a URI string into a #uri{} record.
%% Each step strips one component off the front of the input and passes the
%% remainder on: scheme, authority (user info, host, port), path, query,
%% fragment.
from_string(Uri) ->
    %% "http://host:80/p?q#f" -> {"http", "//host:80/p?q#f"}
    {Scheme, AfterScheme} = parse_scheme(Uri),
    %% -> {"host:80", "/p?q#f"}
    {Authority, AfterAuthority} = parse_authority(AfterScheme),
    %% "host:80" -> {[], "host:80"}
    {UserInfo, HostPort} = parse_user_info(Authority),
    %% "host:80" -> {"host", 80}
    {Host, Port} = parse_host_port(HostPort),
    %% "/p?q#f" -> {"/p", "?q#f"}
    {Path, AfterPath} = parse_path(AfterAuthority),
    %% "?q#f" -> {"q", "#f"}
    {Query, AfterQuery} = parse_query(AfterPath),
    Frag = parse_frag(AfterQuery),
    new(Scheme, UserInfo, Host, Port, Path, Query, Frag, Uri).

%% Split the scheme off the front of a URI string.
%% Returns {Scheme, Rest}, where Rest is everything after the first ":".
%% When the input contains no ":" at all, returns {[], Uri}.
parse_scheme(Uri) ->
    parse_scheme(Uri, []).

%% Acc holds, in reverse order, the characters already consumed before Uri.
parse_scheme(Uri, Acc) ->
    NotColon = fun(C) -> C =/= $: end,
    case lists:splitwith(NotColon, Uri) of
        {Scheme, [$: | Rest]} -> {lists:reverse(Acc, Scheme), Rest};
        {Whole, []} -> {[], lists:reverse(Acc, Whole)}
    end.

%% Split the authority component ("user@host:port") off the front of the part
%% of a URI that follows "scheme:".
%% Returns {Authority, Rest}. When the input does not start with "//" there is
%% no authority, so Authority is "" and Rest is the unchanged input.
parse_authority("//" ++ Uri) ->
    parse_authority(Uri, "");
parse_authority(Uri) ->
    %% Must be a 2-tuple like the clause above: from_string/1 destructures it.
    {"", Uri}.

%% The authority runs up to the next "/", "?" or "#", or to the end of the
%% input (RFC 3986, section 3.2). The terminating character is left at the
%% head of Rest so the path/query/fragment parsers can see it.
%% Acc holds the consumed characters in reverse order.
parse_authority([C | _] = Rest, Acc) when C =:= $/; C =:= $?; C =:= $# ->
    {lists:reverse(Acc), Rest};
parse_authority([], Acc) ->
    {lists:reverse(Acc), []};
parse_authority([C | Rest], Acc) ->
    parse_authority(Rest, [C | Acc]).

%% Split an authority string at the first "@".
%% Returns {UserInfo, HostPort}; when there is no "@", UserInfo is [] and
%% HostPort is the whole authority.
parse_user_info(Authority) ->
    parse_user_info(Authority, []).

%% Acc holds, in reverse order, the characters already consumed.
parse_user_info(Authority, Acc) ->
    NotAt = fun(C) -> C =/= $@ end,
    case lists:splitwith(NotAt, Authority) of
        {UserInfo, [$@ | HostPort]} -> {lists:reverse(Acc, UserInfo), HostPort};
        {HostPort, []} -> {[], lists:reverse(Acc, HostPort)}
    end.

%% Split "host" or "host:port" into {Host, Port}.
%% Port is "" when absent, otherwise an integer.
%% An empty authority (e.g. "file:///tmp/x") is valid and yields {"", ""}.
%% Throws {uri_error, {invalid_host_port, HostPort}} when the input contains
%% more than one ":"-separated port part (so bracketed IPv6 literals are not
%% supported). A non-numeric port raises badarg from list_to_integer/1.
parse_host_port("") ->
    %% string:tokens/2 returns [] for "", which would otherwise fall into the
    %% catch-all below and be reported as invalid.
    {"", ""};
parse_host_port(HostPort) ->
    case string:tokens(HostPort, ":") of
        [Host] -> {Host, ""};
        [Host, Port] -> {Host, list_to_integer(Port)};
        _ -> throw({uri_error, {invalid_host_port, HostPort}})
    end.

%% Split the path off the front of a URI remainder.
%% Returns {Path, Rest}; the path ends at the first "?" or "#", which stays at
%% the head of Rest, or at the end of the input (Rest is then "").
parse_path(Uri) ->
    parse_path(Uri, []).

%% Acc holds, in reverse order, the characters already consumed.
parse_path(Uri, Acc) ->
    InPath = fun(C) -> not (C == $? orelse C == $#) end,
    {Path, Rest} = lists:splitwith(InPath, Uri),
    {lists:reverse(Acc, Path), Rest}.

%% Split the query string off the front of a URI remainder.
%% Returns {Query, Rest}. Input that does not start with "?" has no query:
%% Query is "" and Rest is the unchanged input. The leading "?" itself is
%% dropped; the query is returned undecoded.
parse_query([$? | Uri]) ->
    parse_query(Uri, []);
parse_query(Uri) ->
    {"", Uri}.

%% The query ends at the first "#", which stays at the head of Rest, or at
%% the end of the input. Acc holds the consumed characters in reverse order.
parse_query(Uri, Acc) ->
    NotHash = fun(C) -> C =/= $# end,
    {Query, Rest} = lists:splitwith(NotHash, Uri),
    {lists:reverse(Acc, Query), Rest}.

%% Extract the fragment from the tail of a URI.
%% Returns the text after the leading "#", or "" when nothing is left.
%% Throws {uri_error, {data_left_after_parsing, Data}} for any other leftover
%% input.
%%
%% The fragment is returned undecoded, like the path and user info: new/8
%% percent-decodes it when building the record, so decoding here as well
%% would decode twice (turning "%2541" into "A" instead of "%41").
parse_frag([$# | Frag]) ->
    Frag;
parse_frag("") ->
    "";
parse_frag(Data) ->
    throw({uri_error, {data_left_after_parsing, Data}}).

%% Build a #uri{} record from already-split components plus the source string.
%% User info, path and fragment are percent-decoded with unquote/1; the query
%% is stored as given.
%% NOTE(review): update_raw/1 replaces the raw field, so the Uri stored here
%% is overwritten before the record is returned.
new(Scheme, UserInfo, Host, Port, Path, Query, Frag, Uri) ->
    DecodedUserInfo = unquote(UserInfo),
    DecodedPath = unquote(Path),
    DecodedFrag = unquote(Frag),
    Record = #uri{scheme = Scheme,
                  user_info = DecodedUserInfo,
                  host = Host,
                  port = Port,
                  path = DecodedPath,
                  raw_query = Query,
                  frag = DecodedFrag,
                  raw = Uri},
    update_raw(Record).

%% Build a #uri{} record from already-split components.
%% User info, path and fragment are percent-decoded with unquote/1; the query
%% is stored as given. The raw field is filled in by update_raw/1.
new(Scheme, UserInfo, Host, Port, Path, Query, Frag) ->
    DecodedUserInfo = unquote(UserInfo),
    DecodedPath = unquote(Path),
    DecodedFrag = unquote(Frag),
    Record = #uri{scheme = Scheme,
                  user_info = DecodedUserInfo,
                  host = Host,
                  port = Port,
                  path = DecodedPath,
                  raw_query = Query,
                  frag = DecodedFrag},
    update_raw(Record).

%% Return Uri with its raw field set to the flat string produced by
%% rendering the record through to_iolist/1.
update_raw(Uri) ->
    Rendered = to_iolist(Uri),
    Raw = iolist_to_string(Rendered),
    Uri#uri{raw = Raw}.

%% Flatten an iolist into a plain list of bytes (a string).
iolist_to_string(IoList) ->
    Bin = iolist_to_binary(IoList),
    binary_to_list(Bin).

%% Decode a URL-encoded string: "+" becomes a space and "%XY" becomes the
%% character whose code is the hexadecimal number XY.
%% A "%" followed by fewer than two characters is kept literally; a "%"
%% followed by two non-hex characters raises badarg from list_to_integer/2.
unquote(Str) ->
    unquote(Str, []).

%% Decoded holds the output so far, in reverse order.
unquote([$\%, Hi, Lo | Tail], Decoded) ->
    Char = list_to_integer([Hi, Lo], 16),
    unquote(Tail, [Char | Decoded]);
unquote([$+ | Tail], Decoded) ->
    unquote(Tail, [$\s | Decoded]);
unquote([Char | Tail], Decoded) ->
    unquote(Tail, [Char | Decoded]);
unquote([], Decoded) ->
    lists:reverse(Decoded).


edownload_util.erl

%% Return the last segment of a URI's path, i.e. the file name to download.
%% Accepts either a URI string (parsed with uri:from_string/1) or a #uri{}
%% record.
%% Crashes with badmatch when the path has no trailing segment, i.e. it is
%% empty or ends with "/".
filename_from_uri(Uri) when is_list(Uri) ->
    filename_from_uri(uri:from_string(Uri));
filename_from_uri(Uri) when is_record(Uri, uri) ->
    %% "[^/]+$" matches the run of non-"/" characters at the very end of the
    %% path; {capture, first, list} returns that match directly as a string.
    {match, [Filename]} = re:run(Uri#uri.path, "[^/]+$", [{capture, first, list}]),
    Filename.

3> edownload_util:filename_from_uri("http://www.baidu.com/download/emessager.apk").
结果:"emessager.apk"


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值