This task (the reverse of URL encoding, and distinct from a URL parser) is to provide a function or mechanism that converts a URL-encoded string back into its original, unencoded form.
中文:任务是给你一个编码后的url,将它解码为编码前的格式。
Cases:
"http%3A%2F%2Ffoo%20bar%2F" --> "http://foo bar/"
"google.com/search?q=%60Abdu%27l-Bah%C3%A1" --> "google.com/search?q=`Abdu'l-Bahá"
Solution:
编码的原则是:将所有的符号变成 % + 该符号对应的16进制值。解码时只要找到所有的 %16进制 序列并将其还原即可。
中文:任务是给你一个编码后的url,将它解码为编码前的格式。
Cases:
"http%3A%2F%2Ffoo%20bar%2F" --> "http://foo bar/"
"google.com/search?q=%60Abdu%27l-Bah%C3%A1" --> "google.com/search?q=`Abdu'l-Bahá"
Solution:
编码的原则是:将所有的符号变成 % + 该符号对应的16进制值。解码时只要找到所有的 %16进制 序列并将其还原即可。
+ --> ' '
C/C++:
#include<stdio.h>
#include<string.h>
//c++
#include<iostream>
#include<string>
#include "Poco/URI.h"
using namespace std;
// Reports whether x is an ASCII hexadecimal digit (0-9, a-f, A-F).
// Returns 1 when it is and 0 otherwise.
inline int ishex(char x){
    if (x >= '0' && x <= '9') return 1;   // decimal digit
    if (x >= 'A' && x <= 'F') return 1;   // upper-case hex letter
    if (x >= 'a' && x <= 'f') return 1;   // lower-case hex letter
    return 0;
}
// Returns the value (0-15) of the ASCII hex digit ch, or -1 if ch is not a
// hexadecimal digit.
static int hex_digit_value(char ch){
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    return -1;
}

// Decodes the NUL-terminated URL-encoded string s.
//   '+'    becomes a space,
//   "%XY"  (two hex digits) becomes the single byte 0xXY,
//   any other character is copied unchanged.
//
// s   - encoded input.
// dev - output buffer, or null to only measure the result. When non-null it
//       receives the decoded bytes including the terminating NUL; decoding
//       never produces more bytes than it consumes, so strlen(s) + 1 bytes
//       are always enough.
//
// Returns the number of bytes produced, counting the terminating NUL, or -1
// when a '%' is not followed by two hex digits. A negative value is used for
// the error so it can never be mistaken for a valid byte count.
int decode(const char *s, char *dev){
    int count = 0;
    for(;;){
        const char raw = *s++;
        int c = raw;
        if (c == '+') {
            c = ' ';
        } else if (c == '%') {
            // Check the first digit before touching the second so that a
            // '%' right before the terminator never reads past the string.
            const int hi = hex_digit_value(s[0]);
            if (hi < 0) return -1;
            const int lo = hex_digit_value(s[1]);
            if (lo < 0) return -1;
            c = hi * 16 + lo;
            s += 2;
        }
        // Index through dev only when it is non-null; no arithmetic is done
        // on a null pointer in measure-only mode.
        if (dev) dev[count] = static_cast<char>(c);
        ++count;
        // Stop on the input's own terminator (already copied above), not on
        // a decoded "%00", so bytes following an embedded NUL are still
        // processed.
        if (raw == '\0') break;
    }
    return count;
}
//c++ or g++
string decode(string encoded){
string decoded;
Poco::URI::decode(eccoded, decoded);
return decoded;
}
// Demo driver: decodes one sample URL with both decode() overloads and prints
// the results.
int main(){
    const char *url = "http%3A%2F%2ffoo+bar%2fabcd";
    std::string encoded("http%3A%2F%2ffoo+bar%2fabcd");

    // Output buffer for the C-style decoder. Decoding never lengthens the
    // text, so strlen(url) + 1 bytes always suffice. A std::string is used
    // instead of a variable-length array, which is not standard C++.
    std::string out(strlen(url) + 1, '\0');

    // A null output buffer asks decode() only for the size of the result.
    printf("length: %d\n", decode(url, nullptr));

    // NOTE(review): this check only fires if decode() reports failure with a
    // negative value — confirm against its definition.
    puts(decode(url, &out[0]) < 0 ? "bad string" : out.c_str());

    std::cout << decode(encoded) << std::endl;
    return 0;
}
Java:
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
/** Small demo that percent-decodes a sample URL and prints it. */
public class Main
{
    /**
     * Decodes the sample URL as UTF-8 and writes the result to standard output.
     *
     * @param args unused command-line arguments
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public static void main(String[] args) throws UnsupportedEncodingException
    {
        final String encodedUrl = "http%3A%2F%2Ffoo%20bar%2F";
        System.out.println(URLDecoder.decode(encodedUrl, "utf-8"));
    }
}
Python3:
# urllib.parse is a submodule and has to be imported explicitly: a bare
# `import urllib` does not guarantee that `urllib.parse` is loaded, in which
# case the call below fails with AttributeError.
import urllib.parse

# Percent-decode the sample URL and print the result.
print(urllib.parse.unquote("http%3A%2F%2Ffoo%20bar%2F"))
参考(copy):http://rosettacode.org/wiki/URL_decoding#C