// 我看过的两本 PHP 的书中提及到 PHP6 的新特性,其中两个是 namespace 和 unicode, // 从 PHP5.3 开始,php 已经支持 namespace 了; 而 Unicode 在 PHP5.3 中还没有发布。 // // 看书看到 PHP 的多字节处理,想到前段时间还会自己截取 中英混合的字符串,那时候花了很长时间来看 utf-8, // gbk, gb2312, gb18030, unicode 之类的,现在看有了多字节处理的扩展不需要那么麻烦了。 // // php5 在两个扩展中支持字符编码和多字节问题: // iconv 和 mbstring // 两个之间的主要区别: // iconv 使用的是一个外部的库;而 mbstring 使用的是 php 绑定的库。 // 因为 iconv (至少在最新的 linux 版本中)支持更多的编码,所以 iconv 可能是通用性最好的选择。 /** * iconv 函数 */ iconv_get_encoding() // 0/1 // Retrieve internal configuration variables of iconv extension iconv_mime_decode_headers() // 1/3 // Decodes multiple MIME header fields at once // 返回数组 // Returns an associative array that holds a whole set of MIME header fields specified // by encoded_headers on success, or FALSE if an error occurs during the decoding. iconv_mime_decode() // 1/3 // Decodes a MIME header field // Returns a decoded MIME field on success, or FALSE if an error occurs during the decoding. iconv_mime_encode() // 2/3 // Composes a MIME header field // Returns an encoded MIME field on success, or FALSE if an error occurs during the encoding. iconv_set_encoding() // 2/2 // Set current setting for character encoding conversion // Returns TRUE on success or FALSE on failure. // iconv_set_encoding() 可以用来设置内部编码和输出编码; // 内部编码设置可以影响一系列处理字符串的函数。输出编码选项丝毫不会影响那些功能, // 但是可以用来与 ob_iconv_handler 输出缓冲处理器组合使用. // 如果开启,php 将自动把输出到浏览器的文本从内部编码转换为输出编码。 // -- ob_start( 'ob_iconv_handler'); iconv_set_encoding( 'internal_encoding', 'UTF-8'); iconv_set_encoding( 'output_encoding', 'ISO-8859-1'); print_r( iconv_get_encoding() ); $text = <<<TEXT GNU is Not Unix, PHing Is Not GNU make, myhere is not here. 你好! 
吉林大学。 TEXT; echo $text; // 程序不会输出汉字,因为这些汉字在 utf-8 编码里占多个字节(常用汉字为三个字节),而 ISO-8859-1 是单字节编码,无法表示这些汉字。 // -- iconv_strlen() // 1/2 // Returns the character count of string iconv_strpos() // 2/4 // Finds position of first occurrence of a needle within a haystack iconv_strrpos() // 2/3 // Finds the last occurrence of a needle within a haystack iconv_substr() // 2/4 // Cut out part of a string // Returns the portion of str specified by the offset and length parameters. // If str is shorter than offset characters long, FALSE will be returned. iconv() // 3/3 // Convert string to requested character encoding // If you append the string //TRANSLIT to out_charset transliteration is activated. // This means that when a character can't be represented in the target charset, // it can be approximated through one or several similarly looking characters. // If you append the string //IGNORE, characters that cannot be represented // in the target charset are silently discarded. Otherwise, str is cut from // the first illegal character and an E_NOTICE is generated. ob_iconv_handler() // 2/2 // Convert character encoding as output buffer handler // Converts the string encoded in internal_encoding to output_encoding. // /** * mbstring 函数 */ // Function Overloading Feature // mbstring supports a 'function overloading' feature which // enables you to add multibyte awareness to such an application // without code modification by overloading multibyte counterparts // on the standard string functions. // For example, mb_substr() is called instead of substr() // if function overloading is enabled. This feature makes // it easy to port applications that only support // single-byte encodings to a multibyte environment in many cases. //- // To use function overloading, set mbstring.func_overload in php.ini // to a positive value that represents a combination of bitmasks // specifying the categories of functions to be overloaded. // It should be set to 1 to overload the mail() function. // 2 for string functions, 4 for regular expression functions. 
// For example, if it is set to 7, mail, string and // regular expression functions will be overloaded. // The list of overloaded functions is shown in the PHP manual (mbstring function overloading). //- // Note: It is not recommended to use the function overloading option // in the per-directory context, // because it's not confirmed yet to be stable enough // in a production environment and may lead to undefined behaviour. // - // If you are connecting to a database with PHP, // it is recommended that you use the same character encoding // for both the database and the internal encoding for ease of use and better performance. // -- mb_convert_encoding() // 2/3 // Convert character encoding mb_internal_encoding() // Set/Get internal character encoding mb_output_handler() // Callback function converts character encoding in output buffer mb_strlen() // Get string length mb_http_input() // Detect HTTP input character encoding mb_http_output() // Set/Get HTTP output character encoding // ...