<?php
// Check whether $str consists solely of well-formed UTF-8 text.
//
// Accepted are tab, LF, CR, printable ASCII (0x20-0x7E) and the multi-byte
// sequences for U+00A0..U+10FFFF. Overlong forms, surrogates (U+D800..U+DFFF),
// other C0 controls, DEL and the C1 controls (U+0080..U+009F) are rejected.
//
// Returns preg_match()'s result: 1 when $str matches, 0 when it does not
// (and false if PCRE itself reports an error).
function isUTF8($str) {
    // The group is non-capturing and repeated possessively ("*+"). Every branch
    // starts with a different lead byte, so there is never a second way to split
    // the input and giving up backtracking cannot change the result. A capturing
    // "( ... )*" keeps backtracking state for every character, which makes
    // preg_match() fail (return false) on long but perfectly valid strings.
    return preg_match(
        '/^(?:[\x09\x0A\x0D\x20-\x7E]'          // ASCII text
        . '|[\xC2][\xA0-\xBF]'                  // U+00A0..U+00BF (no C1 controls)
        . '|[\xC3-\xDF][\x80-\xBF]'             // remaining 2-byte sequences
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'         // 3-byte, excluding overlongs
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'  // straight 3-byte
        . '|\xED[\x80-\x9F][\x80-\xBF]'         // 3-byte, excluding surrogates
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'      // planes 1-3
        . '|[\xF1-\xF3][\x80-\xBF]{3}'          // planes 4-15
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'      // plane 16
        . ')*+$/',
        $str
    );
}
// Check whether every byte of $str is tab, LF, CR, printable ASCII (0x20-0x7E)
// or one of the bytes 0xA0-0xFF. Other control bytes, DEL and the bytes
// 0x80-0x9F are rejected.
//
// Returns preg_match()'s result: 1 when $str matches, 0 when it does not
// (and false if PCRE itself reports an error).
function isISO88591($str) {
    // A bare character class with a possessive quantifier accepts exactly the
    // same strings as the former "([...])*", but without a capturing group that
    // stores backtracking state per byte and makes preg_match() fail on long input.
    return preg_match('/^[\x09\x0A\x0D\x20-\x7E\xA0-\xFF]*+$/', $str);
}
// Check whether every byte of $str is tab, LF, CR, printable ASCII (0x20-0x7E)
// or a byte from 0x80-0xFF other than 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
// Other control bytes and DEL are rejected as well.
//
// Returns preg_match()'s result: 1 when $str matches, 0 when it does not
// (and false if PCRE itself reports an error).
function isCP1252($str) {
    // A bare character class with a possessive quantifier accepts exactly the
    // same strings as the former "([...])*", but without a capturing group that
    // stores backtracking state per byte and makes preg_match() fail on long input.
    return preg_match('/^[\x09\x0A\x0D\x20-\x7E\x80\x82-\x8C\x8E\x91-\x9C\x9E-\xFF]*+$/', $str);
}
// Encode a single Unicode code point as a UTF-8 byte string.
//
// Returns the 1-4 byte sequence for $codePoint, an empty string for U+FEFF
// (the byte order mark is dropped on purpose), or false when $codePoint is
// negative, a surrogate (U+D800..U+DFFF) or greater than U+10FFFF.
function utf8Char($codePoint) {
    if ($codePoint < 0) {
        return false;
    }

    if ($codePoint <= 0x007f) {
        // ASCII: the code point is the byte.
        return chr($codePoint);
    }

    if ($codePoint <= 0x07ff) {
        // Two bytes: 110xxxxx 10xxxxxx
        $lead = chr(0xc0 | ($codePoint >> 6));
        $tail = chr(0x80 | ($codePoint & 0x003f));
        return $lead . $tail;
    }

    if ($codePoint == 0xFEFF) {
        // Zap the BOM: emit nothing for it.
        return '';
    }

    if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
        // Surrogates are not encodable on their own.
        return false;
    }

    if ($codePoint <= 0xffff) {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xe0 | ($codePoint >> 12))
            . chr(0x80 | (($codePoint >> 6) & 0x003f))
            . chr(0x80 | ($codePoint & 0x003f));
    }

    if ($codePoint <= 0x10ffff) {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return chr(0xf0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3f))
            . chr(0x80 | (($codePoint >> 6) & 0x3f))
            . chr(0x80 | ($codePoint & 0x3f));
    }

    // Beyond the last Unicode code point.
    return false;
}
// Callback helper for utf8FromCP1252(): convert one Windows-1252 byte to UTF-8.
//
// $char is a one-byte string; only its first byte is examined.
//
// Returns the UTF-8 encoding of the character that byte stands for. Bytes
// 0x80-0x9F are looked up in the table below; any other byte has the same
// value as its Unicode code point and is encoded directly. An empty string is
// returned for empty input and for the five bytes Windows-1252 leaves
// unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D).
function utf8FromCP1252Char($char) {
    // Unicode code points for the assigned bytes in 0x80-0x9F. Unassigned bytes
    // are simply absent. Declared static so the table is built only once.
    static $codePoints = array(
        0x80 => 0x20AC, // euro sign
        0x82 => 0x201A, // single low-9 quotation mark
        0x83 => 0x0192, // latin small letter f with hook
        0x84 => 0x201E, // double low-9 quotation mark
        0x85 => 0x2026, // horizontal ellipsis
        0x86 => 0x2020, // dagger
        0x87 => 0x2021, // double dagger
        0x88 => 0x02C6, // modifier letter circumflex accent
        0x89 => 0x2030, // per mille sign
        0x8A => 0x0160, // latin capital letter S with caron
        0x8B => 0x2039, // single left-pointing angle quotation mark
        0x8C => 0x0152, // latin capital ligature OE
        0x8E => 0x017D, // latin capital letter Z with caron
        0x91 => 0x2018, // left single quotation mark
        0x92 => 0x2019, // right single quotation mark
        0x93 => 0x201C, // left double quotation mark
        0x94 => 0x201D, // right double quotation mark
        0x95 => 0x2022, // bullet
        0x96 => 0x2013, // en dash
        0x97 => 0x2014, // em dash
        0x98 => 0x02DC, // small tilde
        0x99 => 0x2122, // trade mark sign
        0x9A => 0x0161, // latin small letter s with caron
        0x9B => 0x203A, // single right-pointing angle quotation mark
        0x9C => 0x0153, // latin small ligature oe
        0x9E => 0x017E, // latin small letter z with caron
        0x9F => 0x0178  // latin capital letter Y with diaeresis
    );

    $char = (string) $char;
    if ($char === '') {
        // Nothing to convert; ord('') would otherwise be treated as byte 0.
        return '';
    }

    $byte = ord($char);
    if ($byte < 0x80 || $byte > 0x9F) {
        // Outside the remapped range the byte value is the code point.
        return utf8Char($byte);
    }

    if (!isset($codePoints[$byte])) {
        // Unassigned byte: emit nothing instead of handing a
        // non-numeric placeholder to utf8Char().
        return '';
    }

    return utf8Char($codePoints[$byte]);
}
// Convert the encoding of a string from Windows-1252 to UTF-8.
//
// Returns the UTF-8 string, or an empty string when isCP1252() does not
// accept $string.
function utf8FromCP1252($string) {
    if (!isCP1252($string)) {
        return '';
    }

    // ASCII bytes are already valid UTF-8, so only bytes >= 0x80 are rewritten.
    // A closure replaces create_function(), which was removed in PHP 8.0, and
    // the bytes are encoded here directly rather than through utf8_encode(),
    // which is deprecated as of PHP 8.2.
    return preg_replace_callback(
        '/[\x80-\xFF]/',
        function ($match) {
            $byte = ord($match[0]);
            if ($byte <= 0x9F) {
                // 0x80-0x9F need the Windows-1252 lookup table.
                return utf8FromCP1252Char($match[0]);
            }
            // 0xA0-0xFF: the byte value is the Unicode code point.
            return utf8Char($byte);
        },
        $string
    );
}
?>
转自http://lachy.id.au/dev/2005/11/encoding-functions-source
PHP转编码函数
转载于:https://my.oschina.net/766/blog/211311