If you load HTML content from a website in "utf-8" encoding and the meta tag with content-type is not the first child of HEAD, the parser does not acknowledge the declared encoding. You can work around this with the following fix:
/**
 * Load an HTML string into a DOMDocument, applying the charset declared in a
 * <meta> element even when that element is not the first child of <head>.
 *
 * The markup is parsed twice. The first pass only serves to locate a charset
 * declaration: either <meta charset="..."> or
 * <meta http-equiv="Content-Type" content="...; charset=...">. Only <meta>
 * elements that are children of the document, <html> or <head> are examined,
 * and the first usable declaration in document order wins. When one is found,
 * the markup is parsed again with an XML encoding declaration prepended so the
 * parser decodes the whole document with that charset; the helper processing
 * instruction is then removed again. When none is found, the document from
 * the first pass is returned unchanged.
 *
 * @param string $html Raw HTML markup.
 *
 * @return DOMDocument The parsed document (empty when $html is empty).
 */
function domLoadHTML($html)
{
    $probeDom = new DOMDocument('1.0', 'UTF-8');
    if ((string) $html === '') {
        // loadHTML() rejects an empty string (warning or ValueError depending
        // on the PHP version); hand back an empty document instead.
        return $probeDom;
    }
    $probeDom->loadHTML($html);

    $charset = null;
    $findCharset = function ($node) use (&$findCharset, &$charset) {
        if (!$node->childNodes) {
            return;
        }
        foreach ($node->childNodes as $child) {
            if ($charset !== null) {
                // First declaration wins; stop scanning once we have one.
                return;
            }
            switch (strtolower($child->nodeName)) {
                case 'html':
                case 'head':
                    $findCharset($child);
                    break;
                case 'meta':
                    // Index attributes by lower-cased name so lookups are
                    // case-insensitive.
                    $attrs = array();
                    foreach ($child->attributes as $attr) {
                        $attrs[strtolower($attr->localName)] = $attr->nodeValue;
                    }

                    $candidate = null;
                    $found = array();
                    if (isset($attrs['charset'])) {
                        // HTML5 short form: <meta charset="utf-8">
                        $candidate = trim($attrs['charset']);
                    } elseif (isset($attrs['http-equiv'], $attrs['content'])
                        && strcasecmp(trim($attrs['http-equiv']), 'content-type') === 0
                        && preg_match('~charset\s*=\s*["\']?\s*([^\s"\';]+)~i', $attrs['content'], $found)
                    ) {
                        $candidate = $found[1];
                    }

                    // The name is spliced into markup below, so accept only
                    // characters that can occur in a charset label; anything
                    // else (quotes, angle brackets, ...) is ignored.
                    if ($candidate !== null
                        && preg_match('~^[A-Za-z0-9][A-Za-z0-9._:+-]*$~', $candidate)
                    ) {
                        $charset = $candidate;
                    }
                    break;
            }
        }
    };
    $findCharset($probeDom);

    if ($charset === null) {
        return $probeDom;
    }

    // Re-parse with an explicit encoding declaration in front of the markup
    // so the parser uses the detected charset from the very first byte.
    $dom = new DOMDocument('1.0', $charset);
    $dom->loadHTML('<?xml encoding="' . $charset . '">' . $html);

    // childNodes is a live list: collect the processing instructions first,
    // then remove them, so that no sibling is skipped during iteration.
    $helperNodes = array();
    foreach ($dom->childNodes as $node) {
        if ($node->nodeType === XML_PI_NODE) {
            $helperNodes[] = $node;
        }
    }
    foreach ($helperNodes as $node) {
        $dom->removeChild($node);
    }

    $dom->encoding = $charset;

    return $dom;
}