คำเตือน: DOMDocument::loadHTML(): htmlParseEntityRef: expecting ';' ในเอนทิตี

Question 1

$html = file_get_contents("http://www.somesite.com/");

$dom = new DOMDocument();
$dom->loadHTML($html);

echo $dom;

ให้ผลลัพธ์ดังนี้

Warning: DOMDocument::loadHTML(): htmlParseEntityRef: expecting ';' in Entity,
Catchable fatal error: Object of class DOMDocument could not be converted to string in test.php on line 10

Question 2

หากต้องการซ่อนคำเตือน คุณสามารถใช้ libxml_use_internal_errors(true)

// create new DOMDocument
$document = new \DOMDocument('1.0', 'UTF-8');

// set error level
$internalErrors = libxml_use_internal_errors(true);

// load HTML
$document->loadHTML($html);

// Restore error level
libxml_use_internal_errors($internalErrors);

Question 3

ฉันพนันได้เลยว่าหากคุณดูซอร์สโค้ดของ http://www.somesite.com/ คุณจะพบอักขระพิเศษที่ยังไม่ได้แปลงเป็นเอนทิตี HTML อาจจะเป็นดังนี้:

<a href="/script.php?foo=bar&hello=world">link</a>

ควรจะเป็น

<a href="/script.php?foo=bar&amp;hello=world">link</a>

Question 4

$dom->@loadHTML($html);

สิ่งนี้ไม่ถูกต้องให้ใช้สิ่งนี้แทน:

@$dom->loadHTML($html);

Question 5

มีข้อผิดพลาด 2 ประการ: ประการที่สองเป็นเพราะ $dom ไม่ใช่สตริงแต่เป็นอ็อบเจ็กต์ จึงไม่สามารถ echo ได้ ข้อผิดพลาดแรกคือคำเตือนจาก loadHTML ซึ่งเกิดจากไวยากรณ์ที่ไม่ถูกต้องของเอกสาร HTML ที่จะโหลด (อาจเป็น & (ampersand) ที่ใช้เป็นตัวคั่นพารามิเตอร์และไม่ได้เข้ารหัสเป็นเอนทิตี &amp;)

คุณสามารถเพิกเฉยและระงับข้อความแสดงข้อผิดพลาดนี้ได้ (ไม่ใช่ตัวข้อผิดพลาด แค่ข้อความเท่านั้น!) โดยเรียกใช้ฟังก์ชันด้วยตัวดำเนินการควบคุมข้อผิดพลาด "@" ( http://www.php.net/manual/en/language.operators.errorcontrol.php )

@$dom->loadHTML($html);

Question 6

สาเหตุของข้อผิดพลาดร้ายแรงของคุณคือ DOMDocument ไม่มีเมธอด __toString() ดังนั้นจึงไม่สามารถ echo ได้

คุณอาจกำลังมองหา

echo $dom->saveHTML();

Question 7

โดยไม่คำนึงถึง echo (ซึ่งจะต้องแทนที่ด้วย print_r หรือ var_dump) หากมีการโยนข้อยกเว้น อ็อบเจ็กต์ควรว่างเปล่า:

DOMNodeList Object
(
)

วิธีการแก้

ตั้งค่า recover เป็น true และ strictErrorChecking เป็น false

$content = file_get_contents($url);

$doc = new DOMDocument();
$doc->recover = true;
$doc->strictErrorChecking = false;
$doc->loadHTML($content);

ใช้การเข้ารหัสเอนทิตีของ php กับเนื้อหาของมาร์กอัปซึ่งเป็นแหล่งที่มาของข้อผิดพลาดที่พบบ่อยที่สุด

Question 8

แทนที่ง่าย

$dom->loadHTML($html);

ด้วยความแข็งแกร่งยิ่งขึ้น ...

// Collect libxml parse errors internally instead of emitting PHP warnings.
libxml_use_internal_errors(true);

// Uses the same $dom / $html variables as the simple call this snippet
// replaces; PHP variable names are case-sensitive, so $DOM would be undefined.
if (!$dom->loadHTML($html)) {
    // Concatenate every message libxml recorded during the failed parse.
    $errors = "";
    foreach (libxml_get_errors() as $error) {
        $errors .= $error->message . "<br/>";
    }

    // Empty the libxml error buffer so a later parse starts clean.
    libxml_clear_errors();

    print "libxml errors:<br>$errors";
    return;
}

Question 9

$html = file_get_contents("http://www.somesite.com/");

$dom = new DOMDocument();
$dom->loadHTML(htmlspecialchars($html));

echo $dom;

ลองดู

Question 10

อีกวิธีหนึ่งที่เป็นไปได้คือ

$sContent = htmlspecialchars($sHTML);
$oDom = new DOMDocument();
$oDom->loadHTML($sContent);
echo html_entity_decode($oDom->saveHTML());

Question 11

ฉันรู้ว่านี่เป็นคำถามเก่า แต่ถ้าคุณต้องการแก้ไขเครื่องหมาย '&' ที่ผิดรูปแบบใน HTML ของคุณ คุณสามารถใช้รหัสที่คล้ายกับสิ่งนี้:

$page = file_get_contents('http://www.example.com');
$page = preg_replace('/\s+/', ' ', trim($page));
fixAmps($page, 0);
$dom->loadHTML($page);


/**
 * Escapes every bare '&' in an HTML string as '&amp;', modifying it in place.
 *
 * An ampersand is left untouched only when it directly starts a well-formed
 * character reference terminated by ';': a named reference (&name;), a decimal
 * reference (&#123;) or a hexadecimal reference (&#x1F;). Every other '&' --
 * for example an unescaped query-string separator inside an href -- is
 * rewritten to '&amp;'.
 *
 * The whole string is processed in a single pass, so there is no recursion
 * depth that grows with the number of ampersands in the document.
 *
 * @param string $html   Markup to fix; passed by reference and updated in place.
 * @param int    $offset Byte offset at which fixing starts; text before it is
 *                       left exactly as it is.
 * @return void
 */
function fixAmps(&$html, $offset) {
    $subject = (string) $html;
    $offset = (int) $offset;

    // Split the input so that only the part from $offset onwards is rewritten.
    // The casts guard against substr() returning false on older PHP versions
    // when $offset lies beyond the end of the string.
    $head = (string) substr($subject, 0, $offset);
    $tail = (string) substr($subject, $offset);

    // The negative lookahead is evaluated at each '&' itself, so a valid
    // entity further along in the text can never mask an earlier bare '&'.
    $fixed = preg_replace(
        '/&(?!(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z0-9]+);)/',
        '&amp;',
        $tail
    );

    // preg_replace() returns null on a PCRE failure; leave $html untouched then.
    if ($fixed !== null && $fixed !== $tail) {
        $html = $head . $fixed;
    }
}

Question 12

วิธีแก้ปัญหาอื่นที่เป็นไปได้คือไฟล์ของคุณอาจเป็นไฟล์ประเภท ASCII เพียงแค่เปลี่ยนประเภทไฟล์ของคุณ

Question 13

แม้จะมีคำเตือนนี้ โค้ดของฉันก็ยังทำงานได้ดี ดังนั้นฉันจึงปิดข้อความเตือนทั้งหมดด้วยคำสั่งนี้ที่บรรทัดที่ 1

<?php error_reporting(E_ERROR); ?>