问题
I have an XML that has empty element tags:
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
I am trying to avoid adding this element tag to my DB, but I could not. This is my whole code that is also running as a cronjob.. in order to update my records.. I saw some userful information about you guys... but I could not understand what I am missing.. and could not find a solution.. I still have manufacturer empty records in my DB, although I am checking (or at least I am trying to check) if manufacturer element is empty do not add it do DB.. Could you give me some explanation or solution to this problem? thanks!!
<?php
header('Content-type: text/html; charset=UTF-8') ;
//connection info here
// Create connection
$conn = new mysqli($servername, $dbuser, $password, $dbname,3306);
// Check connection
if ($conn->connect_error) {
die("Connection failed: " . $conn->connect_error);
}
// Change character set to utf8
$conn->set_charset("utf8");
date_default_timezone_set('Europe/Athens');
//$date = strtotime("now");
$sql = $conn->query("SELECT * FROM business_xml WHERE activate=1 AND business_id=54"); //business_id = 54
while($exe_xml = mysqli_fetch_object($sql)) {
$value_of = 0;
$diff = 0;
if($exe_xml->date_modified == NULL) {
$value_of = 1;
}
else {
$modified = $exe_xml->date_modified;
$now = strtotime("now");
$diff = $now - $modified;
}
//if($diff > 3599 || $value_of == 1) { //$diff = 3600 means one hour difference
//get values first
$date_modified = strtotime("now");
$business_id = $exe_xml->business_id;
$xml_link = $exe_xml->xml_link;
//UPDATE modified date
$update_business_xml = $conn->query('UPDATE business_xml SET date_modified="' . $date_modified . '" WHERE business_id="' . $business_id . '"');
$product_xml_link = $xml_link;
$product_xml_link = $conn->real_escape_string($product_xml_link);
$product_xml_link = trim(stripslashes($product_xml_link));
if($product_xml_link) {
$count_errors = 0;
//query to find products ids
//if exists UPDATE, else INSERT
$query_ids = $conn->query('SELECT pid FROM products WHERE business_id=54');
$rows_ids = mysqli_num_rows($query_ids);
$count_id = 0;
if($rows_ids > 0) {
while($exe_ids = mysqli_fetch_object($query_ids)) {
$arr_ids[$count_id] = $exe_ids->pid;
$count_id++;
}
}
$reader = new XMLReader();
$reader->open($product_xml_link);
while($reader->read()) {
if($reader->nodeType == XMLReader::ELEMENT && $reader->name == 'product' ) {
//For each node to type "product"
$product = new SimpleXMLElement($reader->readOuterXml());
$pid = $product->id;
$name = $product->name;
$name = mb_strtolower($name);
$mpn = $product->mpn;
$ean = $product->ean;
$sku = $product->sku;
$link = $product->link;
$price = $product->price;
$category_id = $product->category->attributes();
$category_path = $product->category;
$category_path = mb_strtolower($category_path);
$image = $product->image;
$availability = $product->availability;
$size = $product->size;
$size = mb_strtolower($size);
$color = $product->color;
$color = mb_strtolower($color);
$weight = $product->weight;
$description = $product->description;
$manufacturer = $product->manufacturer;
$manufacturer = trim($manufacturer);
$instock = "Y";
$product_image = $image;
$check_product_url = $link;
$exist_pids = 0;
if(empty($pid) || empty($image) || empty($price) || empty($name) || empty($link) || empty($manufacturer)) {
//update status for this pid product, cause there is an empty value, which it is required
$update_business_xml = $conn->query('UPDATE products SET status=0 WHERE business_id="' . $business_id . '" AND pid= "' . $pid . '"');
$count_errors++;
}
else {
$date_modified = strtotime("now");
for($i = 0; $i < $rows_ids; $i++) {
if($arr_ids[$i] == $pid) {
$exist_pids = 1;
}
} //end for loop
if($category_id == 613 || $category_id == 604 || $category_id == 635) {
//forbidden products
//update status for this pid product, cause there is an empty value, which it is required
$update_business_xml = $conn->query('UPDATE products SET status=0 WHERE business_id="' . $business_id . '" AND pid= "' . $pid . '"');
$count_errors++;
}
else {
if($exist_pids == 1) {
$update_business_xml = $conn->query('UPDATE products SET name="' . $name . '", price="' . $price . '", product_link="' . $link . '", image="' . $image . '", size="' . $size . '", color="' . $color . '" date_modified="' . $date_modified . '" WHERE business_id="' . $business_id . '" AND pid= "' . $pid . '"');
}
else {
//$date_added = strtotime("now");
$status = 1;
$date = date('d-m-Y H:i:s');
$insert_business_xml = $conn->query('INSERT INTO products (business_id,pid,name,category,product_link,price,size,color,weight,description,manufacturer,mpn,ean,image,sku,instock,availability,status,date_added) VALUES("'.$business_id.'",
"' . mysqli_real_escape_string($conn,stripslashes($pid)) . '",
"' . mysqli_real_escape_string($conn,stripslashes($name)) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim($category_path))) . '",
"' . $check_product_url . '",
"' . mysqli_real_escape_string($conn,stripslashes($price)) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($size)))) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($color)))) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim($weight))) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim($description))) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim(strtolower($manufacturer)))) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim($mpn))) . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim($ean))) . '",
"' . $product_image . '",
"' . mysqli_real_escape_string($conn,stripslashes(trim($sku))) . '",
"' . mysqli_real_escape_string($conn,stripslashes($instock)) . '",
"' . mysqli_real_escape_string($conn,stripslashes($availability)) . '",
"' . $status . '", "' . $date . '")');
}
}
}
} //end if $reader-nodeType
} //end while loop
$reader->close();
$insert_messages = "Your XML file has been updated successfully! We found <strong>" . $count_errors . "</strong> errors. In case errors found, please check your dashboard!";
echo $insert_messages;
} //end $product_xml_link
//} //end if diff 3600
} //end while loop
?>
回答1:
Actually the "manufacturer" element is not empty. It contains three nodes.
$xml = <<<'XML'
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
XML;
$document = new DOMDocument();
$document->loadXml($xml);
foreach ($document->documentElement->childNodes as $childNode) {
var_dump(get_class($childNode), $childNode->textContent);
}
Output:
string(7) "DOMText"
string(1) "
"
string(15) "DOMCdataSection"
string(1) " "
string(7) "DOMText"
string(1) "
"
The text nodes are whitespace nodes, they contain the linebreaks. Everything in a DOM is a kind of node, even the attribute values are text child nodes of the attribute nodes. You can avoid having whitespace nodes in your DOM. Set the $preserveWhiteSpace property before parsing.
$document = new DOMDocument();
$document->preserveWhiteSpace = FALSE;
$document->loadXml($xml);
foreach ($document->documentElement->childNodes as $childNode) {
var_dump(get_class($childNode), $childNode->textContent);
}
Output:
string(15) "DOMCdataSection"
string(1) " "
With that only the CDATA section is left. It contains a space. You could use trim() on the text content and validate that the result is an empty string to see if it is "empty". You can do that directly on the "manufacturer" element node:
$document = new DOMDocument();
$document->loadXml($xml);
var_dump(trim($document->documentElement->textContent) === '');
Output:
bool(true)
Another way is to use the normalize-space(). It replaces all groups of whitespaces with a single space and trims the result. So to fetch the text content of a node if it contains not only whitespaces:
$xml = <<<'XML'
<list>
<manufacturer>
<![CDATA[ ]]>
</manufacturer>
<manufacturer>
<![CDATA[ not only spaces ]]>
</manufacturer>
</list>
XML;
$document = new DOMDocument();
$document->loadXml($xml);
$xpath = new DOMXpath($document);
foreach ($xpath->evaluate('//manufacturer[normalize-space(.) != ""]') as $manufacturer) {
var_dump(trim($manufacturer->textContent));
}
Output:
string(15) "not only spaces"
来源:https://stackoverflow.com/questions/45213764/check-if-xml-element-tag-empty