OK, as this is a WordPress problem and it sadly goes a little deeper, I need to remove every occurrence of the parent div together with its contents:
For the UTF-8 issue, I found a hack in the PHP manual.
So my function looks as follows:
/**
 * Echo the current post's content with every "wp-caption" div (and the
 * image/caption markup inside it) removed.
 *
 * Output is produced only when BOTH conditions hold:
 *   - the current view is the home page, the front page, or a category page
 *     (is_home() / is_front_page() / is_category());
 *   - the current post has the post format "image".
 * In every other case, or when the content is empty, nothing is echoed.
 *
 * If the content cannot be parsed as HTML it is echoed unmodified rather than
 * being dropped.
 *
 * @return void
 */
function rem_fi_cat() {
	if ( ! ( is_home() || is_front_page() || is_category() ) || ! has_post_format( 'image' ) ) {
		return;
	}

	$content = get_the_content( '', true );
	if ( '' == $content ) {
		return;
	}

	// Private wrapper element: keeps leading/trailing text nodes of the
	// content intact (libxml would otherwise wrap bare text in <p>) and gives
	// a known container whose children are the content to emit.
	$wrapper_id = 'rem-fi-cat-root';

	// Collect libxml parse errors internally so HTML5 tags or sloppy markup
	// in the post do not emit PHP warnings into the page.
	$previous_errors = libxml_use_internal_errors( true );

	$document = new DOMDocument();
	/* The leading XML processing instruction is the UTF-8 "hack" described at
	 * http://www.php.net/manual/en/domdocument.loadhtml.php#95251
	 * Without it loadHTML() treats the input as ISO-8859-1.
	 */
	$loaded = $document->loadHTML(
		'<?xml encoding="UTF-8"><div id="' . $wrapper_id . '">' . $content . '</div>'
	);

	libxml_clear_errors();
	libxml_use_internal_errors( $previous_errors );

	$body = $loaded ? $document->getElementsByTagName( 'body' )->item( 0 ) : null;
	if ( null === $body ) {
		// Parsing failed: fall back to the untouched content.
		echo $content;
		return;
	}

	// Remove the processing instruction added above. childNodes is a live
	// list, so iterate over a snapshot while removing.
	foreach ( iterator_to_array( $document->childNodes ) as $item ) {
		if ( XML_PI_NODE === $item->nodeType ) {
			$document->removeChild( $item );
		}
	}
	$document->encoding = 'UTF-8';

	// Match "wp-caption" as one class token among several: WordPress caption
	// markup carries additional classes (e.g. alignment), so an exact
	// @class='wp-caption' comparison would miss it.
	$xpath    = new DOMXPath( $document );
	$captions = $xpath->query(
		"//div[contains(concat(' ', normalize-space(@class), ' '), ' wp-caption ')]"
	);
	foreach ( $captions as $caption ) {
		if ( null !== $caption->parentNode ) {
			$caption->parentNode->removeChild( $caption );
		}
	}

	// Serialize the children of <body>, unwrapping our private wrapper. Nodes
	// that ended up beside the wrapper (e.g. after a stray </div> in the
	// content) are emitted as well, so no content is silently lost.
	$output = '';
	foreach ( $body->childNodes as $node ) {
		$is_wrapper = ( $node instanceof DOMElement )
			&& $wrapper_id === $node->getAttribute( 'id' );

		if ( $is_wrapper ) {
			foreach ( $node->childNodes as $child ) {
				$output .= $document->saveHTML( $child );
			}
		} else {
			$output .= $document->saveHTML( $node );
		}
	}

	echo $output;
}