bug 1190: RSS may contain HTML markup => decode it and then encode it again
author: Michael Tänzer <neo@nhng.de>
Wed, 14 Aug 2013 20:45:32 +0000 (22:45 +0200)
committer: Michael Tänzer <neo@nhng.de>
Wed, 14 Aug 2013 20:45:32 +0000 (22:45 +0200)
Signed-off-by: Michael Tänzer <neo@nhng.de>
pages/index/0.php

index a2c2e5a..db46673 100644 (file)
 
                $query = "./description";
                $nodeList = $xpath->query($query, $item);
-               $description = recode_string("UTF8..html" , $nodeList->item(0)->nodeValue);
+               $description = $nodeList->item(0)->nodeValue;
+               // The description may contain HTML entities => convert them
+               $description = html_entity_decode($description, ENT_COMPAT | ENT_HTML401, 'UTF-8');
+               // Description may contain HTML markup and unicode characters => encode them
+               // If we didn't decode and then encode again, (i.e. take the content
+               // as it is in the RSS feed) we might inject harmful markup
+               $description = recode_string("UTF8..html", $description);
 
                printf("<h3> %s </h3>\n", $title);
                printf("<p> %s </p>\n", $description);