From bf5344ce04b8f37b658f481a09f1742f84c182a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20T=C3=A4nzer?=
Date: Wed, 14 Aug 2013 22:45:32 +0200
Subject: bug 1190: RSS may contain HTML markup => decode it and then encode it again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michael Tänzer
---
 pages/index/0.php | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'pages/index/0.php')

diff --git a/pages/index/0.php b/pages/index/0.php
index a2c2e5a..db46673 100644
--- a/pages/index/0.php
+++ b/pages/index/0.php
@@ -53,7 +53,13 @@
  $query = "./description";
  $nodeList = $xpath->query($query, $item);
- $description = recode_string("UTF8..html" , $nodeList->item(0)->nodeValue);
+ $description = $nodeList->item(0)->nodeValue;
+ // The description may contain HTML entities => convert them
+ $description = html_entity_decode($description, ENT_COMPAT | ENT_HTML401, 'UTF-8');
+ // Description may contain HTML markup and unicode characters => encode them
+ // If we didn't decode and then encode again, (i.e. take the content
+ // as it is in the RSS feed) we might inject harmful markup
+ $description = recode_string("UTF8..html", $description);
  printf("

%s

\n", $title);
  printf("

%s

\n", $description);
-- 
cgit v1.2.1