bug 1011: Escape all special characters, not only non-ASCII ones while
authorMichael Tänzer <neo@nhng.de>
Tue, 31 Jan 2012 02:30:29 +0000 (03:30 +0100)
committerMichael Tänzer <neo@nhng.de>
Tue, 31 Jan 2012 02:30:29 +0000 (03:30 +0100)
keeping the PO file intact

Signed-off-by: Michael Tänzer <neo@nhng.de>
locale/Makefile
locale/escape_special_chars.php [new file with mode: 0755]

index b703fb2..1517066 100644 (file)
@@ -112,8 +112,7 @@ $(LANGS:%=$(MO_FILE_TEMPLATE)): $(MO_FILE_TEMPLATE): $(PO_FILE_TEMPLATE)
 $(LANGS:%=$(PO_FILE_TEMPLATE)):
        mkdir -p $(@D)
        wget --output-document - '$(@:$(PO_FILE_TEMPLATE)=$(PO_URL_TEMPLATE))' | \
-               # convert UTF-8 characters to HTML entities \
-               php -r 'while (!feof(STDIN)) echo mb_convert_encoding(fgets(STDIN), "HTML-ENTITIES", "UTF-8");' \
+               php -f escape_special_chars.php \
                > $@
 
 
diff --git a/locale/escape_special_chars.php b/locale/escape_special_chars.php
new file mode 100755 (executable)
index 0000000..4ec0d9a
--- /dev/null
@@ -0,0 +1,65 @@
+#!/usr/bin/php -q
+<?php
+/*
+LibreSSL - CAcert web application
+Copyright (C) 2004-2012  CAcert Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+/* Convert special characters in UTF-8 encoded PO files to HTML entities */
+
+
+/**
+ * Tells whether a PO file line starts with the keyword "msgstr".
+ *
+ * This is a pure prefix test, so plural forms such as "msgstr[0]" match too.
+ *
+ * @param string $line one line of the PO file
+ * @return bool true if $line begins with "msgstr", false otherwise
+ */
+function is_msgstr($line) {
+       $keyword = 'msgstr';
+       return strncmp($line, $keyword, strlen($keyword)) === 0;
+}
+
+/**
+ * Tells whether a PO file line starts with the keyword "msgid".
+ *
+ * This is a pure prefix test, so "msgid_plural" lines match too.
+ *
+ * @param string $line one line of the PO file
+ * @return bool true if $line begins with "msgid", false otherwise
+ */
+function is_msgid($line) {
+       $keyword = 'msgid';
+       return strncmp($line, $keyword, strlen($keyword)) === 0;
+}
+
+// Pass the metadata entry (first msgid/msgstr pair) through unmodified:
+// every line up to and including the first "msgstr" line is echoed as-is.
+for (;;) {
+       if (feof(STDIN)) {
+               break;
+       }
+
+       $header_line = fgets(STDIN);
+       if ($header_line === false) {
+               exit(0); // nothing left to read, mostly EOF after the last newline
+       }
+
+       echo $header_line;
+
+       if (is_msgstr($header_line)) {
+               break; // the remaining input is handled by the code below
+       }
+}
+
+// Tracks whether the current line is part of a msgstr (a translation).
+// Only translations are escaped. msgid/msgid_plural/msgctxt lines are lookup
+// keys and must stay byte-identical; comments and blank lines are left alone
+// so the PO file stays intact.
+$msgstr = false;
+
+while (!feof(STDIN)) {
+       $line = fgets(STDIN);
+       if ($line === false) {
+               exit(0); //EOF after newline mostly
+       }
+
+       if (is_msgstr($line)) {
+               $msgstr = true;
+       } elseif ($line[0] !== '"') {
+               // A line starting with a double quote continues the previous
+               // keyword's string. Anything else (msgid, msgctxt, a comment,
+               // a blank line) ends the translation. Resetting only on msgid
+               // would also escape comments and msgctxt lines that follow a
+               // msgstr. fgets() never returns an empty string, so $line[0]
+               // always exists.
+               $msgstr = false;
+       }
+
+       if ($msgstr) {
+               // ENT_NOQUOTES leaves the double quotes that delimit PO strings
+               // untouched
+               $line = htmlentities($line, ENT_NOQUOTES, "UTF-8");
+       }
+       echo $line;
+}