From 3e1b783adbbf65c592d03b19cd153958be61ee31 Mon Sep 17 00:00:00 2001 From: Jehan Date: Mon, 20 Dec 2021 15:05:34 +0100 Subject: [PATCH] build: drop codepage conversion, use UTF-8 for Win install lang files. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewriting commit f8cdec1883e by Jernej Simončič for the development code (which has a slightly different list of languages). Also adding the meson version of this change. Since now InnoSetup fully supports UTF-8 isl (as long as they have a BOM), let's stop converting translations. Then we also avoid all conversion errors and get a simpler/more robust build process. --- build/windows/installer/lang/Makefile.am | 81 ++++++++++----------- build/windows/installer/lang/extractlang.sh | 15 ++-- build/windows/installer/lang/meson.build | 53 +++++++------- 3 files changed, 71 insertions(+), 78 deletions(-) diff --git a/build/windows/installer/lang/Makefile.am b/build/windows/installer/lang/Makefile.am index a3cc8532ad..7d7c2a9c0b 100644 --- a/build/windows/installer/lang/Makefile.am +++ b/build/windows/installer/lang/Makefile.am @@ -8,55 +8,54 @@ stamp_files = \ CLEANFILES = $(isl_files) $(stamp_files) languages = \ - ca:[ca]:WINDOWS-1252 \ - cs:[cs]:WINDOWS-1250 \ - da:[da]:WINDOWS-1252 \ - de:[de]:WINDOWS-1252 \ - el:[el]:WINDOWS-1253 \ - en::WINDOWS-1252 \ - en_GB:[en_GB]:UTF-8 \ - eo:[eo]:ISO-8859-3 \ - es:[es]:WINDOWS-1252 \ - eu:[eu]:WINDOWS-1252 \ - fi:[fi]:WINDOWS-1252 \ - fr:[fr]:WINDOWS-1252 \ - he:[he]:WINDOWS-1255 \ - hu:[hu]:WINDOWS-1250 \ - id:[id]:ISO-8859-1 \ - is:[is]:WINDOWS-1252 \ - it:[it]:WINDOWS-1252 \ - ja:[ja]:CP932 \ - kab:[kab]:UTF-8 \ - ko:[ko]:EUC-KR \ - lt:[lt]:ISO-8859-13 \ - lv:[lv]:ISO-8859-13 \ - mr:[mr]:UTF-8 \ - ms:[ms]:UTF-8 \ - nl:[nl]:WINDOWS-1252 \ - pl:[pl]:WINDOWS-1250 \ - pt:[pt]:WINDOWS-1252 \ - pt_BR:[pt_BR]:WINDOWS-1252 \ - ro:[ro]:WINDOWS-1250 \ - ru:[ru]:WINDOWS-1251 \ - sk:[sk]:WINDOWS-1250 \ - sl:[sl]:WINDOWS-1250 \ - 
sv:[sv]:WINDOWS-1252 \ - tr:[tr]:WINDOWS-1254 \ - uk:[uk]:WINDOWS-1251 \ - vi:[vi]:WINDOWS-1258 \ - zh_CN:[zh_CN]:WINDOWS-936 \ - zh_TW:[zh_TW]:CP950 + ca:[ca] \ + cs:[cs] \ + da:[da] \ + de:[de] \ + el:[el] \ + en: \ + en_GB:[en_GB] \ + eo:[eo] \ + es:[es] \ + eu:[eu] \ + fi:[fi] \ + fr:[fr] \ + he:[he] \ + hu:[hu] \ + id:[id] \ + is:[is] \ + it:[it] \ + ja:[ja] \ + kab:[kab] \ + ko:[ko] \ + lt:[lt] \ + lv:[lv] \ + mr:[mr] \ + ms:[ms] \ + nl:[nl] \ + pl:[pl] \ + pt:[pt] \ + pt_BR:[pt_BR] \ + ro:[ro] \ + ru:[ru] \ + sk:[sk] \ + sl:[sl] \ + sv:[sv] \ + tr:[tr] \ + uk:[uk] \ + vi:[vi] \ + zh_CN:[zh_CN] \ + zh_TW:[zh_TW] stamp-isl: setup.isl Makefile.am @ \ for lang in $(languages); do \ code=`echo $$lang | cut -d: -f1`; \ prefix=`echo $$lang | cut -d: -f2 | sed 's/[][]/\\\\\\0/g'`; \ - encoding=`echo $$lang | cut -d: -f3`; \ isl="$(builddir)/$$code.setup.isl"; \ echo " GEN $$isl"; \ - sed '/^\w\+'"$$prefix"'=/{s/\(.\)'"$$prefix"'/\1/;n};/^\w.*=/d' $< | \ - iconv -c -f UTF-8 -t $$encoding > "$$isl"; \ + printf '\357\273\277' > "$$isl"; \ + sed '/^\w\+'"$$prefix"'=/{s/\(.\)'"$$prefix"'/\1/;n};/^\w.*=/d' $< >> "$$isl"; \ done && \ echo timestamp > stamp-isl diff --git a/build/windows/installer/lang/extractlang.sh b/build/windows/installer/lang/extractlang.sh index f156ded5cd..fec8994005 100644 --- a/build/windows/installer/lang/extractlang.sh +++ b/build/windows/installer/lang/extractlang.sh @@ -1,16 +1,11 @@ #!/bin/bash prefix=$1 -encoding=$2 -infile=$3 +infile=$2 #replace [] with \[\] prefix=$(echo "$prefix" | sed 's/[][]/\\\0/g') -#echo to stdout -sed '/^\w\+'"$prefix"'=/{s/\(.\)'"$prefix"'/\1/;n};/^\w.*=/d' "$infile" \ -| iconv -c -f UTF-8 -t "$encoding" - -# TODO: currently we silently discard non-convertible characters with -c -# option on iconv. Eventually we would want to just use UTF-8 for all -# language files (ideally), instead of folloding the LanguageCodePage of -# the main .isl file as provided in the issrc repository. 
+# InnoSetup now supports UTF-8 for all languages, but it requires a BOM +# at the start of the file. +echo -ne "\\xEF\\xBB\\xBF"; +sed '/^\w\+'"$prefix"'=/{s/\(.\)'"$prefix"'/\1/;n};/^\w.*=/d' "$infile" diff --git a/build/windows/installer/lang/meson.build b/build/windows/installer/lang/meson.build index 1dd4c606de..977e29b050 100644 --- a/build/windows/installer/lang/meson.build +++ b/build/windows/installer/lang/meson.build @@ -15,43 +15,43 @@ isl_input = custom_target('setup.isl', languages = [ { 'code': 'ca', }, - { 'code': 'cs', 'encoding': 'WINDOWS-1250', }, + { 'code': 'cs', }, { 'code': 'da', }, { 'code': 'de', }, - { 'code': 'el', 'encoding': 'WINDOWS-1253', }, + { 'code': 'el', }, { 'code': 'en', 'prefix': '', }, - { 'code': 'en_GB', 'encoding': 'UTF-8', }, - { 'code': 'eo', 'encoding': 'ISO-8859-3', }, + { 'code': 'en_GB', }, + { 'code': 'eo', }, { 'code': 'es', }, { 'code': 'eu', }, { 'code': 'fi', }, { 'code': 'fr', }, - { 'code': 'he', 'encoding': 'WINDOWS-1255', }, - { 'code': 'hu', 'encoding': 'WINDOWS-1250', }, - { 'code': 'id', 'encoding': 'ISO-8859-1', }, + { 'code': 'he', }, + { 'code': 'hu', }, + { 'code': 'id', }, { 'code': 'is', }, { 'code': 'it', }, - { 'code': 'ja', 'encoding': 'CP932', }, - { 'code': 'kab', 'encoding': 'UTF-8', }, - { 'code': 'ko', 'encoding': 'EUC-KR', }, - { 'code': 'lt', 'encoding': 'ISO-8859-13', }, - { 'code': 'lv', 'encoding': 'ISO-8859-13', }, - { 'code': 'mr', 'encoding': 'UTF-8', }, - { 'code': 'ms', 'encoding': 'UTF-8', }, + { 'code': 'ja', }, + { 'code': 'kab', }, + { 'code': 'ko', }, + { 'code': 'lt', }, + { 'code': 'lv', }, + { 'code': 'mr', }, + { 'code': 'ms', }, { 'code': 'nl', }, - { 'code': 'pl', 'encoding': 'WINDOWS-1250', }, - { 'code': 'pt', }, + { 'code': 'pl', }, + { 'code': 'pt', }, { 'code': 'pt_BR', }, - { 'code': 'ro', 'encoding': 'WINDOWS-1250', }, - { 'code': 'ru', 'encoding': 'WINDOWS-1251', }, - { 'code': 'sk', 'encoding': 'WINDOWS-1250', }, - { 'code': 'sl', 'encoding': 'WINDOWS-1250', }, + 
{ 'code': 'ro', }, + { 'code': 'ru', }, + { 'code': 'sk', }, + { 'code': 'sl', }, { 'code': 'sv', }, - { 'code': 'tr', 'encoding': 'WINDOWS-1254', }, - { 'code': 'uk', 'encoding': 'WINDOWS-1251', }, - { 'code': 'vi', 'encoding': 'WINDOWS-1258', }, - { 'code': 'zh_CN', 'encoding': 'WINDOWS-936', }, - { 'code': 'zh_TW', 'encoding': 'CP950', }, + { 'code': 'tr', }, + { 'code': 'uk', }, + { 'code': 'vi', }, + { 'code': 'zh_CN', }, + { 'code': 'zh_TW', }, ] extractlang = find_program('extractlang.sh') @@ -60,14 +60,13 @@ all_isl = [] foreach language : languages lang_code = language.get('code') lang_prefix = language.get('prefix', '[@0@]'.format(lang_code)) - lang_encoding= language.get('encoding', 'WINDOWS-1252') islfile = '@0@.setup.isl'.format(lang_code) all_isl += custom_target(islfile, input : isl_input, output: islfile, - command: [ extractlang, lang_prefix, lang_encoding, '@INPUT@' ], + command: [ extractlang, lang_prefix, '@INPUT@' ], capture: true, build_by_default: true, )