build: drop codepage conversion, use UTF-8 for Win install lang files.

This is a rewrite of commit f8cdec1883 by Jernej Simončič for the
development code (which has a slightly different list of languages).

It also adds the meson version of this change.

Now that InnoSetup fully supports UTF-8 .isl files (as long as they
have a BOM), let's stop converting translations. This also avoids all
conversion errors and gives a simpler, more robust build process.
This commit is contained in:
Jehan 2021-12-20 15:05:34 +01:00
parent 321cdd9d04
commit 3e1b783adb
3 changed files with 71 additions and 78 deletions

View File

@ -8,55 +8,54 @@ stamp_files = \
# Generated .isl files and stamp files are listed in CLEANFILES so that the
# clean target removes them (standard automake behaviour; the definitions of
# isl_files and stamp_files are not fully visible in this excerpt).
CLEANFILES = $(isl_files) $(stamp_files)
# Installer languages, one entry per language, fields separated by ':':
# the language code, then the key suffix used for that language in setup.isl
# (empty for "en"), and - in the first run of entries only - a charset name.
# NOTE(review): every code is listed twice below, first with a charset field
# and then without one, and the first run ends without a continuation
# backslash. This looks like the before/after of dropping the charset
# conversion - confirm which run is the intended one.
languages = \
ca:[ca]:WINDOWS-1252 \
cs:[cs]:WINDOWS-1250 \
da:[da]:WINDOWS-1252 \
de:[de]:WINDOWS-1252 \
el:[el]:WINDOWS-1253 \
en::WINDOWS-1252 \
en_GB:[en_GB]:UTF-8 \
eo:[eo]:ISO-8859-3 \
es:[es]:WINDOWS-1252 \
eu:[eu]:WINDOWS-1252 \
fi:[fi]:WINDOWS-1252 \
fr:[fr]:WINDOWS-1252 \
he:[he]:WINDOWS-1255 \
hu:[hu]:WINDOWS-1250 \
id:[id]:ISO-8859-1 \
is:[is]:WINDOWS-1252 \
it:[it]:WINDOWS-1252 \
ja:[ja]:CP932 \
kab:[kab]:UTF-8 \
ko:[ko]:EUC-KR \
lt:[lt]:ISO-8859-13 \
lv:[lv]:ISO-8859-13 \
mr:[mr]:UTF-8 \
ms:[ms]:UTF-8 \
nl:[nl]:WINDOWS-1252 \
pl:[pl]:WINDOWS-1250 \
pt:[pt]:WINDOWS-1252 \
pt_BR:[pt_BR]:WINDOWS-1252 \
ro:[ro]:WINDOWS-1250 \
ru:[ru]:WINDOWS-1251 \
sk:[sk]:WINDOWS-1250 \
sl:[sl]:WINDOWS-1250 \
sv:[sv]:WINDOWS-1252 \
tr:[tr]:WINDOWS-1254 \
uk:[uk]:WINDOWS-1251 \
vi:[vi]:WINDOWS-1258 \
zh_CN:[zh_CN]:WINDOWS-936 \
zh_TW:[zh_TW]:CP950
ca:[ca] \
cs:[cs] \
da:[da] \
de:[de] \
el:[el] \
en: \
en_GB:[en_GB] \
eo:[eo] \
es:[es] \
eu:[eu] \
fi:[fi] \
fr:[fr] \
he:[he] \
hu:[hu] \
id:[id] \
is:[is] \
it:[it] \
ja:[ja] \
kab:[kab] \
ko:[ko] \
lt:[lt] \
lv:[lv] \
mr:[mr] \
ms:[ms] \
nl:[nl] \
pl:[pl] \
pt:[pt] \
pt_BR:[pt_BR] \
ro:[ro] \
ru:[ru] \
sk:[sk] \
sl:[sl] \
sv:[sv] \
tr:[tr] \
uk:[uk] \
vi:[vi] \
zh_CN:[zh_CN] \
zh_TW:[zh_TW]
# Generate one <code>.setup.isl per entry of $(languages) from setup.isl,
# then touch the stamp file.
#
# Only the first two ':'-separated fields of an entry are used: the language
# code (which names the output file) and the key suffix to select, such as
# "[fr]" (empty for unsuffixed keys). Any further field is ignored.
#
# The text is copied without charset conversion. InnoSetup accepts UTF-8
# .isl files provided they start with a BOM, so the three BOM bytes are
# written first. printf with octal escapes is used because "echo -ne" is
# not understood by every /bin/sh.
#
# sed script: "key<suffix>=" lines lose the suffix and are printed (the n
# command emits them before the delete rule runs); any other line that
# starts with a word character and contains "=" is deleted; everything
# else passes through unchanged.
#
# A failure while writing any file aborts the recipe, so the stamp is only
# created when every language file was generated.
stamp-isl: setup.isl Makefile.am
	@ \
	for lang in $(languages); do \
	  code=`echo $$lang | cut -d: -f1`; \
	  prefix=`echo $$lang | cut -d: -f2 | sed 's/[][]/\\\\\\0/g'`; \
	  isl="$(builddir)/$$code.setup.isl"; \
	  echo " GEN $$isl"; \
	  { printf '\357\273\277' && \
	    sed '/^\w\+'"$$prefix"'=/{s/\(.\)'"$$prefix"'/\1/;n};/^\w.*=/d' $<; \
	  } > "$$isl" || exit 1; \
	done && \
	echo timestamp > stamp-isl

View File

@ -1,16 +1,11 @@
#!/bin/bash
# Extract the translations of one language from the combined setup.isl and
# write them to stdout, preceded by a UTF-8 byte order mark.
#
# Usage: extractlang.sh PREFIX INFILE
#        extractlang.sh PREFIX ENCODING INFILE   (legacy form, ENCODING ignored)
#
# PREFIX is the bracketed tag appended to the keys of the wanted language in
# INFILE (for example "[fr]"), or the empty string for unsuffixed keys.
# The text is passed through unconverted; INFILE is expected to be UTF-8
# already (the former iconv step read it as UTF-8).

if [ "$#" -lt 2 ]; then
  echo "usage: $0 PREFIX [ENCODING] INFILE" >&2
  exit 1
fi

prefix=$1
# No charset conversion is done any more, so when the legacy three-argument
# form is used the ENCODING argument is simply skipped.
if [ "$#" -ge 3 ]; then
  infile=$3
else
  infile=$2
fi

#replace [] with \[\] so that sed matches the prefix literally
prefix=$(echo "$prefix" | sed 's/[][]/\\\0/g')

# InnoSetup now supports UTF-8 for all languages, but it requires a BOM
# at the start of the file, so it must be the very first output.
printf '\357\273\277'

#echo to stdout
# "key<prefix>=" lines lose the prefix and are printed (n emits them before
# the delete rule runs); any other line starting with a word character and
# containing "=" is deleted; everything else passes through unchanged.
sed '/^\w\+'"$prefix"'=/{s/\(.\)'"$prefix"'/\1/;n};/^\w.*=/d' "$infile"

View File

@ -15,43 +15,43 @@ isl_input = custom_target('setup.isl',
# One dictionary per installer language, consumed by the foreach loop below:
#   'code'     - language code; the generated file is '<code>.setup.isl'.
#   'prefix'   - optional key suffix to select; defaults to '[<code>]'
#                ('en' uses an empty one).
#   'encoding' - optional charset name; the loop falls back to
#                'WINDOWS-1252' when it is absent.
# NOTE(review): several codes appear twice below, once with an 'encoding' key
# and once without. This looks like the before/after of dropping the charset
# conversion - confirm which entries are intended, since each entry becomes
# its own custom_target.
languages = [
{ 'code': 'ca', },
{ 'code': 'cs', 'encoding': 'WINDOWS-1250', },
{ 'code': 'cs', },
{ 'code': 'da', },
{ 'code': 'de', },
{ 'code': 'el', 'encoding': 'WINDOWS-1253', },
{ 'code': 'el', },
{ 'code': 'en', 'prefix': '', },
{ 'code': 'en_GB', 'encoding': 'UTF-8', },
{ 'code': 'eo', 'encoding': 'ISO-8859-3', },
{ 'code': 'en_GB', },
{ 'code': 'eo', },
{ 'code': 'es', },
{ 'code': 'eu', },
{ 'code': 'fi', },
{ 'code': 'fr', },
{ 'code': 'he', 'encoding': 'WINDOWS-1255', },
{ 'code': 'hu', 'encoding': 'WINDOWS-1250', },
{ 'code': 'id', 'encoding': 'ISO-8859-1', },
{ 'code': 'he', },
{ 'code': 'hu', },
{ 'code': 'id', },
{ 'code': 'is', },
{ 'code': 'it', },
{ 'code': 'ja', 'encoding': 'CP932', },
{ 'code': 'kab', 'encoding': 'UTF-8', },
{ 'code': 'ko', 'encoding': 'EUC-KR', },
{ 'code': 'lt', 'encoding': 'ISO-8859-13', },
{ 'code': 'lv', 'encoding': 'ISO-8859-13', },
{ 'code': 'mr', 'encoding': 'UTF-8', },
{ 'code': 'ms', 'encoding': 'UTF-8', },
{ 'code': 'ja', },
{ 'code': 'kab', },
{ 'code': 'ko', },
{ 'code': 'lt', },
{ 'code': 'lv', },
{ 'code': 'mr', },
{ 'code': 'ms', },
{ 'code': 'nl', },
{ 'code': 'pl', 'encoding': 'WINDOWS-1250', },
{ 'code': 'pt', },
{ 'code': 'pl', },
{ 'code': 'pt', },
{ 'code': 'pt_BR', },
{ 'code': 'ro', 'encoding': 'WINDOWS-1250', },
{ 'code': 'ru', 'encoding': 'WINDOWS-1251', },
{ 'code': 'sk', 'encoding': 'WINDOWS-1250', },
{ 'code': 'sl', 'encoding': 'WINDOWS-1250', },
{ 'code': 'ro', },
{ 'code': 'ru', },
{ 'code': 'sk', },
{ 'code': 'sl', },
{ 'code': 'sv', },
{ 'code': 'tr', 'encoding': 'WINDOWS-1254', },
{ 'code': 'uk', 'encoding': 'WINDOWS-1251', },
{ 'code': 'vi', 'encoding': 'WINDOWS-1258', },
{ 'code': 'zh_CN', 'encoding': 'WINDOWS-936', },
{ 'code': 'zh_TW', 'encoding': 'CP950', },
{ 'code': 'tr', },
{ 'code': 'uk', },
{ 'code': 'vi', },
{ 'code': 'zh_CN', },
{ 'code': 'zh_TW', },
]
# Helper script run once per language by the custom_target loop; its stdout
# is captured as the language's .isl file.
extractlang = find_program('extractlang.sh')
@ -60,14 +60,13 @@ all_isl = []
foreach language : languages
lang_code = language.get('code')
lang_prefix = language.get('prefix', '[@0@]'.format(lang_code))
lang_encoding= language.get('encoding', 'WINDOWS-1252')
islfile = '@0@.setup.isl'.format(lang_code)
all_isl += custom_target(islfile,
input : isl_input,
output: islfile,
command: [ extractlang, lang_prefix, lang_encoding, '@INPUT@' ],
command: [ extractlang, lang_prefix, '@INPUT@' ],
capture: true,
build_by_default: true,
)