Add updated script for transforming ftl text

This commit is contained in:
Damien Elmes 2023-07-06 22:27:09 +10:00
parent 516abf7cfa
commit fc0bff4166
8 changed files with 505 additions and 266 deletions

View File

@ -164,7 +164,7 @@ fn build_rsbridge(build: &mut Build) -> Result<()> {
pub fn check_rust(build: &mut Build) -> Result<()> {
let inputs = inputs![
glob!("{rslib/**,pylib/rsbridge/**,build/**,tools/workspace-hack/**}"),
glob!("{rslib/**,pylib/rsbridge/**,ftl/**,build/**,tools/workspace-hack/**}"),
"Cargo.lock",
"Cargo.toml",
"rust-toolchain.toml",

View File

@ -15,8 +15,9 @@ use garbage_collection::write_ftl_json;
use garbage_collection::DeprecateEntriesArgs;
use garbage_collection::GarbageCollectArgs;
use garbage_collection::WriteJsonArgs;
use string::string_operation;
use string::StringArgs;
use crate::string::string_operation;
use crate::string::StringCommand;
#[derive(Parser)]
struct Cli {
@ -41,10 +42,9 @@ enum Command {
/// and adding a deprecation warning. An entry is considered unused if
/// cannot be found in a source or JSON file.
Deprecate(DeprecateEntriesArgs),
/// Copy or move a key from one ftl file to another, including all its
/// translations. Source and destination should be e.g.
/// ftl/core-repo/core.
String(StringArgs),
/// Operations on individual messages and their translations.
#[clap(subcommand)]
String(StringCommand),
}
fn main() -> Result<()> {

View File

@ -1,207 +0,0 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use anki_io::read_to_string;
use anki_io::write_file;
use anki_io::write_file_if_changed;
use anki_io::ToUtf8PathBuf;
use anyhow::Context;
use anyhow::Result;
use camino::Utf8Component;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use clap::Args;
use clap::ValueEnum;
use fluent_syntax::ast::Entry;
use fluent_syntax::parser;
use crate::serialize;
// Selects what `string_operation` does with the source key. Plain comments
// are used here on purpose: doc comments on a `ValueEnum` feed into clap's
// generated help.
#[derive(Clone, ValueEnum, PartialEq, Eq, Debug)]
pub enum StringOperation {
// Write the key to the destination and leave the source as it is.
Copy,
// Write the key to the destination, then delete it from the source.
Move,
}
// Command-line arguments consumed by `string_operation`.
#[derive(Args)]
pub struct StringArgs {
// Whether the key is copied or moved.
operation: StringOperation,
/// The folder which contains the different languages as subfolders, e.g.
/// ftl/core-repo/core
src_lang_folder: Utf8PathBuf,
// Scanned for language subfolders in the same way as `src_lang_folder`.
dst_lang_folder: Utf8PathBuf,
/// E.g. 'actions-run'. File will be inferred from the prefix.
src_key: String,
/// If not specified, the key & file will be the same as the source key.
dst_key: Option<String>,
}
/// Copy or move the message `args.src_key`, together with its translations,
/// into the file inferred from the destination key.
///
/// The entry is collected from every language folder under
/// `args.src_lang_folder` whose source file exists, then appended to the
/// matching language files under `args.dst_lang_folder` (only files that
/// already exist are touched) and to the additional template folder, if any.
/// For `StringOperation::Move` the key is afterwards deleted from the source
/// files.
///
/// # Errors
/// Fails if a folder cannot be listed, a file cannot be written, or the
/// template entry was never found while a template folder needs updating.
///
/// # Panics
/// Panics if the source `templates` file exists but lacks the key.
pub fn string_operation(args: StringArgs) -> Result<()> {
    let old_key = &args.src_key;
    let new_key = args.dst_key.as_ref().unwrap_or(old_key);
    let src_ftl_file = ftl_file_from_key(old_key);
    let dst_ftl_file = ftl_file_from_key(new_key);
    let mut entries: HashMap<&str, Entry<String>> = HashMap::new();

    // Fetch source strings
    let src_langs = all_langs(&args.src_lang_folder)?;
    for lang in &src_langs {
        let ftl_path = lang.join(&src_ftl_file);
        if !ftl_path.exists() {
            continue;
        }
        if let Some(entry) = get_entry(&ftl_path, old_key) {
            entries.insert(lang.file_name().unwrap(), entry);
        } else {
            // the key might be missing from some languages, but it should not be missing
            // from the template. `lang` is the full folder path, so only its last
            // component can be compared with the folder name.
            assert_ne!(
                lang.file_name(),
                Some("templates"),
                "{old_key} is missing from {ftl_path}"
            );
        }
    }

    // Apply to destination
    let dst_langs = all_langs(&args.dst_lang_folder)?;
    for lang in &dst_langs {
        let ftl_path = lang.join(&dst_ftl_file);
        if !ftl_path.exists() {
            continue;
        }
        if let Some(entry) = entries.get(lang.file_name().unwrap()) {
            println!("Updating {ftl_path}");
            write_entry(&ftl_path, new_key, entry.clone())?;
        }
    }

    if let Some(template_dir) = additional_template_folder(&args.dst_lang_folder) {
        // Our templates are also stored in the source tree, and need to be updated too.
        let ftl_path = template_dir.join(&dst_ftl_file);
        // Report a missing template entry as an error instead of panicking.
        let template_entry = entries.get("templates").with_context(|| {
            format!(
                "{old_key} was not found in the templates of {}",
                args.src_lang_folder
            )
        })?;
        println!("Updating {ftl_path}");
        write_entry(&ftl_path, new_key, template_entry.clone())?;
    }

    if args.operation == StringOperation::Move {
        // Delete the old key
        for lang in &src_langs {
            let ftl_path = lang.join(&src_ftl_file);
            if !ftl_path.exists() {
                continue;
            }
            if delete_entry(&ftl_path, old_key)? {
                println!("Deleted entry from {ftl_path}");
            }
        }
        if let Some(template_dir) = additional_template_folder(&args.src_lang_folder) {
            let ftl_path = template_dir.join(&src_ftl_file);
            if delete_entry(&ftl_path, old_key)? {
                println!("Deleted entry from {ftl_path}");
            }
        }
    }

    Ok(())
}
/// Map a translation-repo folder to the template folder kept in the source
/// tree, returning it only if that folder exists on disk.
fn additional_template_folder(dst_folder: &Utf8Path) -> Option<Utf8PathBuf> {
    // ftl/core-repo/core -> ftl/core
    // ftl/qt-repo/qt -> ftl/qt
    let candidate: Utf8PathBuf = std::iter::once(Utf8Component::Normal("ftl"))
        .chain(dst_folder.components().skip(2))
        .collect();
    Some(candidate).filter(|folder| folder.exists())
}
fn all_langs(lang_folder: &Utf8Path) -> Result<Vec<Utf8PathBuf>> {
std::fs::read_dir(lang_folder)
.with_context(|| format!("reading {:?}", lang_folder))?
.filter_map(Result::ok)
.map(|e| Ok(e.path().utf8()?))
.collect()
}
/// Derive the `.ftl` file name from a key: everything before the first `-`
/// (or the whole key when it has no `-`), followed by `.ftl`.
fn ftl_file_from_key(old_key: &str) -> String {
    let prefix = match old_key.split_once('-') {
        Some((head, _)) => head,
        None => old_key,
    };
    format!("{prefix}.ftl")
}
/// Return the first message in `fname` whose identifier equals `key`.
///
/// # Panics
/// Panics if the file cannot be read or does not parse cleanly.
fn get_entry(fname: &Utf8Path, key: &str) -> Option<Entry<String>> {
    let text = fs::read_to_string(fname).unwrap();
    let parsed = parser::parse(text).unwrap();
    parsed
        .body
        .into_iter()
        .find(|entry| matches!(entry, Entry::Message(message) if message.id.name == key))
}
fn write_entry(path: &Utf8Path, key: &str, mut entry: Entry<String>) -> Result<()> {
if let Entry::Message(message) = &mut entry {
message.id.name = key.to_string();
}
let content = if Path::new(path).exists() {
fs::read_to_string(path).unwrap()
} else {
String::new()
};
let mut resource = parser::parse(content).unwrap();
resource.body.push(entry);
let mut modified = serialize::serialize(&resource);
// escape leading dots
modified = modified.replace(" +.", " +{\".\"}");
// ensure the resulting serialized file is valid by parsing again
let _ = parser::parse(modified.clone()).unwrap();
// it's ok, write it out
Ok(write_file(path, modified)?)
}
fn delete_entry(path: &Utf8Path, key: &str) -> Result<bool> {
let content = read_to_string(path)?;
let mut resource = parser::parse(content).unwrap();
let mut did_change = false;
resource.body.retain(|entry| {
!if let Entry::Message(message) = entry {
if message.id.name == key {
did_change = true;
true
} else {
false
}
} else {
false
}
});
let mut modified = serialize::serialize(&resource);
// escape leading dots
modified = modified.replace(" +.", " +{\".\"}");
// ensure the resulting serialized file is valid by parsing again
let _ = parser::parse(modified.clone()).unwrap();
// it's ok, write it out
write_file_if_changed(path, modified)?;
Ok(did_change)
}

103
ftl/src/string/copy.rs Normal file
View File

@ -0,0 +1,103 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::assert_ne;
use std::collections::HashMap;
use std::println;
use camino::Utf8PathBuf;
use clap::Args;
use fluent_syntax::ast::Entry;
use crate::string;
// Command-line arguments shared by the copy and move subcommands; consumed by
// `copy_or_move`. Plain comments are used for additions because doc comments
// on `Args` fields feed into clap's generated help.
#[derive(Args)]
pub struct CopyOrMoveArgs {
/// The folder which contains the different languages as subfolders, e.g.
/// ftl/core-repo/core
src_lang_folder: Utf8PathBuf,
// Scanned for language subfolders in the same way as `src_lang_folder`.
dst_lang_folder: Utf8PathBuf,
/// E.g. 'actions-run'. File will be inferred from the prefix.
src_key: String,
/// If not specified, the key & file will be the same as the source key.
dst_key: Option<String>,
}
/// Mode passed to `copy_or_move`.
#[derive(Debug, Eq, PartialEq)]
pub(super) enum CopyOrMove {
/// Write the key to the destination and leave the source as it is.
Copy,
/// Write the key to the destination, then delete it from the source.
Move,
}
/// Copy or move the message `args.src_key`, together with its translations,
/// into the file inferred from the destination key.
///
/// The entry is collected from every language folder under
/// `args.src_lang_folder` whose source file exists, then appended to the
/// matching language files under `args.dst_lang_folder` (only files that
/// already exist are touched) and to the additional template folder, if any.
/// In `CopyOrMove::Move` mode the key is afterwards deleted from the source
/// files.
///
/// # Errors
/// Fails if a folder cannot be listed, a file cannot be written, or the
/// template entry was never found while a template folder needs updating.
///
/// # Panics
/// Panics if the source `templates` file exists but lacks the key.
pub(super) fn copy_or_move(mode: CopyOrMove, args: CopyOrMoveArgs) -> anyhow::Result<()> {
    let old_key = &args.src_key;
    let new_key = args.dst_key.as_ref().unwrap_or(old_key);
    let src_ftl_file = string::ftl_file_from_key(old_key);
    let dst_ftl_file = string::ftl_file_from_key(new_key);
    let mut entries: HashMap<&str, Entry<String>> = HashMap::new();

    // Fetch source strings
    let src_langs = string::all_langs(&args.src_lang_folder)?;
    for lang in &src_langs {
        let ftl_path = lang.join(&src_ftl_file);
        if !ftl_path.exists() {
            continue;
        }
        if let Some(entry) = string::get_entry(&ftl_path, old_key) {
            entries.insert(lang.file_name().unwrap(), entry);
        } else {
            // the key might be missing from some languages, but it should not be missing
            // from the template. `lang` is the full folder path, so only its last
            // component can be compared with the folder name.
            assert_ne!(
                lang.file_name(),
                Some("templates"),
                "{old_key} is missing from {ftl_path}"
            );
        }
    }

    // Apply to destination
    let dst_langs = string::all_langs(&args.dst_lang_folder)?;
    for lang in &dst_langs {
        let ftl_path = lang.join(&dst_ftl_file);
        if !ftl_path.exists() {
            continue;
        }
        if let Some(entry) = entries.get(lang.file_name().unwrap()) {
            println!("Updating {ftl_path}");
            string::write_entry(&ftl_path, new_key, entry.clone())?;
        }
    }

    if let Some(template_dir) = string::additional_template_folder(&args.dst_lang_folder) {
        // Our templates are also stored in the source tree, and need to be updated too.
        let ftl_path = template_dir.join(&dst_ftl_file);
        // Report a missing template entry as an error instead of panicking.
        let template_entry = entries.get("templates").ok_or_else(|| {
            anyhow::anyhow!(
                "{old_key} was not found in the templates of {}",
                args.src_lang_folder
            )
        })?;
        println!("Updating {ftl_path}");
        string::write_entry(&ftl_path, new_key, template_entry.clone())?;
    }

    if mode == CopyOrMove::Move {
        // Delete the old key
        for lang in &src_langs {
            let ftl_path = lang.join(&src_ftl_file);
            if !ftl_path.exists() {
                continue;
            }
            if string::delete_entry(&ftl_path, old_key)? {
                println!("Deleted entry from {ftl_path}");
            }
        }
        if let Some(template_dir) = string::additional_template_folder(&args.src_lang_folder) {
            let ftl_path = template_dir.join(&src_ftl_file);
            if string::delete_entry(&ftl_path, old_key)? {
                println!("Deleted entry from {ftl_path}");
            }
        }
    }

    Ok(())
}

147
ftl/src/string/mod.rs Normal file
View File

@ -0,0 +1,147 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
mod copy;
mod transform;
use std::fs;
use std::path::Path;
use anki_io::read_to_string;
use anki_io::write_file_if_changed;
use anki_io::ToUtf8PathBuf;
use anyhow::anyhow;
use anyhow::Context;
use anyhow::Result;
use camino::Utf8Component;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use clap::Subcommand;
use copy::CopyOrMoveArgs;
use fluent_syntax::ast::Entry;
use fluent_syntax::ast::Resource;
use fluent_syntax::parser;
use itertools::Itertools;
use crate::serialize;
use crate::string::copy::copy_or_move;
use crate::string::copy::CopyOrMove;
use crate::string::transform::transform;
use crate::string::transform::TransformArgs;
// Subcommands dispatched by `string_operation`. The `///` comments on the
// variants are picked up by clap as help text, so additions here use plain
// comments.
#[derive(Subcommand)]
pub enum StringCommand {
/// Copy a key from one ftl file to another, including all its
/// translations. Source and destination should be e.g.
/// ftl/core-repo/core.
Copy(CopyOrMoveArgs),
/// Move a key from one ftl file to another, including all its
/// translations. Source and destination should be e.g.
/// ftl/core-repo/core.
Move(CopyOrMoveArgs),
/// Apply a regex find&replace to the template and translations.
Transform(TransformArgs),
}
/// Run the requested string subcommand: `Transform` goes to `transform`,
/// while `Copy` and `Move` share `copy_or_move` with the matching mode.
pub fn string_operation(args: StringCommand) -> anyhow::Result<()> {
    let (mode, copy_args) = match args {
        StringCommand::Transform(transform_args) => return transform(transform_args),
        StringCommand::Copy(copy_args) => (CopyOrMove::Copy, copy_args),
        StringCommand::Move(copy_args) => (CopyOrMove::Move, copy_args),
    };
    copy_or_move(mode, copy_args)
}
/// Map a translation-repo folder to the template folder kept in the source
/// tree, returning it only if that folder exists on disk.
fn additional_template_folder(dst_folder: &Utf8Path) -> Option<Utf8PathBuf> {
    // ftl/core-repo/core -> ftl/core
    // ftl/qt-repo/qt -> ftl/qt
    let candidate: Utf8PathBuf = std::iter::once(Utf8Component::Normal("ftl"))
        .chain(dst_folder.components().skip(2))
        .collect();
    Some(candidate).filter(|folder| folder.exists())
}
fn all_langs(lang_folder: &Utf8Path) -> Result<Vec<Utf8PathBuf>> {
std::fs::read_dir(lang_folder)
.with_context(|| format!("reading {:?}", lang_folder))?
.filter_map(Result::ok)
.map(|e| Ok(e.path().utf8()?))
.collect()
}
/// Derive the `.ftl` file name from a key: everything before the first `-`
/// (or the whole key when it has no `-`), followed by `.ftl`.
fn ftl_file_from_key(old_key: &str) -> String {
    let prefix = old_key
        .split_once('-')
        .map_or(old_key, |(head, _rest)| head);
    let mut file_name = String::with_capacity(prefix.len() + 4);
    file_name.push_str(prefix);
    file_name.push_str(".ftl");
    file_name
}
/// Read and parse the Fluent file at `ftl_path`.
///
/// # Errors
/// Fails if the file cannot be read, or if the parser reports any errors; the
/// parser errors are joined into a single message that names the file.
fn parse_file(ftl_path: &Utf8Path) -> Result<Resource<String>> {
    // Propagate read failures rather than panicking; callers already handle `Result`.
    let content = read_to_string(ftl_path)?;
    parser::parse(content).map_err(|(_, errs)| {
        anyhow!(
            "while reading {ftl_path}: {}",
            errs.into_iter().map(|err| err.to_string()).join(", ")
        )
    })
}
/// True if changed.
fn serialize_file(path: &Utf8Path, resource: &Resource<String>) -> Result<bool> {
let mut text = serialize::serialize(resource);
// escape leading dots
text = text.replace(" +.", " +{\".\"}");
// ensure the resulting serialized file is valid by parsing again
let _ = parser::parse(text.clone()).unwrap();
// it's ok, write it out
Ok(write_file_if_changed(path, text)?)
}
/// Return the first message in `fname` whose identifier equals `key`.
///
/// # Panics
/// Panics if `parse_file` fails for `fname`.
fn get_entry(fname: &Utf8Path, key: &str) -> Option<Entry<String>> {
    parse_file(fname)
        .unwrap()
        .body
        .into_iter()
        .find(|entry| matches!(entry, Entry::Message(message) if message.id.name == key))
}
fn write_entry(path: &Utf8Path, key: &str, mut entry: Entry<String>) -> Result<()> {
if let Entry::Message(message) = &mut entry {
message.id.name = key.to_string();
}
let content = if Path::new(path).exists() {
fs::read_to_string(path).unwrap()
} else {
String::new()
};
let mut resource = parser::parse(content).unwrap();
resource.body.push(entry);
serialize_file(path, &resource)?;
Ok(())
}
/// Remove every message named `key` from the Fluent file at `path`, then
/// re-serialize the file.
///
/// Returns `true` when at least one message was removed. This is reported
/// separately from whether the file's text changed, so a file that is merely
/// reformatted by re-serialization is not reported as a deletion.
///
/// # Errors
/// Fails if `parse_file` or `serialize_file` fails.
fn delete_entry(path: &Utf8Path, key: &str) -> Result<bool> {
    let mut resource = parse_file(path)?;

    let original_len = resource.body.len();
    resource
        .body
        .retain(|entry| !matches!(entry, Entry::Message(message) if message.id.name == key));
    let did_change = resource.body.len() != original_len;

    serialize_file(path, &resource)?;
    Ok(did_change)
}

234
ftl/src/string/transform.rs Normal file
View File

@ -0,0 +1,234 @@
// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
use std::borrow::Cow;
use anki_io::paths_in_dir;
use anyhow::Result;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use clap::Args;
use clap::ValueEnum;
use fluent_syntax::ast::Entry;
use fluent_syntax::ast::Expression;
use fluent_syntax::ast::InlineExpression;
use fluent_syntax::ast::Message;
use fluent_syntax::ast::Pattern;
use fluent_syntax::ast::PatternElement;
use fluent_syntax::ast::Resource;
use regex::Regex;
use crate::string::parse_file;
use crate::string::serialize_file;
// Command-line arguments for the `transform` subcommand. The `///` comments
// on the fields are picked up by clap as help text.
#[derive(Args)]
pub struct TransformArgs {
    /// The folder which contains the different languages as subfolders, e.g.
    /// ftl/core-repo/core
    lang_folder: Utf8PathBuf,
    /// What should be replaced.
    target: TransformTarget,
    /// Regular expression to search for.
    regex: String,
    /// Text substituted for every match of the regular expression.
    replacement: String,
    // limit replacement to a single key
    // #[clap(long)]
    // key: Option<String>,
}
// Selects which part of a message pattern `transform_pattern` rewrites. Plain
// comments are used here on purpose: doc comments on a `ValueEnum` feed into
// clap's generated help.
#[derive(ValueEnum, Clone, PartialEq, Eq)]
pub enum TransformTarget {
// Handled by `transform_text`: text elements plus string and number literals.
Text,
// Handled by `transform_variable`: names of variable references.
Variable,
}
pub fn transform(args: TransformArgs) -> Result<()> {
let regex = Regex::new(&args.regex)?;
for lang in super::all_langs(&args.lang_folder)? {
for ftl in paths_in_dir(&lang)? {
transform_ftl(&ftl, &regex, &args)?;
}
}
if let Some(template_dir) = super::additional_template_folder(&args.lang_folder) {
// Our templates are also stored in the source tree, and need to be updated too.
for ftl in paths_in_dir(&template_dir)? {
transform_ftl(&ftl, &regex, &args)?;
}
}
Ok(())
}
/// Parse `ftl`, apply the transformation in memory, and write the file back
/// only when something was replaced.
fn transform_ftl(ftl: &Utf8Path, regex: &Regex, args: &TransformArgs) -> Result<()> {
    let mut resource = parse_file(ftl)?;
    if !transform_ftl_inner(&mut resource, regex, args) {
        // nothing matched, leave the file alone
        return Ok(());
    }
    println!("Updating {ftl}");
    serialize_file(ftl, &resource).map(|_| ())
}
/// Transform the value pattern of every message in `resource` that has one.
/// True if anything changed.
fn transform_ftl_inner(
    resource: &mut Resource<String>,
    regex: &Regex,
    args: &TransformArgs,
) -> bool {
    resource
        .body
        .iter_mut()
        .filter_map(|entry| match entry {
            Entry::Message(Message {
                value: Some(value), ..
            }) => Some(value),
            _ => None,
        })
        // non-short-circuiting `|` so every pattern is visited
        .fold(false, |changed, value| {
            transform_pattern(value, regex, args) | changed
        })
}
/// True if changed.
fn transform_pattern(pattern: &mut Pattern<String>, regex: &Regex, args: &TransformArgs) -> bool {
let mut changed = false;
for element in &mut pattern.elements {
match args.target {
TransformTarget::Text => {
changed |= transform_text(element, regex, args);
}
TransformTarget::Variable => {
changed |= transform_variable(element, regex, args);
}
}
}
changed
}
/// Apply the regex replacement to variable names referenced by a placeable:
/// a directly referenced variable, a select expression's selector, and
/// (recursively) the patterns of the select's variants. Text elements are left
/// untouched. True if changed.
fn transform_variable(
    pattern: &mut PatternElement<String>,
    regex: &Regex,
    args: &TransformArgs,
) -> bool {
    let expression = match pattern {
        PatternElement::Placeable { expression } => expression,
        PatternElement::TextElement { .. } => return false,
    };

    let mut changed = false;
    let mut rename = |name: &mut String| {
        // `replace_all` only yields an owned string when something matched
        if let Cow::Owned(renamed) = regex.replace_all(name, &args.replacement) {
            changed = true;
            *name = renamed;
        }
    };

    match expression {
        Expression::Inline(InlineExpression::VariableReference { id }) => rename(&mut id.name),
        Expression::Inline(_) => {}
        Expression::Select { selector, variants } => {
            if let InlineExpression::VariableReference { id } = selector {
                rename(&mut id.name)
            }
            for variant in variants {
                changed |= transform_pattern(&mut variant.value, regex, args);
            }
        }
    }

    changed
}
/// Apply the regex replacement to the textual parts of a pattern element:
/// plain text, string and number literals, and (recursively) the patterns of
/// a select expression's variants. References and nested placeables are left
/// untouched. True if changed.
fn transform_text(
    pattern: &mut PatternElement<String>,
    regex: &Regex,
    args: &TransformArgs,
) -> bool {
    let mut changed = false;
    let mut replace_in = |text: &mut String| {
        // `replace_all` only yields an owned string when something matched
        if let Cow::Owned(replaced) = regex.replace_all(text, &args.replacement) {
            changed = true;
            *text = replaced;
        }
    };

    match pattern {
        PatternElement::TextElement { value }
        | PatternElement::Placeable {
            expression:
                Expression::Inline(
                    InlineExpression::StringLiteral { value }
                    | InlineExpression::NumberLiteral { value },
                ),
        } => replace_in(value),
        // listed explicitly (no wildcard) so a new variant forces a decision here
        PatternElement::Placeable {
            expression:
                Expression::Inline(
                    InlineExpression::FunctionReference { .. }
                    | InlineExpression::MessageReference { .. }
                    | InlineExpression::TermReference { .. }
                    | InlineExpression::VariableReference { .. }
                    | InlineExpression::Placeable { .. },
                ),
        } => {}
        PatternElement::Placeable {
            expression: Expression::Select { variants, .. },
        } => {
            for variant in variants {
                changed |= transform_pattern(&mut variant.value, regex, args);
            }
        }
    }

    changed
}
#[cfg(test)]
mod tests {
use fluent_syntax::parser::parse;
use super::*;
use crate::serialize::serialize;
// Exercises `transform_ftl_inner` on an in-memory resource in three steps: a
// regex that matches nothing, a text-targeted replacement, and a
// variable-targeted replacement.
// NOTE(review): the raw-string fixtures below appear here without leading
// whitespace; confirm their indentation against the checked-in file before
// editing them.
#[test]
fn transform() -> Result<()> {
let mut resource = parse(
r#"sample-1 = This is a sample
sample-2 =
{ $sample ->
[one] { $sample } sample done
*[other] { $sample } samples done
}"#
.to_string(),
)
.unwrap();
// `regex` is left empty because `transform_ftl_inner` takes the compiled
// regex as a separate argument.
let mut args = TransformArgs {
lang_folder: Default::default(),
target: TransformTarget::Text,
regex: "".to_string(),
replacement: "replaced".to_string(),
};
// no changes
assert!(!transform_ftl_inner(
&mut resource,
&Regex::new("aoeu").unwrap(),
&args
));
// text change
let regex = Regex::new("sample").unwrap();
let mut resource2 = resource.clone();
assert!(transform_ftl_inner(&mut resource2, &regex, &args));
assert_eq!(
&serialize(&resource2),
r#"sample-1 = This is a replaced
sample-2 =
{ $sample ->
[one] { $sample } replaced done
*[other] { $sample } replaceds done
}
"#
);
// variable change
// start again from the untransformed resource
let mut resource2 = resource.clone();
args.target = TransformTarget::Variable;
assert!(transform_ftl_inner(&mut resource2, &regex, &args));
assert_eq!(
&serialize(&resource2),
r#"sample-1 = This is a sample
sample-2 =
{ $replaced ->
[one] { $replaced } sample done
*[other] { $replaced } samples done
}
"#
);
Ok(())
}
}

View File

@ -1,52 +0,0 @@
#!/usr/bin/env python3
# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
"""
Tool to apply transform to an ftl string and its translations.
"""
import glob
import os
from fluent.syntax import parse, serialize
from fluent.syntax.ast import Junk, Message, TextElement
# Template files live in this repository; translations are expected in a
# sibling "anki-i18n" checkout.
template_root = ".."
template_files = glob.glob(
    os.path.join(template_root, "ftl", "*", "*.ftl"), recursive=True
)

translation_root = os.path.join(template_root, "..", "anki-i18n")
translation_files = glob.glob(
    os.path.join(translation_root, "*", "*", "*", "*.ftl"), recursive=True
)

# Each item is [message key, text to find, replacement text].
target_repls = [
    ["media-recordingtime", "%0.1f", "{ $secs }"],
]


def transform_string_in_file(path):
    """Apply every matching replacement in target_repls to the file at path.

    Only the text elements of messages whose key is listed in target_repls
    are touched. The file is rewritten, and its path printed, only if at
    least one replacement changed something. Raises if the file contains
    junk (unparseable) entries.
    """
    # Context managers make sure the file handles are closed promptly.
    with open(path, encoding="utf8") as file:
        obj = parse(file.read(), with_spans=False)
    changed = False
    for ent in obj.body:
        if isinstance(ent, Junk):
            raise Exception(f"file had junk! {path} {ent}")
        if isinstance(ent, Message):
            key = ent.id.name
            for target_key, src, dst in target_repls:
                if key != target_key:
                    continue
                # a message that only has attributes has no value pattern
                if ent.value is None:
                    continue
                for elem in ent.value.elements:
                    if isinstance(elem, TextElement):
                        newval = elem.value.replace(src, dst)
                        if newval != elem.value:
                            elem.value = newval
                            changed = True

    if changed:
        with open(path, "w", encoding="utf8") as file:
            file.write(serialize(obj))
        print("updated", path)


for path in template_files + translation_files:
    transform_string_in_file(path)

View File

@ -188,6 +188,20 @@ pub fn read_dir_files(path: impl AsRef<Path>) -> Result<ReadDirFiles> {
})
}
/// Collect the utf8 paths of the entries in a folder into a vec. Returns an
/// error as soon as any dir entry is unreadable. Files in subfolders are not
/// gathered.
pub fn paths_in_dir(path: impl AsRef<Path>) -> Result<Vec<Utf8PathBuf>> {
    let dir: &Path = path.as_ref();
    let mut found = Vec::new();
    for entry in read_dir_files(dir)? {
        let entry = entry.context(FileIoSnafu {
            path: dir,
            op: FileOp::Read,
        })?;
        found.push(entry.path().utf8()?);
    }
    Ok(found)
}
/// True if name does not contain any path separators.
pub fn filename_is_safe(name: &str) -> bool {
let mut components = Path::new(name).components();