pt-BR thesaurus: remove idx file from source.
idx files are created during the build. Change-Id: I32cef18b22aad876d3caf4f3d00e776c5aa1e195 Reviewed-on: https://gerrit.libreoffice.org/c/dictionaries/+/129963 Reviewed-by: Rafael Lima <rafael.palma.lima@gmail.com> Reviewed-by: Olivier Hallot <olivier.hallot@libreoffice.org> Tested-by: Olivier Hallot <olivier.hallot@libreoffice.org>
This commit is contained in:
parent
96ecbe7fb1
commit
86f503136b
|
@ -13,8 +13,6 @@ $(eval $(call gb_Dictionary_add_root_files,dict-pt-BR,\
|
|||
dictionaries/pt_BR/hyph_pt_BR.dic \
|
||||
dictionaries/pt_BR/pt_BR.aff \
|
||||
dictionaries/pt_BR/pt_BR.dic \
|
||||
dictionaries/pt_BR/th_pt_BR.dat \
|
||||
dictionaries/pt_BR/th_pt_BR.idx \
|
||||
dictionaries/pt_BR/Lightproof.components \
|
||||
dictionaries/pt_BR/Lightproof.py \
|
||||
dictionaries/pt_BR/Linguistic.xcu \
|
||||
|
@ -58,5 +56,9 @@ $(eval $(call gb_Dictionary_add_propertyfiles,dict-pt-BR,dialog,\
|
|||
dictionaries/pt_BR/dialog/pt_BR_en_US.properties \
|
||||
))
|
||||
|
||||
$(eval $(call gb_Dictionary_add_thesauri,dict-pt-BR,\
|
||||
dictionaries/pt_BR/th_pt_BR.dat \
|
||||
))
|
||||
|
||||
|
||||
# vim: set noet sw=4 ts=4:
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# perl program to take a thesaurus structured text data file
|
||||
# and create the proper sorted index file (.idx)
|
||||
#
|
||||
# typically invoked as follows:
|
||||
# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
|
||||
#
|
||||
|
||||
# Sort comparator for index records of the form "word|offset".
# Orders two records by a string comparison of their word part (the text
# before the first '|'); the offset part plays no role in the ordering.
# Operates on the $a and $b variables supplied by sort().
sub by_entry {
    my $left_word  = (split(/\|/, $a, 2))[0];
    my $right_word = (split(/\|/, $b, 2))[0];
    return $left_word cmp $right_word;
}
|
||||
|
||||
# main routine
#
# Reads a thesaurus data file on STDIN and writes its index to STDOUT:
# the encoding line, the number of entries, then one "word|offset" line
# per entry, sorted by word.

my @index_lines = ();    # "word|offset" records, one per thesaurus entry
my $offset      = 0;     # position in the input where the current entry starts

# The top line of the thesaurus provides the encoding. Its length is taken
# before chomp, so the line terminator is included in the running offset.
my $charset = <STDIN>;
$offset += length($charset);
chomp($charset);

# Every block starts with a "word|meaning count" line, followed by that
# many meaning lines.
while (my $head = <STDIN>) {
    my $block_len = length($head);    # measured before the newline is stripped
    chomp($head);
    my ($word, $meaning_count) = split(/\|/, $head);

    # Consume the meaning lines; only their lengths matter for the index.
    for (my $seen = 0; $seen < $meaning_count; $seen++) {
        my $meaning_line = <STDIN>;
        $block_len += length($meaning_line);
    }

    # Record where this block began, then advance past the whole block.
    push @index_lines, join('|', $word, $offset);
    $offset += $block_len;
}

# All information has been gathered: emit the encoding, the entry count
# and the index records sorted by word.
my $total = @index_lines;
print STDOUT "$charset\n";
print STDOUT "$total\n";
for my $index_line (sort by_entry @index_lines) {
    print STDOUT "$index_line\n";
}
|
||||
|
26480
pt_BR/th_pt_BR.idx
26480
pt_BR/th_pt_BR.idx
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue