academic/cld2: Added (Compact Language Detection)
Signed-off-by: Robby Workman <rworkman@slackbuilds.org>
This commit is contained in:
parent
12cd740b63
commit
d4f5065b3f
|
@ -0,0 +1,9 @@
|
|||
The Compact Language Detection library can detect the language of UTF8-encoded
|
||||
text. CLD2 supports over 160 languages and can parse both plain text and HTML.
|
||||
|
||||
The Slackware script builds two shared libraries, libcld2 and libcld2_full. The
|
||||
libcld2 library can recognise the core 83 languages while the libcld2_full
|
||||
library has support for the entire set of 160+ language tables.
|
||||
|
||||
The script also builds a command-line tool, cld2, which is compiled against the
|
||||
libcld2_full library.
|
|
@ -0,0 +1,124 @@
|
|||
#!/bin/sh
|
||||
|
||||
# SlackBuild script for cld2
|
||||
|
||||
# Copyright 2013 Benjamin Trigona-Harany <slackbuilds@jaxartes.net>
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Package identity. VERSION, BUILD and TAG keep any value already present in
# the environment and otherwise fall back to the defaults given here.
PRGNAM=cld2
|
||||
VERSION=${VERSION:-20130728}
|
||||
BUILD=${BUILD:-1}
|
||||
TAG=${TAG:-_SBo}
|
||||
|
||||
# When ARCH is empty or unset, derive it from `uname -m`: every i?86 machine
# is mapped to i486, every arm* machine to arm, and any other machine string
# is used unchanged. A caller-supplied ARCH is left alone.
if [ -z "$ARCH" ]; then
|
||||
case "$( uname -m )" in
|
||||
i?86) ARCH=i486 ;;
|
||||
arm*) ARCH=arm ;;
|
||||
*) ARCH=$( uname -m ) ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# CWD records the directory the script was started from. TMP (build area) and
# OUTPUT (where the finished package is written) may be overridden from the
# environment; PKG is the staging tree that gets packaged.
CWD=$(pwd)
|
||||
TMP=${TMP:-/tmp/SBo}
|
||||
PKG=$TMP/package-$PRGNAM
|
||||
OUTPUT=${OUTPUT:-/tmp}
|
||||
|
||||
# Choose compiler flags and the library directory suffix for the target ARCH.
# Only x86_64 gets -fPIC and a "64" suffix (libraries go to /usr/lib64);
# unrecognised architectures fall back to plain -O2 and /usr/lib.
if [ "$ARCH" = "i486" ]; then
|
||||
SLKCFLAGS="-O2 -march=i486 -mtune=i686"
|
||||
LIBDIRSUFFIX=""
|
||||
elif [ "$ARCH" = "i686" ]; then
|
||||
SLKCFLAGS="-O2 -march=i686 -mtune=i686"
|
||||
LIBDIRSUFFIX=""
|
||||
elif [ "$ARCH" = "x86_64" ]; then
|
||||
SLKCFLAGS="-O2 -fPIC"
|
||||
LIBDIRSUFFIX="64"
|
||||
else
|
||||
SLKCFLAGS="-O2"
|
||||
LIBDIRSUFFIX=""
|
||||
fi
|
||||
|
||||
# From here on any failing command aborts the script. Start from a clean
# staging tree and a clean source tree, then unpack the tarball that sits next
# to this script (in $CWD) into $TMP and enter it.
set -e
|
||||
|
||||
rm -rf $PKG
|
||||
mkdir -p $TMP $PKG $OUTPUT
|
||||
rm -rf $TMP/$PRGNAM-$VERSION
|
||||
cd $TMP
|
||||
tar xvf $CWD/$PRGNAM-$VERSION.tar.bz2
|
||||
cd $PRGNAM-$VERSION
|
||||
|
||||
# Compile everything by hand inside internal/ (run in a subshell so the
# directory change does not leak), then install the results into $PKG.
( cd internal
|
||||
# libcld2.so: common sources plus the cjk_delta_bi_4 / quadchrome0715 /
# deltaoctachrome0614 / distinctoctachrome0604 table files.
g++ -shared $SLKCFLAGS \
|
||||
cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
|
||||
compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
|
||||
generated_entities.cc generated_language.cc generated_ulscript.cc \
|
||||
getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
|
||||
tote.cc utf8statetable.cc \
|
||||
cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
|
||||
cld_generated_cjk_delta_bi_4.cc generated_distinct_bi_0.cc \
|
||||
cld2_generated_quadchrome0715.cc cld2_generated_deltaoctachrome0614.cc \
|
||||
cld2_generated_distinctoctachrome0604.cc cld_generated_score_quad_octa_1024_256.cc \
|
||||
-o libcld2.so
|
||||
|
||||
# libcld2_full.so: the same common sources, but with the cjk_delta_bi_32 /
# quad0720 / deltaocta0527 / distinctocta0527 table files instead.
g++ -shared $SLKCFLAGS \
|
||||
cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
|
||||
compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
|
||||
generated_entities.cc generated_language.cc generated_ulscript.cc \
|
||||
getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
|
||||
tote.cc utf8statetable.cc \
|
||||
cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
|
||||
cld_generated_cjk_delta_bi_32.cc generated_distinct_bi_0.cc \
|
||||
cld2_generated_quad0720.cc cld2_generated_deltaocta0527.cc \
|
||||
cld2_generated_distinctocta0527.cc cld_generated_score_quad_octa_1024_256.cc \
|
||||
-o libcld2_full.so
|
||||
|
||||
# cld2 command-line tool: built from compact_lang_det_test.cc and linked
# against the freshly built libcld2_full.so in the current directory.
g++ $SLKCFLAGS \
|
||||
compact_lang_det_test.cc -I. -L. libcld2_full.so \
|
||||
-o cld2
|
||||
|
||||
# Install the binary and both libraries; install -D creates the missing
# parent directories under $PKG.
install -D -m 0755 cld2 $PKG/usr/bin/cld2
|
||||
install -D -m 0755 libcld2.so $PKG/usr/lib${LIBDIRSUFFIX}/libcld2.so
|
||||
install -D -m 0755 libcld2_full.so $PKG/usr/lib${LIBDIRSUFFIX}/libcld2_full.so
|
||||
)
|
||||
|
||||
# install header files
|
||||
# Create both header directories with explicit paths. Brace expansion
# ({internal,public}) is a bash extension; under a strictly POSIX /bin/sh it
# would create one directory literally named "{internal,public}" and the
# header copies into .../internal and .../public would then fail.
mkdir -p "$PKG/usr/include/cld2/internal" "$PKG/usr/include/cld2/public"
|
||||
# Copy the selected headers from internal/ and public/ into the matching
# directories under $PKG/usr/include/cld2. Each copy runs in its own subshell
# so the working directory of the main script is unchanged.
( cd internal
|
||||
cp generated_language.h generated_ulscript.h integral_types.h lang_script.h $PKG/usr/include/cld2/internal
|
||||
)
|
||||
( cd public
|
||||
cp compact_lang_det.h encodings.h $PKG/usr/include/cld2/public
|
||||
)
|
||||
|
||||
# Strip every ELF executable and shared object found under $PKG. Stripping is
# best-effort: strip's stderr is discarded and `|| true` keeps a failing
# pipeline from aborting the script under `set -e`.
find $PKG -print0 | xargs -0 file | grep -e "executable" -e "shared object" \
|
||||
| grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true
|
||||
|
||||
# Install the upstream LICENSE plus a copy of this build script into the
# package's versioned documentation directory.
mkdir -p $PKG/usr/doc/$PRGNAM-$VERSION
|
||||
cp -a \
|
||||
LICENSE \
|
||||
$PKG/usr/doc/$PRGNAM-$VERSION
|
||||
cat $CWD/$PRGNAM.SlackBuild > $PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild
|
||||
|
||||
# Place the package description (slack-desc, read from $CWD) into the
# package's install/ directory.
mkdir -p $PKG/install
|
||||
cat $CWD/slack-desc > $PKG/install/slack-desc
|
||||
|
||||
# Build the package from the staging tree and write it to $OUTPUT. The file
# extension comes from PKGTYPE and defaults to tgz.
# NOTE(review): the meaning of the makepkg flags (-l y, -c n) is not visible
# in this script; confirm against the makepkg documentation.
cd $PKG
|
||||
/sbin/makepkg -l y -c n $OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}
|
|
@ -0,0 +1,10 @@
|
|||
PRGNAM="cld2"
|
||||
VERSION="20130728"
|
||||
HOMEPAGE="https://code.google.com/p/cld2/"
|
||||
DOWNLOAD="http://jaxartes.net/files/cld2-20130728.tar.bz2"
|
||||
MD5SUM="3ead394982e394bbd42525a7f51f9891"
|
||||
DOWNLOAD_x86_64=""
|
||||
MD5SUM_x86_64=""
|
||||
REQUIRES=""
|
||||
MAINTAINER="Benjamin Trigona-Harany"
|
||||
EMAIL="slackbuilds@jaxartes.net"
|
|
@ -0,0 +1,19 @@
|
|||
# HOW TO EDIT THIS FILE:
|
||||
# The "handy ruler" below makes it easier to edit a package description.
|
||||
# Line up the first '|' above the ':' following the base package name, and
|
||||
# the '|' on the right side marks the last column you can put a character in.
|
||||
# You must make exactly 11 lines for the formatting to be correct. It's also
|
||||
# customary to leave one space after the ':' except on otherwise blank lines.
|
||||
|
||||
|-----handy-ruler------------------------------------------------------|
|
||||
cld2: cld2 (Compact Language Detection)
|
||||
cld2:
|
||||
cld2: The Compact Language Detection library can detect the language of
|
||||
cld2: text, even with a very small amount of sample data. CLD2 supports
|
||||
cld2: over 160 languages and works on both plain text and HTML.
|
||||
cld2:
|
||||
cld2: Home: https://code.google.com/p/cld2/
|
||||
cld2:
|
||||
cld2:
|
||||
cld2:
|
||||
cld2:
|
Loading…
Reference in New Issue