academic/cld2: Added (Compact Language Detection)

Signed-off-by: Robby Workman <rworkman@slackbuilds.org>
This commit is contained in:
Benjamin Trigona-Harany 2013-10-27 01:16:04 -05:00 committed by Robby Workman
parent 12cd740b63
commit d4f5065b3f
4 changed files with 162 additions and 0 deletions

9
academic/cld2/README Normal file
View File

@ -0,0 +1,9 @@
The Compact Language Detection library can detect the language of UTF8-encoded
text. CLD2 supports over 160 languages and can parse both plain text and HTML.
The Slackware script builds two shared libraries, libcld2 and libcld2_full. The
libcld2 library can recognise the core 83 languages while the libcld2_full
library has support for the entire set of 160+ language tables.
The script also builds a command-line tool, cld2, which is linked against the
libcld2_full library.

View File

@ -0,0 +1,124 @@
#!/bin/sh
# SlackBuild script for cld2
# Copyright 2013 Benjamin Trigona-Harany <slackbuilds@jaxartes.net>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Package identity. VERSION, BUILD and TAG fall back to the values below
# when they are unset or empty in the environment.
PRGNAM=cld2
VERSION=${VERSION:-20130728}
BUILD=${BUILD:-1}
TAG=${TAG:-_SBo}

# Choose the package architecture unless ARCH is already set and non-empty.
# Machine names matching i?86 become i486, names starting with "arm" become
# arm, and anything else reported by uname -m is used unchanged.
if [ -z "$ARCH" ]; then
  ARCH=$( uname -m )
  case "$ARCH" in
    i?86) ARCH=i486 ;;
    arm*) ARCH=arm ;;
  esac
fi

# CWD is the current directory at this point; TMP and OUTPUT can be
# overridden from the environment; PKG is the staging tree below TMP.
CWD=$(pwd)
TMP=${TMP:-/tmp/SBo}
OUTPUT=${OUTPUT:-/tmp}
PKG=$TMP/package-$PRGNAM

# Compiler flags and library directory suffix: generic defaults first,
# then the per-architecture overrides.
SLKCFLAGS="-O2"
LIBDIRSUFFIX=""
case "$ARCH" in
  i486)
    SLKCFLAGS="-O2 -march=i486 -mtune=i686"
    ;;
  i686)
    SLKCFLAGS="-O2 -march=i686 -mtune=i686"
    ;;
  x86_64)
    SLKCFLAGS="-O2 -fPIC"
    LIBDIRSUFFIX="64"
    ;;
esac
# Abort on the first failing command from here on.
set -e

# Start from an empty staging tree and a freshly unpacked source tree.
# Every path is quoted so that a TMP, OUTPUT or CWD containing whitespace
# cannot split into several arguments (most important for the rm -rf calls).
rm -rf "$PKG"
mkdir -p "$TMP" "$PKG" "$OUTPUT"
rm -rf "$TMP/$PRGNAM-$VERSION"
cd "$TMP"
tar xvf "$CWD/$PRGNAM-$VERSION.tar.bz2"
cd "$PRGNAM-$VERSION"

# Normalise ownership and permissions of the unpacked tree. Files copied
# into the package later (e.g. LICENSE via cp -a) keep whatever owner and
# mode they have here, so fix them before anything is staged.
chown -R root:root .
find -L . \
  \( -perm 777 -o -perm 775 -o -perm 750 -o -perm 711 -o -perm 555 \
     -o -perm 511 \) -exec chmod 755 {} \; -o \
  \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
     -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
# Build both shared libraries and the cld2 command-line tool inside the
# internal/ source directory, then stage them under $PKG. Everything runs
# in a subshell so the directory change and the helper function do not
# leak into the rest of the script.
( cd internal

  # Compile and link one shared library.
  #   $1 - output file name
  #   $2 - CJK delta bigram table source
  #   $3 - quadgram table source
  #   $4 - delta octagram table source
  #   $5 - distinct octagram table source
  # The two libraries differ only in these four table sources; all other
  # sources are shared and are passed in the same order for both.
  # -fPIC is always given because position-independent code is what a
  # shared object needs; SLKCFLAGS only carries it on x86_64.
  # $SLKCFLAGS is deliberately unquoted so it splits into separate flags.
  build_shared_lib() {
    g++ -shared -fPIC $SLKCFLAGS \
      cldutil.cc cldutil_shared.cc compact_lang_det.cc compact_lang_det_hint_code.cc \
      compact_lang_det_impl.cc debug.cc fixunicodevalue.cc \
      generated_entities.cc generated_language.cc generated_ulscript.cc \
      getonescriptspan.cc lang_script.cc offsetmap.cc scoreonescriptspan.cc \
      tote.cc utf8statetable.cc \
      cld_generated_cjk_uni_prop_80.cc cld2_generated_cjk_compatible.cc \
      "$2" generated_distinct_bi_0.cc \
      "$3" "$4" \
      "$5" cld_generated_score_quad_octa_1024_256.cc \
      -o "$1"
  }

  # libcld2.so: built with the smaller set of language tables.
  build_shared_lib libcld2.so \
    cld_generated_cjk_delta_bi_4.cc \
    cld2_generated_quadchrome0715.cc \
    cld2_generated_deltaoctachrome0614.cc \
    cld2_generated_distinctoctachrome0604.cc

  # libcld2_full.so: built with the larger set of language tables.
  build_shared_lib libcld2_full.so \
    cld_generated_cjk_delta_bi_32.cc \
    cld2_generated_quad0720.cc \
    cld2_generated_deltaocta0527.cc \
    cld2_generated_distinctocta0527.cc

  # The command-line tool is the upstream test driver linked against the
  # full library that was just built in this directory.
  g++ $SLKCFLAGS \
    compact_lang_det_test.cc -I. -L. libcld2_full.so \
    -o cld2

  # Stage the results; install -D creates missing parent directories.
  install -D -m 0755 cld2 "$PKG/usr/bin/cld2"
  install -D -m 0755 libcld2.so "$PKG/usr/lib${LIBDIRSUFFIX}/libcld2.so"
  install -D -m 0755 libcld2_full.so "$PKG/usr/lib${LIBDIRSUFFIX}/libcld2_full.so"
)
# install header files
# Both target directories are spelled out: brace expansion is not part of
# POSIX sh, and this script runs under #!/bin/sh, so "{internal,public}"
# is only expanded when /bin/sh happens to be bash.
mkdir -p "$PKG/usr/include/cld2/internal" "$PKG/usr/include/cld2/public"
# Each copy runs in a subshell so the working directory of the main
# script stays at the top of the source tree.
( cd internal
  cp generated_language.h generated_ulscript.h integral_types.h lang_script.h \
    "$PKG/usr/include/cld2/internal"
)
( cd public
  cp compact_lang_det.h encodings.h "$PKG/usr/include/cld2/public"
)
# Strip every ELF executable and shared object in the staging tree.
# Stripping is best-effort: strip's error output is discarded and a failing
# pipeline is ignored, so it never aborts the build under set -e.
find "$PKG" -print0 | xargs -0 file | grep -e "executable" -e "shared object" \
  | grep ELF | cut -f 1 -d : | xargs strip --strip-unneeded 2> /dev/null || true

# Package documentation: the upstream licence plus a copy of this script.
# $CWD is quoted because the directory holding the SlackBuild may contain
# whitespace.
mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
cp -a \
  LICENSE \
  "$PKG/usr/doc/$PRGNAM-$VERSION"
cat "$CWD/$PRGNAM.SlackBuild" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"

# Package description read by the Slackware package tools.
mkdir -p "$PKG/install"
cat "$CWD/slack-desc" > "$PKG/install/slack-desc"

# Build the package from the staging tree; the archive type defaults to
# tgz unless PKGTYPE is set.
cd "$PKG"
/sbin/makepkg -l y -c n "$OUTPUT/$PRGNAM-$VERSION-$ARCH-$BUILD$TAG.${PKGTYPE:-tgz}"

10
academic/cld2/cld2.info Normal file
View File

@ -0,0 +1,10 @@
PRGNAM="cld2"
VERSION="20130728"
HOMEPAGE="https://code.google.com/p/cld2/"
DOWNLOAD="http://jaxartes.net/files/cld2-20130728.tar.bz2"
MD5SUM="3ead394982e394bbd42525a7f51f9891"
DOWNLOAD_x86_64=""
MD5SUM_x86_64=""
REQUIRES=""
MAINTAINER="Benjamin Trigona-Harany"
EMAIL="slackbuilds@jaxartes.net"

19
academic/cld2/slack-desc Normal file
View File

@ -0,0 +1,19 @@
# HOW TO EDIT THIS FILE:
# The "handy ruler" below makes it easier to edit a package description.
# Line up the first '|' above the ':' following the base package name, and
# the '|' on the right side marks the last column you can put a character in.
# You must make exactly 11 lines for the formatting to be correct. It's also
# customary to leave one space after the ':' except on otherwise blank lines.
|-----handy-ruler------------------------------------------------------|
cld2: cld2 (Compact Language Detection)
cld2:
cld2: The Compact Language Detection library can detect the language of
cld2: text, even with a very small amount of sample data. CLD2 supports
cld2: over 160 languages and works on both plain text and HTML.
cld2:
cld2: Home: https://code.google.com/p/cld2/
cld2:
cld2:
cld2:
cld2: