[ucto] 01/01: Imported Upstream version 0.7.0
Joost van Baal
joostvb at moszumanska.debian.org
Sun Jul 5 04:54:20 UTC 2015
This is an automated email from the git hooks/post-receive script.
joostvb pushed a commit to annotated tag upstream/0.7.0
in repository ucto.
commit a70a7fb8f5cdc55c1d8c5a72d906826d3000862d
Author: Joost van Baal-Ilić <joostvb at nusku.mdcc.cx>
Date: Sun Jul 5 06:53:57 2015 +0200
Imported Upstream version 0.7.0
---
ChangeLog | 139 +++++++++++++++++++++
configure | 326 ++++++++++++++++++++++++++----------------------
configure.ac | 8 +-
include/ucto/tokenize.h | 26 +++-
m4/libtool.m4 | 314 +++++++++++++++++++++++++++++++++-------------
m4/ltoptions.m4 | 19 ++-
src/tokenize.cxx | 156 ++++++++++++++++-------
src/ucto.cxx | 8 +-
8 files changed, 703 insertions(+), 293 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 509f56c..4a11737 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,142 @@
+2014-11-26 16:17 sloot
+
+ * [r17871] configure.ac, src/tokenize.cxx, tests/testfolia.ok,
+ tests/testfolia2.ok: made ucto Part aware.
+ 2 tests fail atm
+
+2014-11-21 17:04 mvgompel
+
+ * [r17865] include/ucto/tokenize.h, src/tokenize.cxx,
+ tests/partest.nl.tok, tests/partest.nl.txt,
+ tests/partest2.nl.tok, tests/partest2.nl.txt,
+ tests/partest2_folia.nl.txt, tests/partest2_folia.nl.xml,
+ tests/partest_folia.nl.txt, tests/partest_folia.nl.xml,
+ tests/test.py: Re-added memory saving code: tokens of entire
+ input not retained in memory prior to output + added extra tests
+ + updated test.py framework to deal with FoLiA tests
+
+2014-11-21 11:52 mvgompel
+
+ * [r17857] .travis.yml: yet another fix for travis
+
+2014-11-21 10:23 mvgompel
+
+ * [r17856] .travis.yml: (travis-ci) show some logs after failure
+
+2014-11-21 10:05 mvgompel
+
+ * [r17854] tests/testall: fix
+
+2014-11-21 09:49 mvgompel
+
+ * [r17852] tests/testall, tests/testone: tests return proper return
+ codes now
+
+2014-11-21 09:38 mvgompel
+
+ * [r17851] .travis.yml: travis-ci: forgot libfolia as dependency
+
+2014-11-21 09:31 mvgompel
+
+ * [r17850] .travis.yml: fix for travis
+
+2014-11-20 20:23 mvgompel
+
+ * [r17846] .travis.yml: travis update
+
+2014-11-20 20:08 mvgompel
+
+ * [r17845] .travis.yml: update for travis, output tests to stderr
+ instead of stdout
+
+2014-11-20 19:55 mvgompel
+
+ * [r17844] tests/test.de.tok.V, tests/test.de.txt: fixed german
+ test
+
+2014-11-20 17:54 mvgompel
+
+ * [r17843] .travis.yml: another update for travis-ci
+
+2014-11-20 17:20 mvgompel
+
+ * [r17842] .travis.yml: fix for travis-ci build
+
+2014-11-20 17:07 mvgompel
+
+ * [r17841] .travis.yml: added travis-ci
+
+2014-11-20 16:12 sloot
+
+ * [r17840] src/tokenize.cxx: shut up!
+
+2014-11-20 16:04 sloot
+
+ * [r17839] include/ucto/tokenize.h, src/tokenize.cxx, src/ucto.cxx:
+ added experimental punctuation filter.
+ Filters most punctuation. but NOT when rules like ABBREVIATION
+ SUFFIX etc are applicable.
+
+2014-11-20 16:00 sloot
+
+ * [r17838] tests/testall, tests/testpunctfilter,
+ tests/testpunctfilter.ok: added a test for the experimantal
+ punctuation filter
+
+2014-11-20 14:03 sloot
+
+ * [r17837] tests/quotetest_folgert8.nl.txt,
+ tests/quotetest_folgert8.nl.txt.disabled: disable this one too
+
+2014-11-20 14:01 sloot
+
+ * [r17836] tests/munten.nl.txt, tests/munten.nl.txt.disabled,
+ tests/units.nl.txt, tests/units.nl.txt.disabled: disable 2 tests
+
+2014-11-20 13:56 sloot
+
+ * [r17835] include/ucto/tokenize.h, src/tokenize.cxx: Reverted the
+ pevious 2 patches.
+ The fist was wrong. The second didn't fix it.
+
+2014-11-20 13:39 sloot
+
+ * [r17834] tests/testpunctuation.ok: accept suboptimal solution for
+ now
+
+2014-11-20 13:03 sloot
+
+ * [r17833] tests/testfoliain.ok, tests/testslash.ok: more libfolia
+ bumps
+
+2014-11-20 13:01 sloot
+
+ * [r17832] tests/testfolia.ok, tests/testfolia2.ok: bumped libfolia
+ version
+
+2014-10-23 20:45 mvgompel
+
+ * [r17756] include/ucto/tokenize.h, src/tokenize.cxx: attempted fix
+ for FoLiA output
+
+2014-10-23 20:22 mvgompel
+
+ * [r17755] include/ucto/tokenize.h, src/tokenize.cxx: Reducing the
+ unnecessarily high memory usage, especially when no FoLiA output
+ is requested
+
+2014-10-20 09:21 mvgompel
+
+ * [r17746] configure.ac: macro fix
+
+2014-10-19 11:49 mvgompel
+
+ * [r17737] configure.ac: added compiler fallback
+
+2014-09-23 08:27 sloot
+
+ * [r17695] NEWS, configure.ac, src/tokenize.cxx: bumping
+
2014-09-22 10:16 sloot
* [r17687] src/ucto.cxx: typo
diff --git a/configure b/configure
index bd55878..b93a2e9 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ucto 0.6.0.
+# Generated by GNU Autoconf 2.69 for ucto 0.7.0.
#
# Report bugs to <timbl at uvt.nl>.
#
@@ -589,8 +589,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ucto'
PACKAGE_TARNAME='ucto'
-PACKAGE_VERSION='0.6.0'
-PACKAGE_STRING='ucto 0.6.0'
+PACKAGE_VERSION='0.7.0'
+PACKAGE_STRING='ucto 0.7.0'
PACKAGE_BUGREPORT='timbl at uvt.nl'
PACKAGE_URL=''
@@ -1350,7 +1350,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures ucto 0.6.0 to adapt to many kinds of systems.
+\`configure' configures ucto 0.7.0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1420,7 +1420,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ucto 0.6.0:";;
+ short | recursive ) echo "Configuration of ucto 0.7.0:";;
esac
cat <<\_ACEOF
@@ -1443,7 +1443,7 @@ Optional Features:
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
- --with-pic try to use only PIC/non-PIC objects [default=use
+ --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
--with-sysroot=DIR Search for dependent libraries within DIR
@@ -1549,7 +1549,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ucto configure 0.6.0
+ucto configure 0.7.0
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2078,7 +2078,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ucto $as_me 0.6.0, which was
+It was created by ucto $as_me 0.7.0, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2941,7 +2941,7 @@ fi
# Define the identity of the package.
PACKAGE='ucto'
- VERSION='0.6.0'
+ VERSION='0.7.0'
cat >>confdefs.h <<_ACEOF
@@ -3059,7 +3059,7 @@ if test -z "$CXX"; then
CXX=$CCC
else
if test -n "$ac_tool_prefix"; then
- for ac_prog in g++
+ for ac_prog in g++ c++
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
@@ -3103,7 +3103,7 @@ fi
fi
if test -z "$CXX"; then
ac_ct_CXX=$CXX
- for ac_prog in g++
+ for ac_prog in g++ c++
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
@@ -5333,6 +5333,11 @@ else
lt_cv_sys_max_cmd_len=196608
;;
+ os2*)
+ # The test takes a long time on OS/2.
+ lt_cv_sys_max_cmd_len=8192
+ ;;
+
osf*)
# Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
# due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -5359,7 +5364,8 @@ else
;;
*)
lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
+ if test -n "$lt_cv_sys_max_cmd_len" && \
+ test undefined != "$lt_cv_sys_max_cmd_len"; then
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
else
@@ -5372,7 +5378,7 @@ else
# If test is not a shell built-in, we'll probably end up computing a
# maximum length that is only half of the actual maximum length, but
# we can't tell.
- while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+ while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
= "X$teststring$teststring"; } >/dev/null 2>&1 &&
test $i != 17 # 1/2 MB should be enough
do
@@ -5760,10 +5766,6 @@ freebsd* | dragonfly*)
fi
;;
-gnu*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
haiku*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -5801,8 +5803,8 @@ irix5* | irix6* | nonstopux*)
lt_cv_deplibs_check_method=pass_all
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -6442,13 +6444,13 @@ old_postuninstall_cmds=
if test -n "$RANLIB"; then
case $host_os in
openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
;;
*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
;;
esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi
case $host_os in
@@ -6595,6 +6597,7 @@ for ac_symprfx in "" "_"; do
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK '"\
" {last_section=section; section=\$ 3};"\
+" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
" \$ 0!~/External *\|/{next};"\
" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -6883,7 +6886,7 @@ ia64-*-hpux*)
rm -rf conftest*
;;
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
@@ -6899,9 +6902,19 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
LD="${LD-ld} -m elf_i386_fbsd"
;;
x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
+ case `/usr/bin/file conftest.o` in
+ *x86-64*)
+ LD="${LD-ld} -m elf32_x86_64"
+ ;;
+ *)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ esac
;;
- ppc64-*linux*|powerpc64-*linux*)
+ powerpc64le-*)
+ LD="${LD-ld} -m elf32lppclinux"
+ ;;
+ powerpc64-*)
LD="${LD-ld} -m elf32ppclinux"
;;
s390x-*linux*)
@@ -6920,7 +6933,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
x86_64-*linux*)
LD="${LD-ld} -m elf_x86_64"
;;
- ppc*-*linux*|powerpc*-*linux*)
+ powerpcle-*)
+ LD="${LD-ld} -m elf64lppc"
+ ;;
+ powerpc-*)
LD="${LD-ld} -m elf64ppc"
;;
s390*-*linux*|s390*-*tpf*)
@@ -6983,7 +6999,7 @@ $as_echo "$lt_cv_cc_needs_belf" >&6; }
CFLAGS="$SAVE_CFLAGS"
fi
;;
-sparc*-*solaris*)
+*-*solaris*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -6994,7 +7010,20 @@ sparc*-*solaris*)
case `/usr/bin/file conftest.o` in
*64-bit*)
case $lt_cv_prog_gnu_ld in
- yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ yes*)
+ case $host in
+ i?86-*-solaris*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ sparc*-*-solaris*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+ LD="${LD-ld}_sol2"
+ fi
+ ;;
*)
if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
LD="${LD-ld} -64"
@@ -7634,7 +7663,13 @@ else
$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c 2>conftest.err
_lt_result=$?
- if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+ # If there is a non-empty error log, and "single_module"
+ # appears in it, assume the flag caused a linker warning
+ if test -s conftest.err && $GREP single_module conftest.err; then
+ cat conftest.err >&5
+ # Otherwise, if the output was created with a 0 exit code from
+ # the compiler, it worked.
+ elif test -f libconftest.dylib && test $_lt_result -eq 0; then
lt_cv_apple_cc_single_mod=yes
else
cat conftest.err >&5
@@ -7645,6 +7680,7 @@ else
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
$as_echo "$lt_cv_apple_cc_single_mod" >&6; }
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
if ${lt_cv_ld_exported_symbols_list+:} false; then :
@@ -7677,6 +7713,7 @@ rm -f core conftest.err conftest.$ac_objext \
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
$as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
$as_echo_n "checking for -force_load linker flag... " >&6; }
if ${lt_cv_ld_force_load+:} false; then :
@@ -7698,7 +7735,9 @@ _LT_EOF
echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
_lt_result=$?
- if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+ if test -s conftest.err && $GREP force_load conftest.err; then
+ cat conftest.err >&5
+ elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
lt_cv_ld_force_load=yes
else
cat conftest.err >&5
@@ -8114,7 +8153,22 @@ fi
# Check whether --with-pic was given.
if test "${with_pic+set}" = set; then :
- withval=$with_pic; pic_mode="$withval"
+ withval=$with_pic; lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac
else
pic_mode=default
fi
@@ -8192,6 +8246,10 @@ LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+
+
+
test -z "$LN_S" && LN_S="ln -s"
@@ -8651,7 +8709,9 @@ lt_prog_compiler_static=
case $cc_basename in
nvcc*) # Cuda Compiler Driver 2.2
lt_prog_compiler_wl='-Xlinker '
- lt_prog_compiler_pic='-Xcompiler -fPIC'
+ if test -n "$lt_prog_compiler_pic"; then
+ lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+ fi
;;
esac
else
@@ -8695,7 +8755,7 @@ lt_prog_compiler_static=
lt_prog_compiler_static='-non_shared'
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
# old Intel for x86_64 which still supported -KPIC.
ecc*)
@@ -8742,18 +8802,33 @@ lt_prog_compiler_static=
;;
*)
case `$CC -V 2>&1 | sed 5q` in
- *Sun\ F* | *Sun*Fortran*)
+ *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
# Sun Fortran 8.3 passes all unrecognized flags to the linker
lt_prog_compiler_pic='-KPIC'
lt_prog_compiler_static='-Bstatic'
lt_prog_compiler_wl=''
;;
+ *Sun\ F* | *Sun*Fortran*)
+ lt_prog_compiler_pic='-KPIC'
+ lt_prog_compiler_static='-Bstatic'
+ lt_prog_compiler_wl='-Qoption ld '
+ ;;
*Sun\ C*)
# Sun C 5.9
lt_prog_compiler_pic='-KPIC'
lt_prog_compiler_static='-Bstatic'
lt_prog_compiler_wl='-Wl,'
;;
+ *Intel*\ [CF]*Compiler*)
+ lt_prog_compiler_wl='-Wl,'
+ lt_prog_compiler_pic='-fPIC'
+ lt_prog_compiler_static='-static'
+ ;;
+ *Portland\ Group*)
+ lt_prog_compiler_wl='-Wl,'
+ lt_prog_compiler_pic='-fpic'
+ lt_prog_compiler_static='-Bstatic'
+ ;;
esac
;;
esac
@@ -9115,7 +9190,6 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
hardcode_direct=no
hardcode_direct_absolute=no
hardcode_libdir_flag_spec=
- hardcode_libdir_flag_spec_ld=
hardcode_libdir_separator=
hardcode_minus_L=no
hardcode_shlibpath_var=unsupported
@@ -9368,8 +9442,7 @@ _LT_EOF
xlf* | bgf* | bgxlf* | mpixlf*)
# IBM XL Fortran 10.1 on PPC cannot create shared libs itself
whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
- hardcode_libdir_flag_spec=
- hardcode_libdir_flag_spec_ld='-rpath $libdir'
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
if test "x$supports_anon_versioning" = xyes; then
archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
@@ -9749,6 +9822,7 @@ fi
# The linker will not automatically build a static lib if we build a DLL.
# _LT_TAGVAR(old_archive_from_new_cmds, )='true'
enable_shared_with_static_runtimes=yes
+ exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
# Don't use ranlib
old_postinstall_cmds='chmod 644 $oldlib'
@@ -9794,6 +9868,7 @@ fi
hardcode_shlibpath_var=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
else
whole_archive_flag_spec=''
fi
@@ -9822,10 +9897,6 @@ fi
hardcode_shlibpath_var=no
;;
- freebsd1*)
- ld_shlibs=no
- ;;
-
# FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
# support. Future versions do this automatically, but an explicit c++rt0.o
# does not break anything, and helps significantly (at the cost of a little
@@ -9838,7 +9909,7 @@ fi
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
hardcode_direct=yes
hardcode_minus_L=yes
@@ -9877,7 +9948,6 @@ fi
fi
if test "$with_gnu_ld" = no; then
hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
- hardcode_libdir_flag_spec_ld='+b $libdir'
hardcode_libdir_separator=:
hardcode_direct=yes
hardcode_direct_absolute=yes
@@ -10501,11 +10571,6 @@ esac
-
-
-
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
$as_echo_n "checking dynamic linker characteristics... " >&6; }
@@ -10595,7 +10660,7 @@ need_version=unknown
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -10604,7 +10669,7 @@ aix3*)
;;
aix[4-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -10669,7 +10734,7 @@ beos*)
;;
bsdi[45]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -10808,7 +10873,7 @@ darwin* | rhapsody*)
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -10816,10 +10881,6 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -10827,7 +10888,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[23].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -10845,7 +10906,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
@@ -10864,19 +10925,8 @@ freebsd* | dragonfly*)
esac
;;
-gnu*)
- version_type=linux
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -10937,7 +10987,7 @@ hpux9* | hpux10* | hpux11*)
;;
interix[3-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -10953,7 +11003,7 @@ irix5* | irix6* | nonstopux*)
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -10990,9 +11040,9 @@ linux*oldld* | linux*aout* | linux*coff*)
dynamic_linker=no
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11086,7 +11136,7 @@ netbsd*)
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -11155,7 +11205,7 @@ rdos*)
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11180,7 +11230,7 @@ sunos4*)
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -11204,7 +11254,7 @@ sysv4 | sysv4.3*)
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -11235,7 +11285,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11245,7 +11295,7 @@ tpf*)
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -12170,7 +12220,6 @@ export_dynamic_flag_spec_CXX=
hardcode_direct_CXX=no
hardcode_direct_absolute_CXX=no
hardcode_libdir_flag_spec_CXX=
-hardcode_libdir_flag_spec_ld_CXX=
hardcode_libdir_separator_CXX=
hardcode_minus_L_CXX=no
hardcode_shlibpath_var_CXX=unsupported
@@ -12754,6 +12803,7 @@ fi
hardcode_shlibpath_var_CXX=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
else
whole_archive_flag_spec_CXX=''
fi
@@ -12798,7 +12848,7 @@ fi
esac
;;
- freebsd[12]*)
+ freebsd2.*)
# C++ shared libraries reported to be fairly broken before
# switch to ELF
ld_shlibs_CXX=no
@@ -12814,9 +12864,6 @@ fi
ld_shlibs_CXX=yes
;;
- gnu*)
- ;;
-
haiku*)
archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
link_all_deplibs_CXX=yes
@@ -12978,7 +13025,7 @@ fi
inherit_rpath_CXX=yes
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -13474,6 +13521,7 @@ _lt_libdeps_save_CFLAGS=$CFLAGS
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -13837,7 +13885,7 @@ lt_prog_compiler_static_CXX=
;;
esac
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# KAI C++ Compiler
@@ -14263,7 +14311,9 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl*) ;;
+ cl*)
+ exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+ ;;
*)
export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
@@ -14419,8 +14469,6 @@ esac
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
$as_echo_n "checking dynamic linker characteristics... " >&6; }
@@ -14446,7 +14494,7 @@ need_version=unknown
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -14455,7 +14503,7 @@ aix3*)
;;
aix[4-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -14520,7 +14568,7 @@ beos*)
;;
bsdi[45]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -14657,7 +14705,7 @@ darwin* | rhapsody*)
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -14665,10 +14713,6 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -14676,7 +14720,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[23].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -14694,7 +14738,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
@@ -14713,19 +14757,8 @@ freebsd* | dragonfly*)
esac
;;
-gnu*)
- version_type=linux
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -14786,7 +14819,7 @@ hpux9* | hpux10* | hpux11*)
;;
interix[3-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -14802,7 +14835,7 @@ irix5* | irix6* | nonstopux*)
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -14839,9 +14872,9 @@ linux*oldld* | linux*aout* | linux*coff*)
dynamic_linker=no
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -14935,7 +14968,7 @@ netbsd*)
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -15004,7 +15037,7 @@ rdos*)
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -15029,7 +15062,7 @@ sunos4*)
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -15053,7 +15086,7 @@ sysv4 | sysv4.3*)
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -15084,7 +15117,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -15094,7 +15127,7 @@ tpf*)
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -15233,6 +15266,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
ac_config_commands="$ac_config_commands libtool"
@@ -15808,12 +15843,12 @@ if test -n "$folia_CFLAGS"; then
pkg_cv_folia_CFLAGS="$folia_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.11 \""; } >&5
- ($PKG_CONFIG --exists --print-errors "folia >= 0.11 ") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.13 \""; } >&5
+ ($PKG_CONFIG --exists --print-errors "folia >= 0.13 ") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_folia_CFLAGS=`$PKG_CONFIG --cflags "folia >= 0.11 " 2>/dev/null`
+ pkg_cv_folia_CFLAGS=`$PKG_CONFIG --cflags "folia >= 0.13 " 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -15825,12 +15860,12 @@ if test -n "$folia_LIBS"; then
pkg_cv_folia_LIBS="$folia_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.11 \""; } >&5
- ($PKG_CONFIG --exists --print-errors "folia >= 0.11 ") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.13 \""; } >&5
+ ($PKG_CONFIG --exists --print-errors "folia >= 0.13 ") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_folia_LIBS=`$PKG_CONFIG --libs "folia >= 0.11 " 2>/dev/null`
+ pkg_cv_folia_LIBS=`$PKG_CONFIG --libs "folia >= 0.13 " 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -15851,14 +15886,14 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- folia_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "folia >= 0.11 " 2>&1`
+ folia_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "folia >= 0.13 " 2>&1`
else
- folia_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "folia >= 0.11 " 2>&1`
+ folia_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "folia >= 0.13 " 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$folia_PKG_ERRORS" >&5
- as_fn_error $? "Package requirements (folia >= 0.11 ) were not met:
+ as_fn_error $? "Package requirements (folia >= 0.13 ) were not met:
$folia_PKG_ERRORS
@@ -16535,7 +16570,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ucto $as_me 0.6.0, which was
+This file was extended by ucto $as_me 0.7.0, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -16601,7 +16636,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-ucto config.status 0.6.0
+ucto config.status 0.7.0
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -16738,6 +16773,7 @@ pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
@@ -16820,7 +16856,6 @@ with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld='`$ECHO "$hardcode_libdir_flag_spec_ld" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
@@ -16892,7 +16927,6 @@ with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`'
allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld_CXX='`$ECHO "$hardcode_libdir_flag_spec_ld_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`'
@@ -16931,6 +16965,7 @@ _LTECHO_EOF'
# Quote evaled strings.
for var in SHELL \
ECHO \
+PATH_SEPARATOR \
SED \
GREP \
EGREP \
@@ -16981,7 +17016,6 @@ with_gnu_ld \
allow_undefined_flag \
no_undefined_flag \
hardcode_libdir_flag_spec \
-hardcode_libdir_flag_spec_ld \
hardcode_libdir_separator \
exclude_expsyms \
include_expsyms \
@@ -17015,7 +17049,6 @@ with_gnu_ld_CXX \
allow_undefined_flag_CXX \
no_undefined_flag_CXX \
hardcode_libdir_flag_spec_CXX \
-hardcode_libdir_flag_spec_ld_CXX \
hardcode_libdir_separator_CXX \
exclude_expsyms_CXX \
include_expsyms_CXX \
@@ -17829,8 +17862,8 @@ $as_echo X"$file" |
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
#
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is part of GNU Libtool.
@@ -17884,6 +17917,9 @@ SHELL=$lt_SHELL
# An echo program that protects backslashes.
ECHO=$lt_ECHO
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
# The host system.
host_alias=$host_alias
host=$host
@@ -18185,10 +18221,6 @@ no_undefined_flag=$lt_no_undefined_flag
# This must work even if \$libdir does not exist
hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking. This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld
-
# Whether we need a single "-rpath" flag with a separated argument.
hardcode_libdir_separator=$lt_hardcode_libdir_separator
@@ -18531,10 +18563,6 @@ no_undefined_flag=$lt_no_undefined_flag_CXX
# This must work even if \$libdir does not exist
hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking. This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_CXX
-
# Whether we need a single "-rpath" flag with a separated argument.
hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX
diff --git a/configure.ac b/configure.ac
index e504a6f..51c4c01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,10 +1,10 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
-# $Id: configure.ac 17549 2014-08-25 09:46:17Z sloot $
+# $Id: configure.ac 17872 2014-11-26 16:26:43Z sloot $
# $URL: https://ilk.uvt.nl/svn/sources/ucto/trunk/configure.ac $
AC_PREREQ(2.59)
-AC_INIT([ucto], [0.6.0], [timbl at uvt.nl])
+AC_INIT([ucto], [0.7.0], [timbl at uvt.nl])
AM_INIT_AUTOMAKE
AC_CONFIG_SRCDIR([configure.ac])
AC_CONFIG_MACRO_DIR([m4])
@@ -21,7 +21,7 @@ else
fi
# Checks for programs.
-AC_PROG_CXX( [g++] )
+AC_PROG_CXX( [g++ c++] )
if $cxx_flags_were_set; then
CXXFLAGS=$CXXFLAGS
@@ -88,7 +88,7 @@ AC_ARG_WITH(folia,
[PKG_CONFIG_PATH="$withval/lib/pkgconfig:$PKG_CONFIG_PATH"],
[PKG_CONFIG_PATH="$prefix/lib/pkgconfig:$PKG_CONFIG_PATH"])
AC_MSG_NOTICE( [pkg-config search path: $PKG_CONFIG_PATH] )
-PKG_CHECK_MODULES([folia], [folia >= 0.11] )
+PKG_CHECK_MODULES([folia], [folia >= 0.13] )
CXXFLAGS="$folia_CFLAGS $CXXFLAGS"
LIBS="$folia_LIBS $LIBS"
diff --git a/include/ucto/tokenize.h b/include/ucto/tokenize.h
index 9d0c0e5..ae88928 100644
--- a/include/ucto/tokenize.h
+++ b/include/ucto/tokenize.h
@@ -1,5 +1,5 @@
/*
- $Id: tokenize.h 17662 2014-09-15 15:07:27Z sloot $
+ $Id: tokenize.h 17865 2014-11-21 17:04:25Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/ucto/trunk/include/ucto/tokenize.h $
Copyright (c) 2006 - 2014
Tilburg University
@@ -152,8 +152,10 @@ namespace Tokenizer {
// Tokenize a folia document
bool tokenize(folia::Document& );
+ //Tokenize from input stream to a vecto of Tokens
+ std::vector<Token> tokenizeStream( std::istream&, bool allatonce=true );
+
//Tokenize from input stream to output stream
- std::vector<Token> tokenizeStream( std::istream& );
void tokenize( std::istream&, std::ostream& );
void tokenize( std::istream* in, std::ostream* out){
// for backward compatability
@@ -212,9 +214,16 @@ namespace Tokenizer {
bool getQuoteDetection() const { return detectQuotes; }
//Enable filtering
- bool setFiltering( bool b=true ) { bool t = doFilter; doFilter = b; return t; }
+ bool setFiltering( bool b=true ) {
+ bool t = doFilter; doFilter = b; return t;
+ }
bool getFiltering() const { return doFilter; };
+ bool setPunctFilter( bool b=true ) {
+ bool t = doPunctFilter; doPunctFilter = b; return t;
+ }
+ bool getPunctFilter() const { return doPunctFilter; };
+
// set normalization mode
std::string setNormalization( const std::string& s ) {
return normalizer.setMode( s );
@@ -266,7 +275,7 @@ namespace Tokenizer {
setDocID( id ); return setXMLOutput(b); }
bool setXMLInput( bool b ) { bool t = xmlin; xmlin = b; return t; }
- void outputTokens( std::ostream&, const std::vector<Token>& ) const;
+ void outputTokens( std::ostream&, const std::vector<Token>& ,const bool continued=false) const; //continued should be set to true when outputTokens is invoked multiple times and it is not the first invokation
private:
void tokenizeWord( const UnicodeString&, bool);
@@ -289,7 +298,9 @@ namespace Tokenizer {
void sortRules( std::vector<Rule *>&, std::vector<UnicodeString>& );
void outputTokensDoc( folia::Document&, const std::vector<Token>& ) const;
- void outputTokensXML( folia::FoliaElement *, const std::vector<Token>& ) const;
+ void outputTokensDoc_init( folia::Document& ) const;
+
+ int outputTokensXML( folia::FoliaElement *, const std::vector<Token>& , int parCount=0 ) const;
void tokenizeElement( folia::FoliaElement * );
void tokenizeSentenceElement( folia::FoliaElement * );
@@ -316,9 +327,12 @@ namespace Tokenizer {
//detect quotes?
bool detectQuotes;
- //filter special characters (default on)?
+ //filter special characters (default on)
bool doFilter;
+ //filter all punctuation characters (default off)
+ bool doPunctFilter;
+
//detect paragraphs?
bool detectPar;
diff --git a/m4/libtool.m4 b/m4/libtool.m4
index 8ff3c76..d7c043f 100644
--- a/m4/libtool.m4
+++ b/m4/libtool.m4
@@ -1,8 +1,8 @@
# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
#
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is free software; the Free Software Foundation gives
@@ -11,8 +11,8 @@
m4_define([_LT_COPYING], [dnl
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is part of GNU Libtool.
@@ -146,6 +146,8 @@ AC_REQUIRE([AC_CANONICAL_BUILD])dnl
AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
+_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
+dnl
_LT_DECL([], [host_alias], [0], [The host system])dnl
_LT_DECL([], [host], [0])dnl
_LT_DECL([], [host_os], [0])dnl
@@ -637,7 +639,7 @@ m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
configured by $[0], generated by m4_PACKAGE_STRING.
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2011 Free Software Foundation, Inc.
This config.lt script is free software; the Free Software Foundation
gives unlimited permision to copy, distribute and modify it."
@@ -801,6 +803,7 @@ AC_DEFUN([LT_LANG],
m4_case([$1],
[C], [_LT_LANG(C)],
[C++], [_LT_LANG(CXX)],
+ [Go], [_LT_LANG(GO)],
[Java], [_LT_LANG(GCJ)],
[Fortran 77], [_LT_LANG(F77)],
[Fortran], [_LT_LANG(FC)],
@@ -822,6 +825,31 @@ m4_defun([_LT_LANG],
])# _LT_LANG
+m4_ifndef([AC_PROG_GO], [
+############################################################
+# NOTE: This macro has been submitted for inclusion into #
+# GNU Autoconf as AC_PROG_GO. When it is available in #
+# a released version of Autoconf we should remove this #
+# macro and use it instead. #
+############################################################
+m4_defun([AC_PROG_GO],
+[AC_LANG_PUSH(Go)dnl
+AC_ARG_VAR([GOC], [Go compiler command])dnl
+AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
+_AC_ARG_VAR_LDFLAGS()dnl
+AC_CHECK_TOOL(GOC, gccgo)
+if test -z "$GOC"; then
+ if test -n "$ac_tool_prefix"; then
+ AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
+ fi
+fi
+if test -z "$GOC"; then
+ AC_CHECK_PROG(GOC, gccgo, gccgo, false)
+fi
+])#m4_defun
+])#m4_ifndef
+
+
# _LT_LANG_DEFAULT_CONFIG
# -----------------------
m4_defun([_LT_LANG_DEFAULT_CONFIG],
@@ -852,6 +880,10 @@ AC_PROVIDE_IFELSE([AC_PROG_GCJ],
m4_ifdef([LT_PROG_GCJ],
[m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
+AC_PROVIDE_IFELSE([AC_PROG_GO],
+ [LT_LANG(GO)],
+ [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
+
AC_PROVIDE_IFELSE([LT_PROG_RC],
[LT_LANG(RC)],
[m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
@@ -954,7 +986,13 @@ m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c 2>conftest.err
_lt_result=$?
- if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+ # If there is a non-empty error log, and "single_module"
+ # appears in it, assume the flag caused a linker warning
+ if test -s conftest.err && $GREP single_module conftest.err; then
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ # Otherwise, if the output was created with a 0 exit code from
+ # the compiler, it worked.
+ elif test -f libconftest.dylib && test $_lt_result -eq 0; then
lt_cv_apple_cc_single_mod=yes
else
cat conftest.err >&AS_MESSAGE_LOG_FD
@@ -962,6 +1000,7 @@ m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
rm -rf libconftest.dylib*
rm -f conftest.*
fi])
+
AC_CACHE_CHECK([for -exported_symbols_list linker flag],
[lt_cv_ld_exported_symbols_list],
[lt_cv_ld_exported_symbols_list=no
@@ -973,6 +1012,7 @@ m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
[lt_cv_ld_exported_symbols_list=no])
LDFLAGS="$save_LDFLAGS"
])
+
AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
[lt_cv_ld_force_load=no
cat > conftest.c << _LT_EOF
@@ -990,7 +1030,9 @@ _LT_EOF
echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
_lt_result=$?
- if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+ if test -s conftest.err && $GREP force_load conftest.err; then
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
lt_cv_ld_force_load=yes
else
cat conftest.err >&AS_MESSAGE_LOG_FD
@@ -1035,8 +1077,8 @@ _LT_EOF
])
-# _LT_DARWIN_LINKER_FEATURES
-# --------------------------
+# _LT_DARWIN_LINKER_FEATURES([TAG])
+# ---------------------------------
# Checks for linker and compiler features on darwin
m4_defun([_LT_DARWIN_LINKER_FEATURES],
[
@@ -1047,6 +1089,8 @@ m4_defun([_LT_DARWIN_LINKER_FEATURES],
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
_LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+ m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
+ [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes])
else
_LT_TAGVAR(whole_archive_flag_spec, $1)=''
fi
@@ -1268,7 +1312,7 @@ ia64-*-hpux*)
rm -rf conftest*
;;
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
@@ -1280,9 +1324,19 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
LD="${LD-ld} -m elf_i386_fbsd"
;;
x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
+ case `/usr/bin/file conftest.o` in
+ *x86-64*)
+ LD="${LD-ld} -m elf32_x86_64"
+ ;;
+ *)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ esac
+ ;;
+ powerpc64le-*)
+ LD="${LD-ld} -m elf32lppclinux"
;;
- ppc64-*linux*|powerpc64-*linux*)
+ powerpc64-*)
LD="${LD-ld} -m elf32ppclinux"
;;
s390x-*linux*)
@@ -1301,7 +1355,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
x86_64-*linux*)
LD="${LD-ld} -m elf_x86_64"
;;
- ppc*-*linux*|powerpc*-*linux*)
+ powerpcle-*)
+ LD="${LD-ld} -m elf64lppc"
+ ;;
+ powerpc-*)
LD="${LD-ld} -m elf64ppc"
;;
s390*-*linux*|s390*-*tpf*)
@@ -1330,14 +1387,27 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
CFLAGS="$SAVE_CFLAGS"
fi
;;
-sparc*-*solaris*)
+*-*solaris*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
if AC_TRY_EVAL(ac_compile); then
case `/usr/bin/file conftest.o` in
*64-bit*)
case $lt_cv_prog_gnu_ld in
- yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ yes*)
+ case $host in
+ i?86-*-solaris*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ sparc*-*-solaris*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+ LD="${LD-ld}_sol2"
+ fi
+ ;;
*)
if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
LD="${LD-ld} -64"
@@ -1414,13 +1484,13 @@ old_postuninstall_cmds=
if test -n "$RANLIB"; then
case $host_os in
openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
;;
*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
;;
esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi
case $host_os in
@@ -1600,6 +1670,11 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
lt_cv_sys_max_cmd_len=196608
;;
+ os2*)
+ # The test takes a long time on OS/2.
+ lt_cv_sys_max_cmd_len=8192
+ ;;
+
osf*)
# Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
# due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -1626,7 +1701,8 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
;;
*)
lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
+ if test -n "$lt_cv_sys_max_cmd_len" && \
+ test undefined != "$lt_cv_sys_max_cmd_len"; then
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
else
@@ -1639,7 +1715,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
# If test is not a shell built-in, we'll probably end up computing a
# maximum length that is only half of the actual maximum length, but
# we can't tell.
- while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+ while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
= "X$teststring$teststring"; } >/dev/null 2>&1 &&
test $i != 17 # 1/2 MB should be enough
do
@@ -2185,7 +2261,7 @@ need_version=unknown
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -2194,7 +2270,7 @@ aix3*)
;;
aix[[4-9]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -2259,7 +2335,7 @@ beos*)
;;
bsdi[[45]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -2398,7 +2474,7 @@ m4_if([$1], [],[
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -2406,10 +2482,6 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -2417,7 +2489,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[[123]]*) objformat=aout ;;
+ freebsd[[23]].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -2435,7 +2507,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[[01]]* | freebsdelf3.[[01]]*)
@@ -2454,19 +2526,8 @@ freebsd* | dragonfly*)
esac
;;
-gnu*)
- version_type=linux
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -2527,7 +2588,7 @@ hpux9* | hpux10* | hpux11*)
;;
interix[[3-9]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -2543,7 +2604,7 @@ irix5* | irix6* | nonstopux*)
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -2580,9 +2641,9 @@ linux*oldld* | linux*aout* | linux*coff*)
dynamic_linker=no
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2657,7 +2718,7 @@ netbsd*)
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -2726,7 +2787,7 @@ rdos*)
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2751,7 +2812,7 @@ sunos4*)
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -2775,7 +2836,7 @@ sysv4 | sysv4.3*)
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -2806,7 +2867,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2816,7 +2877,7 @@ tpf*)
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -3197,10 +3258,6 @@ freebsd* | dragonfly*)
fi
;;
-gnu*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
haiku*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -3238,8 +3295,8 @@ irix5* | irix6* | nonstopux*)
lt_cv_deplibs_check_method=pass_all
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -3658,6 +3715,7 @@ for ac_symprfx in "" "_"; do
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK ['"\
" {last_section=section; section=\$ 3};"\
+" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
" \$ 0!~/External *\|/{next};"\
" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -3990,7 +4048,7 @@ m4_if([$1], [CXX], [
;;
esac
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# KAI C++ Compiler
@@ -4242,7 +4300,9 @@ m4_if([$1], [CXX], [
case $cc_basename in
nvcc*) # Cuda Compiler Driver 2.2
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Xcompiler -fPIC'
+ if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
+ fi
;;
esac
else
@@ -4287,7 +4347,7 @@ m4_if([$1], [CXX], [
_LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
# old Intel for x86_64 which still supported -KPIC.
ecc*)
@@ -4334,18 +4394,33 @@ m4_if([$1], [CXX], [
;;
*)
case `$CC -V 2>&1 | sed 5q` in
- *Sun\ F* | *Sun*Fortran*)
+ *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
# Sun Fortran 8.3 passes all unrecognized flags to the linker
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
_LT_TAGVAR(lt_prog_compiler_wl, $1)=''
;;
+ *Sun\ F* | *Sun*Fortran*)
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+ ;;
*Sun\ C*)
# Sun C 5.9
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
;;
+ *Intel*\ [[CF]]*Compiler*)
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ ;;
+ *Portland\ Group*)
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
esac
;;
esac
@@ -4505,7 +4580,9 @@ m4_if([$1], [CXX], [
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl*) ;;
+ cl*)
+ _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+ ;;
*)
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
_LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
@@ -4533,7 +4610,6 @@ m4_if([$1], [CXX], [
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
@@ -4787,8 +4863,7 @@ _LT_EOF
xlf* | bgf* | bgxlf* | mpixlf*)
# IBM XL Fortran 10.1 on PPC cannot create shared libs itself
_LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir'
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
_LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
if test "x$supports_anon_versioning" = xyes; then
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
@@ -5084,6 +5159,7 @@ _LT_EOF
# The linker will not automatically build a static lib if we build a DLL.
# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+ _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
# Don't use ranlib
_LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
@@ -5130,10 +5206,6 @@ _LT_EOF
_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
;;
- freebsd1*)
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
# FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
# support. Future versions do this automatically, but an explicit c++rt0.o
# does not break anything, and helps significantly (at the cost of a little
@@ -5146,7 +5218,7 @@ _LT_EOF
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
_LT_TAGVAR(hardcode_direct, $1)=yes
_LT_TAGVAR(hardcode_minus_L, $1)=yes
@@ -5185,7 +5257,6 @@ _LT_EOF
fi
if test "$with_gnu_ld" = no; then
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir'
_LT_TAGVAR(hardcode_libdir_separator, $1)=:
_LT_TAGVAR(hardcode_direct, $1)=yes
_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
@@ -5627,9 +5698,6 @@ _LT_TAGDECL([], [no_undefined_flag], [1],
_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
[Flag to hardcode $libdir into a binary during linking.
This must work even if $libdir does not exist])
-_LT_TAGDECL([], [hardcode_libdir_flag_spec_ld], [1],
- [[If ld is used when linking, flag to hardcode $libdir into a binary
- during linking. This must work even if $libdir does not exist]])
_LT_TAGDECL([], [hardcode_libdir_separator], [1],
[Whether we need a single "-rpath" flag with a separated argument])
_LT_TAGDECL([], [hardcode_direct], [0],
@@ -5787,7 +5855,6 @@ _LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
@@ -6157,7 +6224,7 @@ if test "$_lt_caught_CXX_error" != yes; then
esac
;;
- freebsd[[12]]*)
+ freebsd2.*)
# C++ shared libraries reported to be fairly broken before
# switch to ELF
_LT_TAGVAR(ld_shlibs, $1)=no
@@ -6173,9 +6240,6 @@ if test "$_lt_caught_CXX_error" != yes; then
_LT_TAGVAR(ld_shlibs, $1)=yes
;;
- gnu*)
- ;;
-
haiku*)
_LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
_LT_TAGVAR(link_all_deplibs, $1)=yes
@@ -6337,7 +6401,7 @@ if test "$_lt_caught_CXX_error" != yes; then
_LT_TAGVAR(inherit_rpath, $1)=yes
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -6918,12 +6982,18 @@ public class foo {
}
};
_LT_EOF
+], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
+package foo
+func foo() {
+}
+_LT_EOF
])
_lt_libdeps_save_CFLAGS=$CFLAGS
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac
dnl Parse the compiler output and extract the necessary
@@ -7120,7 +7190,6 @@ _LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
@@ -7253,7 +7322,6 @@ _LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
@@ -7440,6 +7508,77 @@ CFLAGS=$lt_save_CFLAGS
])# _LT_LANG_GCJ_CONFIG
+# _LT_LANG_GO_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for the GNU Go compiler
+# are suitably defined. These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_GO_CONFIG],
+[AC_REQUIRE([LT_PROG_GO])dnl
+AC_LANG_SAVE
+
+# Source file extension for Go test sources.
+ac_ext=go
+
+# Object file extension for compiled Go test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="package main; func main() { }"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='package main; func main() { }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=yes
+CC=${GOC-"gccgo"}
+CFLAGS=$GOFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# Go did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+ _LT_COMPILER_NO_RTTI($1)
+ _LT_COMPILER_PIC($1)
+ _LT_COMPILER_C_O($1)
+ _LT_COMPILER_FILE_LOCKS($1)
+ _LT_LINKER_SHLIBS($1)
+ _LT_LINKER_HARDCODE_LIBPATH($1)
+
+ _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GO_CONFIG
+
+
# _LT_LANG_RC_CONFIG([TAG])
# -------------------------
# Ensure that the configuration variables for the Windows resource compiler
@@ -7509,6 +7648,13 @@ dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
+# LT_PROG_GO
+# ----------
+AC_DEFUN([LT_PROG_GO],
+[AC_CHECK_TOOL(GOC, gccgo,)
+])
+
+
# LT_PROG_RC
# ----------
AC_DEFUN([LT_PROG_RC],
diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4
index 17cfd51..5d9acd8 100644
--- a/m4/ltoptions.m4
+++ b/m4/ltoptions.m4
@@ -326,9 +326,24 @@ dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
- [AS_HELP_STRING([--with-pic],
+ [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
- [pic_mode="$withval"],
+ [lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
[pic_mode=default])
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
diff --git a/src/tokenize.cxx b/src/tokenize.cxx
index cc1ac75..077bdcd 100644
--- a/src/tokenize.cxx
+++ b/src/tokenize.cxx
@@ -1,5 +1,5 @@
/*
- $Id: tokenize.cxx 17682 2014-09-18 14:09:27Z sloot $
+ $Id: tokenize.cxx 17873 2014-11-26 17:11:31Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/ucto/trunk/src/tokenize.cxx $
Copyright (c) 2006 - 2014
Tilburg University
@@ -298,7 +298,9 @@ namespace Tokenizer {
linenum(0),
inputEncoding( "UTF-8" ), eosmark("<utt>"),
tokDebug(0), verbose(false),
- detectBounds(true), detectQuotes(false), doFilter(true), detectPar(true),
+ detectBounds(true), detectQuotes(false),
+ doFilter(true), doPunctFilter(false),
+ detectPar(true),
paragraphsignal(true),
sentenceperlineoutput(false), sentenceperlineinput(false),
lowercase(false), uppercase(false),
@@ -335,7 +337,7 @@ namespace Tokenizer {
}
}
- vector<Token> TokenizerClass::tokenizeStream( istream& IN ) {
+ vector<Token> TokenizerClass::tokenizeStream( istream& IN, bool allatonce) {
vector<Token> outputTokens;
bool done = false;
bool bos = true;
@@ -343,8 +345,7 @@ namespace Tokenizer {
do {
done = !getline( IN, line );
linenum++;
- if (tokDebug > 0)
- *Log(theErrLog) << "[tokenize] Read input line " << linenum << endl;
+ if (tokDebug > 0) *Log(theErrLog) << "[tokenize] Read input line " << linenum << endl;
stripCR( line );
if ( sentenceperlineinput )
line += string(" ") + folia::UnicodeToUTF8(eosmark);
@@ -352,13 +353,12 @@ namespace Tokenizer {
if ( (done) || (line.empty()) ){
signalParagraph();
numS = countSentences(true); //count full sentences in token buffer, force buffer to empty!
- }
- else {
+ } else {
if ( passthru )
passthruLine( line, bos );
else
tokenizeLine( line );
- numS = countSentences(); //count full sentences in token buffer
+ numS = countSentences(); //count full sentences in token buffer
}
if ( numS > 0 ) { //process sentences
if (tokDebug > 0) *Log(theErrLog) << "[tokenize] " << numS << " sentence(s) in buffer, processing..." << endl;
@@ -369,30 +369,49 @@ namespace Tokenizer {
//clear processed sentences from buffer
if (tokDebug > 0) *Log(theErrLog) << "[tokenize] flushing " << numS << " sentence(s) from buffer..." << endl;
flushSentences(numS);
- }
- else {
+ if (!allatonce) return outputTokens;
+ } else {
if (tokDebug > 0) *Log(theErrLog) << "[tokenize] No sentences yet, reading on..." << endl;
}
} while (!done);
return outputTokens;
}
+
+
+
folia::Document TokenizerClass::tokenize( istream& IN ) {
- vector<Token> v = tokenizeStream( IN );
folia::Document doc( "id='" + docid + "'" );
- outputTokensDoc( doc, v );
+ outputTokensDoc_init( doc);
+ folia::FoliaElement *root = doc.doc()->index(0);
+ int parCount = 0;
+ vector<Token> buffer;
+ do {
+ vector<Token> v = tokenizeStream( IN , false);
+ for (vector<Token>::iterator iter = v.begin(); iter != v.end(); iter++) {
+ if (iter->role & NEWPARAGRAPH) {
+ //process the buffer
+ parCount= outputTokensXML( root, buffer, parCount);
+ buffer.clear();
+ }
+ buffer.push_back( *iter);
+ }
+ } while (!IN.eof());
+ if (!buffer.empty()) outputTokensXML( root, buffer, parCount);
return doc;
}
void TokenizerClass::tokenize( istream& IN, ostream& OUT) {
- vector<Token> v = tokenizeStream( IN );
if (xmlout) {
- folia::Document doc( "id='" + docid + "'" );
- outputTokensDoc( doc, v );
+ folia::Document doc = tokenize( IN );
OUT << doc << endl;
- }
- else {
- outputTokens( OUT, v );
+ } else {
+ int i = 0;
+ do {
+ vector<Token> v = tokenizeStream( IN , false);
+ if (!v.empty()) outputTokens( OUT, v , (i>0) );
+ i++;
+ } while (!IN.eof());
OUT << endl;
}
}
@@ -523,8 +542,7 @@ namespace Tokenizer {
flushSentences(numS);
}
- void TokenizerClass::outputTokensDoc( folia::Document& doc,
- const vector<Token>& tv ) const {
+ void TokenizerClass::outputTokensDoc_init( folia::Document& doc ) const {
doc.addStyle( "text/xsl", "folia.xsl" );
if ( passthru ){
doc.declare( folia::AnnotationType::TOKEN, "passthru", "annotator='ucto', annotatortype='auto', datetime='now()'" );
@@ -535,16 +553,20 @@ namespace Tokenizer {
}
folia::Text *text = new folia::Text( "id='" + docid + ".text'" );
doc.append( text );
+ }
+
+ void TokenizerClass::outputTokensDoc( folia::Document& doc,
+ const vector<Token>& tv ) const {
folia::FoliaElement *root = doc.doc()->index(0);
outputTokensXML(root, tv );
}
- void TokenizerClass::outputTokensXML( folia::FoliaElement *root,
- const vector<Token>& tv ) const {
+ int TokenizerClass::outputTokensXML( folia::FoliaElement *root, const vector<Token>& tv, int parCount ) const {
short quotelevel = 0;
folia::FoliaElement *lastS = 0;
- int parCount = 0;
if (tokDebug > 0) {
+ *Log(theErrLog) << "[outputTokensXML] root=<" << root->classname()
+ << ">" << endl;
*Log(theErrLog) << "[outputTokensXML] root-id=" << root->id() << endl;
}
bool root_is_sentence = false;
@@ -557,6 +579,7 @@ namespace Tokenizer {
|| root->isinstance( folia::Head_t )
|| root->isinstance( folia::Note_t )
|| root->isinstance( folia::Item_t )
+ || root->isinstance( folia::Part_t )
|| root->isinstance( folia::Caption_t )
|| root->isinstance( folia::Event_t ) ){
root_is_structure_element = true;
@@ -566,14 +589,13 @@ namespace Tokenizer {
for ( size_t i=0; i < tv.size(); i++) {
if ( ( !root_is_structure_element && !root_is_sentence ) &&
( (tv[i].role & NEWPARAGRAPH) || !in_paragraph ) ) {
- parCount++;
if ( in_paragraph ){
appendText( root, outputclass );
root = root->parent();
}
if (tokDebug > 0) *Log(theErrLog) << "[outputTokensXML] Creating paragraph" << endl;
folia::KWargs args;
- args["id"] = root->doc()->id() + ".p." + toString(parCount);
+ args["id"] = root->doc()->id() + ".p." + toString(++parCount);
folia::FoliaElement *p = new folia::Paragraph( root->doc(), args );
// *Log(theErrLog) << "created " << p << endl;
root->append( p );
@@ -589,9 +611,15 @@ namespace Tokenizer {
}
if (( tv[i].role & BEGINOFSENTENCE) && (!root_is_sentence)) {
- if (tokDebug > 0) *Log(theErrLog) << "[outputTokensXML] Creating sentence in '" << root->id() << "' ()" << endl;
folia::KWargs args;
- args["generate_id"] = root->id();
+ if ( root->id().empty() )
+ args["generate_id"] = root->parent()->id();
+ else
+ args["generate_id"] = root->id();
+ if (tokDebug > 0) {
+ *Log(theErrLog) << "[outputTokensXML] Creating sentence in '"
+ << args["generate_id"] << "'" << endl;
+ }
folia::FoliaElement *s = new folia::Sentence( root->doc(), args );
// *Log(theErrLog) << "created " << s << endl;
root->append( s );
@@ -633,6 +661,7 @@ namespace Tokenizer {
if ( tv.size() > 0 ){
appendText( root, outputclass );
}
+ return parCount;
}
ostream& operator<<( ostream& os, const TokenRole& tok ){
@@ -645,15 +674,15 @@ namespace Tokenizer {
return os;
}
- void TokenizerClass::outputTokens( ostream& OUT,
- const vector<Token>& toks ) const {
+ void TokenizerClass::outputTokens( ostream& OUT, const vector<Token>& toks, const bool continued) const {
+ //continued should be set to true when outputTokens is invoked multiple times and it is not the first invokation, makes paragraph boundaries work over multiple calls
short quotelevel = 0;
for ( size_t i = 0; i < toks.size(); i++) {
- if ((detectPar) && ( toks[i].role & NEWPARAGRAPH) && (!verbose) && (i != 0) ) {
+ if ((detectPar) && ( toks[i].role & NEWPARAGRAPH) && (!verbose) && ((i != 0) || (continued)) ) {
+ //output paragraph separator
if (sentenceperlineoutput) {
OUT << endl;
- }
- else {
+ } else {
OUT << endl << endl;
}
}
@@ -1215,12 +1244,21 @@ namespace Tokenizer {
else {
type = &type_unknown;
}
- if (bos) {
- tokens.push_back( Token( type, word , BEGINOFSENTENCE ) );
- bos = false;
+ if ( doPunctFilter
+ && ( type == &type_punctuation || type == &type_currency ||
+ type == &type_emoticon ) ) {
+ if (tokDebug >= 2 )
+ *Log(theErrLog) << " [passThruLine] skipped PUNCTUATION ["
+ << input << "]" << endl;
}
else {
- tokens.push_back( Token( type, word ) );
+ if (bos) {
+ tokens.push_back( Token( type, word , BEGINOFSENTENCE ) );
+ bos = false;
+ }
+ else {
+ tokens.push_back( Token( type, word ) );
+ }
}
alpha = false;
num = false;
@@ -1262,12 +1300,21 @@ namespace Tokenizer {
else {
type = &type_unknown;
}
- if (bos) {
- tokens.push_back( Token( type, word , BEGINOFSENTENCE ) );
- bos = false;
+ if ( doPunctFilter
+ && ( type == &type_punctuation || type == &type_currency ||
+ type == &type_emoticon ) ) {
+ if (tokDebug >= 2 )
+ *Log(theErrLog) << " [passThruLine] skipped PUNCTUATION ["
+ << input << "]" << endl;
}
else {
- tokens.push_back( Token( type, word ) );
+ if (bos) {
+ tokens.push_back( Token( type, word , BEGINOFSENTENCE ) );
+ bos = false;
+ }
+ else {
+ tokens.push_back( Token( type, word ) );
+ }
}
}
}
@@ -1525,10 +1572,19 @@ namespace Tokenizer {
type = &type_unknown;
}
}
- Token T( type, input, space ? NOROLE : NOSPACE );
- tokens.push_back( T );
- if (tokDebug >= 2)
- *Log(theErrLog) << " [tokenizeWord] added token " << T << endl;
+ if ( doPunctFilter
+ && ( type == &type_punctuation || type == &type_currency ||
+ type == &type_emoticon ) ) {
+ if (tokDebug >= 2 )
+ *Log(theErrLog) << " [tokenizeWord] skipped PUNCTUATION ["
+ << input << "]" << endl;
+ }
+ else {
+ Token T( type, input, space ? NOROLE : NOSPACE );
+ tokens.push_back( T );
+ if (tokDebug >= 2)
+ *Log(theErrLog) << " [tokenizeWord] added token " << T << endl;
+ }
}
else {
for ( unsigned int i = 0; i < rules.size(); i++) {
@@ -1555,8 +1611,16 @@ namespace Tokenizer {
if ( tokDebug >= 4 )
*Log(theErrLog) << "\tTOKEN match[" << m << "] = "
<< matches[m] << endl;
- if ( post.length() > 0 ) space = false;
- tokens.push_back( Token( &rules[i]->id, matches[m], space ? NOROLE : NOSPACE ) );
+ if ( doPunctFilter
+ && (&rules[i]->id)->startsWith("PUNCTUATION") ){
+ if (tokDebug >= 2 )
+ *Log(theErrLog) << " [tokenizeWord] skipped PUNCTUATION ["
+ << matches[m] << "]" << endl;
+ }
+ else {
+ if ( post.length() > 0 ) space = false;
+ tokens.push_back( Token( &rules[i]->id, matches[m], space ? NOROLE : NOSPACE ) );
+ }
}
}
else if ( tokDebug >=4 )
diff --git a/src/ucto.cxx b/src/ucto.cxx
index 2c67771..dcc1f1d 100644
--- a/src/ucto.cxx
+++ b/src/ucto.cxx
@@ -1,5 +1,5 @@
/*
- $Id: ucto.cxx 17687 2014-09-22 10:16:56Z sloot $
+ $Id: ucto.cxx 17839 2014-11-20 16:04:12Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/ucto/trunk/src/ucto.cxx $
Copyright (c) 1998 - 2014
ILK - Tilburg University
@@ -55,6 +55,7 @@ void usage(){
<< "\t-v - Verbose mode" << endl
<< "\t-s <string> - End-of-Sentence marker (default: <utt>)" << endl
<< "\t--passthru - Don't tokenize, but perform input decoding and simple token role detection" << endl
+ << "\t--filterpunct - remove all punctuation from the output" << endl
<< "\t-P - Disable paragraph detection" << endl
<< "\t-S - Disable sentence detection!" << endl
<< "\t-Q - Enable quote detection (experimental)" << endl
@@ -78,6 +79,7 @@ int main( int argc, char *argv[] ){
bool paragraphdetection = true;
bool quotedetection = false;
bool dofiltering = true;
+ bool dopunctfilter = false;
bool splitsentences = true;
bool xmlin = false;
bool xmlout = false;
@@ -97,7 +99,7 @@ int main( int argc, char *argv[] ){
try {
TiCC::CL_Options Opts( "d:e:fhlPQunmN:vVSL:c:s:x:FX",
- "passthru,textclass:,inputclass:,outputclass:,id:");
+ "filterpunct,passthru,textclass:,inputclass:,outputclass:,id:");
Opts.init(argc, argv );
if ( Opts.extract( 'h' ) ){
usage();
@@ -112,6 +114,7 @@ int main( int argc, char *argv[] ){
}
Opts.extract('e', inputEncoding );
dofiltering = !Opts.extract( 'f' );
+ dopunctfilter = Opts.extract( "filterpunct" );
paragraphdetection = !Opts.extract( 'P' );
splitsentences = !Opts.extract( 'S' );
xmlin = Opts.extract( 'F' );
@@ -241,6 +244,7 @@ int main( int argc, char *argv[] ){
tokenizer.setNormalization( normalization );
tokenizer.setInputEncoding( inputEncoding );
tokenizer.setFiltering(dofiltering);
+ tokenizer.setPunctFilter(dopunctfilter);
tokenizer.setInputClass(inputclass);
tokenizer.setOutputClass(outputclass);
tokenizer.setXMLOutput(xmlout, docid);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/ucto.git
More information about the debian-science-commits
mailing list