[hfst] 01/02: Imported Upstream version 3.8.2~r4145

Tino Didriksen tinodidriksen-guest at moszumanska.debian.org
Fri Dec 19 18:11:17 UTC 2014


This is an automated email from the git hooks/post-receive script.

tinodidriksen-guest pushed a commit to branch master
in repository hfst.

commit 0b8a798b927fd758a05b5509ed5df9ba19466578
Author: Tino Didriksen <mail at tinodidriksen.com>
Date:   Fri Dec 19 18:09:10 2014 +0000

    Imported Upstream version 3.8.2~r4145
---
 ChangeLog                                          | 424 ++++++++++++++++
 ChangeLog.old                                      | 535 +++++++++++++++++++++
 NEWS                                               |  12 +
 check_installation/copy-tool-tests.sh              |   2 +
 configure.ac                                       |  16 +-
 libhfst/src/HfstTokenizer.cc                       | 130 ++++-
 libhfst/src/HfstTokenizer.h                        |   9 +
 libhfst/src/HfstTransducer.cc                      |  43 ++
 libhfst/src/HfstTransducer.h                       |   4 +
 libhfst/src/Makefile.am                            |   2 +-
 libhfst/src/implementations/ConvertOlTransducer.cc | 174 +++----
 libhfst/src/implementations/HfstTransitionGraph.h  | 149 ++++--
 .../implementations/optimized-lookup/convert.cc    |  58 +--
 .../src/implementations/optimized-lookup/convert.h | 246 ++++++----
 .../src/implementations/optimized-lookup/pmatch.cc | 268 ++++++++---
 .../src/implementations/optimized-lookup/pmatch.h  |  40 +-
 .../implementations/optimized-lookup/transducer.cc |   5 +
 .../implementations/optimized-lookup/transducer.h  |   5 +-
 libhfst/src/parsers/LexcCompiler.cc                | 158 ++++--
 libhfst/src/parsers/LexcCompiler.h                 |  12 +-
 libhfst/src/parsers/XreCompiler.cc                 |  41 +-
 libhfst/src/parsers/XreCompiler.h                  |  30 ++
 libhfst/src/parsers/lexc-lexer.ll                  |   3 -
 libhfst/src/parsers/lexc-parser.yy                 |  72 ++-
 libhfst/src/parsers/pmatch_lex.ll                  |  36 ++
 libhfst/src/parsers/pmatch_parse.yy                | 280 +++++++++--
 libhfst/src/parsers/pmatch_utils.cc                | 255 ++++++++--
 libhfst/src/parsers/pmatch_utils.h                 |   9 +-
 libhfst/src/parsers/xre_lex.ll                     |   2 +
 libhfst/src/parsers/xre_parse.yy                   |  19 +-
 libhfst/src/parsers/xre_utils.cc                   |  83 +++-
 libhfst/src/parsers/xre_utils.h                    |   5 +
 test/tools/Makefile.am                             |   8 +-
 test/tools/lexc-compiler-functionality.sh          |   4 +-
 test/tools/warn.one-sided-flags.lexc               |   7 +
 test/tools/warn.one-sided-flags.lexc.flag.result   | Bin 0 -> 461 bytes
 test/tools/warn.one-sided-flags.lexc.result        | Bin 0 -> 347 bytes
 .../xfail.sublexicon-defined-more-than-once.lexc   |  15 +
 tools/src/HfstStrings2FstTokenizer.cc              |  20 +-
 tools/src/hfst-compose.cc                          |  62 ++-
 tools/src/hfst-fst2strings.cc                      |   4 +-
 tools/src/hfst-lexc-compiler.cc                    |  26 +-
 tools/src/hfst-pmatch.cc                           |  17 +-
 tools/src/hfst-proc2.cc                            |   3 +-
 tools/src/hfst-summarize.cc                        |  61 ++-
 tools/src/parsers/XfstCompiler.cc                  | 192 ++++++--
 tools/src/parsers/XfstCompiler.h                   |   3 +-
 tools/src/parsers/hfst-xfst.cc                     |   1 +
 tools/src/parsers/test/Makefile.am                 |   8 +-
 tools/src/parsers/test/compile_replace_1.output    |   8 +
 tools/src/parsers/test/compile_replace_1.xfst      |  12 +
 tools/src/parsers/test/compile_replace_2.output    |   8 +
 tools/src/parsers/test/compile_replace_2.xfst      |  10 +
 tools/src/parsers/test/compile_replace_3.output    |  17 +
 tools/src/parsers/test/compile_replace_3.xfst      |  21 +
 tools/src/parsers/test/merge.att                   |  39 ++
 tools/src/parsers/test/merge.xfst                  |  17 +
 tools/src/parsers/test/merge_weighted.att          |  39 ++
 tools/src/parsers/test/merge_weighted.xfst         |  18 +
 tools/src/parsers/test/one_transition_regex.att    |  10 +
 tools/src/parsers/test/one_transition_regex.xfst   |  11 +
 tools/src/parsers/test/test.sh                     |  30 +-
 62 files changed, 3195 insertions(+), 603 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 56e3859..50487c4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,427 @@
+2014-12-17 13:28  eaxelson
+
+	* configure.ac, libhfst/src/HfstTransducer.cc,
+	  libhfst/src/parsers/XreCompiler.cc,
+	  libhfst/src/parsers/XreCompiler.h,
+	  libhfst/src/parsers/xre_lex.ll,
+	  tools/src/parsers/XfstCompiler.cc: Small fixes to
+	  compile-replace. Also restarting the char counter every time a
+	  new xre parsing is started. Removed the xml2 dependency in
+	  configure.
+
+2014-12-12 10:49  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc: Omit
+	  unnecessary step in stringification
+
+2014-12-12 10:22  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h: Another
+	  slight speed improvement
+
+2014-12-12 09:56  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc: Some
+	  double free insurance related to previous commit
+
+2014-12-12 09:53  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h: Some more
+	  pmatch runtime speedup (around 5-10% in most cases)
+
+2014-12-11 21:54  hardwick
+
+	* libhfst/src/implementations/ConvertOlTransducer.cc: Remove
+	  commented-out lines (same ones I was intending in the previous
+	  commit)
+
+2014-12-11 21:08  hardwick
+
+	* libhfst/src/implementations/ConvertOlTransducer.cc: Remove
+	  extraneous index table fitting test
+
+2014-12-11 20:33  hardwick
+
+	* libhfst/src/implementations/ConvertOlTransducer.cc,
+	  libhfst/src/implementations/optimized-lookup/convert.h: Further
+	  improvements to conversion to optimized lookup format
+	  
+	  This speeds up conversion a bit more, and also improves packing
+	  in one case by around 25% (implying that this commit *does*
+	  change the
+	  output of the conversion, but in a functionally equivalent way).
+	  
+	  The space gain was mainly accomplished by changing what happens
+	  when we fail
+	  to find a suitable index table location several times in a row
+	  starting in the
+	  same place. We used to jump up to the previous successful
+	  location and its
+	  indices, now we keep trying to fill in the gaps between those
+	  first.
+	  
+	  This commit also removes the state-relabeling facility, because
+	  it didn't seem
+	  to be doing anything (perhaps something in HfstBasicTransducer
+	  has changed;
+	  it seems state numbers can no longer be non-contiguous). So we
+	  now assume
+	  states are contiguous and iterated in order. If this assumption
+	  is broken,
+	  this needs to be changed. But as I said earlier, looks like
+	  relabeling wasn't
+	  doing anything now anyway.
+
+2014-12-11 13:25  hardwick
+
+	* libhfst/src/implementations/ConvertOlTransducer.cc,
+	  libhfst/src/implementations/optimized-lookup/convert.cc,
+	  libhfst/src/implementations/optimized-lookup/convert.h: Large
+	  speedup in conversion to optimized-lookup format
+	  
+	  In some cases this speeds up conversion by up to 80%. This
+	  involved
+	  a) making the code a bit more convoluted by using multiple data
+	  structures
+	  where there used to be just one
+	  b) omitting some space-oriented optimizations that appear to be
+	  usually
+	  very minor and occasionally even harmful
+	  
+	  While I've tested this with the major transducers I've found,
+	  it's not
+	  unthinkable that refactoring such hairy code as this has
+	  introduced bugs.
+	  No new functionality is introduced so it's ok to roll this back
+	  if it causes
+	  problems.
+
+2014-12-05 16:03  eaxelson
+
+	* tools/src/parsers/test/Makefile.am,
+	  tools/src/parsers/test/compile_replace_1.output,
+	  tools/src/parsers/test/compile_replace_1.xfst,
+	  tools/src/parsers/test/compile_replace_2.output,
+	  tools/src/parsers/test/compile_replace_2.xfst,
+	  tools/src/parsers/test/compile_replace_3.output,
+	  tools/src/parsers/test/compile_replace_3.xfst,
+	  tools/src/parsers/test/test.sh: Added test cases for
+	  compile-replace.
+
+2014-12-05 14:49  eaxelson
+
+	* tools/src/parsers/test/Makefile.am,
+	  tools/src/parsers/test/merge.att,
+	  tools/src/parsers/test/merge.xfst,
+	  tools/src/parsers/test/merge_weighted.att,
+	  tools/src/parsers/test/merge_weighted.xfst,
+	  tools/src/parsers/test/test.sh: Added test cases for merge
+	  operation.
+
+2014-12-05 12:22  eaxelson
+
+	* tools/src/hfst-fst2strings.cc: Fixed an error in hfst-fst2strings
+	  --print-separator where two consecutive lines of -- were printed
+	  between non-empty transducers in some cases.
+
+2014-12-04 13:04  eaxelson
+
+	* libhfst/src/parsers/xre_utils.cc,
+	  tools/src/parsers/XfstCompiler.cc: Now minimizing the merging
+	  automaton before merge operation so that epsilons do not cut a
+	  successful merge path. Also allowing epsilon-to-regexp-marker
+	  transitions in the merge filter.
+
+2014-12-03 15:35  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  tools/src/parsers/XfstCompiler.cc: Improvements to
+	  compile-replace function, now it should work for input and output
+	  sides of a transducer.
+
+2014-12-03 09:36  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Revert bungled change to
+	  precedence order
+
+2014-12-02 20:34  eaxelson
+
+	* libhfst/src/HfstTransducer.cc,
+	  libhfst/src/parsers/XreCompiler.cc,
+	  libhfst/src/parsers/XreCompiler.h,
+	  libhfst/src/parsers/xre_lex.ll, tools/src/parsers/hfst-xfst.cc:
+	  Now xre compiler of function merge does not increment the char
+	  counter, making it possible to have many merge operators inside
+	  one regex.
+
+2014-12-02 16:07  eaxelson
+
+	* libhfst/src/HfstTransducer.cc, libhfst/src/HfstTransducer.h,
+	  libhfst/src/parsers/XreCompiler.cc,
+	  libhfst/src/parsers/XreCompiler.h,
+	  libhfst/src/parsers/xre_utils.cc: Added a constructor
+	  XreCompiler(XreConstructorArguments & args) to facilitate passing
+	  xre variables to merge function which needs them in its internal
+	  xre compiler.
+
+2014-12-02 14:06  eaxelson
+
+	* libhfst/src/parsers/xre_utils.cc: Now using internal starptr
+	  variables in functions hfst::xre::compile and
+	  hfst::xre::compile_first instead of global hfst::xre::startptr.
+	  This should fix the strange memory errors which occurred when
+	  calling merge operation inside a regular expression.
+
+2014-12-02 12:33  eaxelson
+
+	* libhfst/src/HfstTransducer.cc,
+	  libhfst/src/implementations/HfstTransitionGraph.h: Now merge
+	  operation filters out non-optimal paths.
+
+2014-11-27 14:49  eaxelson
+
+	* libhfst/src/HfstTokenizer.cc, libhfst/src/HfstTokenizer.h:
+	  Tentatively added a function 'tokenize_and_align_flag_diacritics'
+	  to HfstTokenizer.
+
+2014-11-26 16:50  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h: Runtime
+	  speed improvements
+	  Prereserve table vectors, eliminate special_symbols map
+
+2014-11-26 13:00  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h,
+	  libhfst/src/implementations/optimized-lookup/transducer.h,
+	  libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h, tools/src/hfst-pmatch.cc,
+	  tools/src/hfst-proc2.cc: Profiling support with Counter() and
+	  --profile & a bunch of smaller changes
+	  
+	  Counter(arg) in a ruleset inserts a profiling point, hfst-pmatch
+	  --profile
+	  prints profiling info.
+	  
+	  When --verbose, warn about undefined symbols being interpreted as
+	  labels.
+	  When --flatten, flatten Lst() definitions too.
+
+2014-11-25 14:14  eaxelson
+
+	* tools/src/hfst-compose.cc: Allowing 1-to-n composition of
+	  automata in archives. Fixes bug (or feature request) #277.
+
+2014-11-25 10:54  eaxelson
+
+	* libhfst/src/parsers/LexcCompiler.cc,
+	  libhfst/src/parsers/LexcCompiler.h,
+	  libhfst/src/parsers/lexc-lexer.ll,
+	  libhfst/src/parsers/lexc-parser.yy,
+	  tools/src/hfst-lexc-compiler.cc: Now passing verbosity to
+	  LexcCompiler as an unsigned integer via setVerbosity(uint). Also
+	  made small fixes to warning prints in lexc compiler.
+
+2014-11-24 13:51  eaxelson
+
+	* test/tools/Makefile.am,
+	  test/tools/lexc-compiler-functionality.sh,
+	  test/tools/warn.one-sided-flags.lexc,
+	  test/tools/warn.one-sided-flags.lexc.flag.result,
+	  test/tools/warn.one-sided-flags.lexc.result: Added tests for
+	  one-sided flag diacritics for hfst-lexc.
+
+2014-11-24 13:50  eaxelson
+
+	* libhfst/src/parsers/LexcCompiler.cc,
+	  libhfst/src/parsers/lexc-parser.yy: Now lexc compiler warns about
+	  one-sided flag diacritics in verbose mode.
+
+2014-11-24 13:49  eaxelson
+
+	* libhfst/src/HfstTokenizer.cc, libhfst/src/HfstTokenizer.h: Added
+	  a tokenizing function that warns about symbol pairs, if needed.
+
+2014-11-24 11:07  eaxelson
+
+	* test/tools/Makefile.am,
+	  test/tools/lexc-compiler-functionality.sh,
+	  test/tools/xfail.sublexicon-defined-more-than-once.lexc: Added
+	  test case for previous lexc commit (sublexicon defined more than
+	  once treated as an error).
+
+2014-11-24 10:33  eaxelson
+
+	* libhfst/src/parsers/LexcCompiler.cc,
+	  libhfst/src/parsers/LexcCompiler.h,
+	  libhfst/src/parsers/lexc-parser.yy: Now multiple definitions of
+	  the same lexicon in lexc are treated as an error unless
+	  LexcCompiler::setAllowMultipleLexiconDefinitions(true) is called
+	  first.
+
+2014-11-24 09:04  eaxelson
+
+	* libhfst/src/parsers/lexc-lexer.ll,
+	  libhfst/src/parsers/lexc-parser.yy: Now lexc parser updates the
+	  error status hlexcnerrs when hlexcerror is called. In case of
+	  warnings, the error status is not updated.
+
+2014-11-20 12:15  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h,
+	  libhfst/src/implementations/optimized-lookup/transducer.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h: Speed
+	  up list arc processing by replacing some maps and sets with
+	  vectors
+
+2014-11-19 09:17  eaxelson
+
+	* libhfst/src/HfstTransducer.cc, libhfst/src/HfstTransducer.h,
+	  libhfst/src/implementations/HfstTransitionGraph.h,
+	  libhfst/src/parsers/LexcCompiler.cc,
+	  libhfst/src/parsers/XreCompiler.cc,
+	  libhfst/src/parsers/XreCompiler.h,
+	  libhfst/src/parsers/xre_parse.yy,
+	  libhfst/src/parsers/xre_utils.cc,
+	  libhfst/src/parsers/xre_utils.h,
+	  tools/src/parsers/XfstCompiler.cc,
+	  tools/src/parsers/XfstCompiler.h: Changed list to set in merge
+	  operation. Removed commented code.
+
+2014-11-18 17:00  eaxelson
+
+	* tools/src/hfst-lexc-compiler.cc: Added option --encode-weights to
+	  hfst-lexc.
+
+2014-11-17 13:55  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Reinstate undefined symbols
+	  as valid tokens as per documentation
+
+2014-11-17 13:53  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc: Add defined lists
+
+2014-11-17 13:14  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Add Sigma()
+
+2014-11-17 11:48  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h,
+	  libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Added Lst() and support for
+	  list arcs in runtime
+
+2014-11-17 11:21  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Sync precedence rules
+
+2014-11-17 11:17  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Switch order of precedence
+	  of concatenation and other binary operations
+
+2014-11-17 07:47  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy: Added Lit()
+
+2014-11-13 07:21  eaxelson
+
+	* libhfst/src/HfstTransducer.cc, libhfst/src/HfstTransducer.h,
+	  libhfst/src/implementations/HfstTransitionGraph.h: Modified the
+	  function merge, it now takes as an argument a map of list
+	  symbols.
+
+2014-11-13 07:16  eaxelson
+
+	* libhfst/src/parsers/XreCompiler.cc,
+	  libhfst/src/parsers/XreCompiler.h,
+	  libhfst/src/parsers/xre_parse.yy,
+	  libhfst/src/parsers/xre_utils.cc,
+	  libhfst/src/parsers/xre_utils.h,
+	  tools/src/parsers/XfstCompiler.cc,
+	  tools/src/parsers/test/Makefile.am,
+	  tools/src/parsers/test/one_transition_regex.att,
+	  tools/src/parsers/test/one_transition_regex.xfst,
+	  tools/src/parsers/test/test.sh: Fixed a bug in xre parser, now
+	  definitions and unknowns can be used together in expressions such
+	  as 'regex [def:?] ;'.
+
+2014-11-11 14:06  eaxelson
+
+	* libhfst/src/HfstTokenizer.cc,
+	  tools/src/HfstStrings2FstTokenizer.cc: Now the epsilon symbol is
+	  not added as a multichar symbol to hfst-lookup tokenizer if it is
+	  the empty string. Should fix bug #275.
+
+2014-11-10 21:26  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_utils.cc: Allow \U00NNNNNN syntax for
+	  code points in utf-8 but above U+FFFF
+
+2014-11-10 20:55  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_utils.cc: Allow \UNNNN as well as
+	  \uNNNN
+
+2014-11-10 20:41  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_utils.cc: Allow \uNNNN in range
+	  notation and make some fixes to utf-8 handling
+
+2014-11-10 18:58  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Added utf-8 character range
+	  expressions
+
+2014-11-10 14:57  eaxelson
+
+	* libhfst/src/HfstTransducer.cc, libhfst/src/HfstTransducer.h,
+	  libhfst/src/implementations/HfstTransitionGraph.h,
+	  libhfst/src/parsers/xre_lex.ll, libhfst/src/parsers/xre_parse.yy:
+	  An untested implementation of the merge operation added to
+	  hfst-xfst.
+
+2014-11-10 13:23  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Syntax-level completion of
+	  functions (arg placement still not completely free)
+
+2014-11-04 16:35  eaxelson
+
+	* check_installation/copy-tool-tests.sh: Fixed a small bug in hfst
+	  tool tester.
+
+2014-11-04 10:23  eaxelson
+
+	* ChangeLog, ChangeLog.old, NEWS, configure.ac,
+	  libhfst/src/Makefile.am, swig/setup.py: Ready for release 3.8.1.
+
 2014-10-31 16:44  eaxelson
 
 	* tools/src/hfst-guess.cc: Fixed std::cout into &std::cout in
diff --git a/ChangeLog.old b/ChangeLog.old
index 982c46a..56e3859 100644
--- a/ChangeLog.old
+++ b/ChangeLog.old
@@ -1,3 +1,538 @@
+2014-10-31 16:44  eaxelson
+
+	* tools/src/hfst-guess.cc: Fixed std::cout into &std::cout in
+	  stream pointer comparison.
+
+2014-10-31 14:36  eaxelson
+
+	* back-ends/openfst/src/include/fst/accumulator.h,
+	  back-ends/openfst/src/include/fst/arc-map.h,
+	  back-ends/openfst/src/include/fst/determinize.h,
+	  back-ends/openfst/src/include/fst/encode.h,
+	  back-ends/openfst/src/include/fst/epsnormalize.h,
+	  back-ends/openfst/src/include/fst/equivalent.h,
+	  back-ends/openfst/src/include/fst/factor-weight.h,
+	  back-ends/openfst/src/include/fst/label-reachable.h,
+	  back-ends/openfst/src/include/fst/relabel.h,
+	  back-ends/openfst/src/include/fst/replace-util.h,
+	  back-ends/openfst/src/include/fst/replace.h,
+	  back-ends/openfst/src/include/fst/rmepsilon.h,
+	  back-ends/openfst/src/include/fst/rmfinalepsilon.h,
+	  back-ends/openfst/src/include/fst/sparse-tuple-weight.h,
+	  back-ends/openfst/src/include/fst/state-map.h,
+	  back-ends/openfst/src/include/fst/symbol-table-ops.h,
+	  back-ends/openfst/src/include/fst/synchronize.h,
+	  back-ends/openfst/src/include/fst/test-properties.h,
+	  back-ends/openfst/src/include/fst/util.h, configure.ac,
+	  tools/src/hfst-tagger/src/use_model_src/DataTypes.h,
+	  tools/src/hfst-tagger/src/use_model_src/NewLexicalModel.h: Now
+	  using definitions USE_TR1_UNORDERED_(MAP|SET) when defining what
+	  unordered maps and sets to use.
+
+2014-10-30 15:29  eaxelson
+
+	* configure.ac: Fixed a typo tr2 -> tr1.
+
+2014-10-29 13:50  eaxelson
+
+	* configure.ac, tools/src/HfstAlphabet.h,
+	  tools/src/hfst-tagger/src/use_model_src/NewLexicalModel.h:
+	  Unordered maps and sets are used from std namespace if
+	  -std=gnu++11 is requested.
+
+2014-10-28 11:40  eaxelson
+
+	* swig/setup.py: Forgot to update version number in swig bindings.
+
+2014-10-27 16:40  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy: Various improvements and
+	  additions to function syntax,
+	  particularly empty args and string-args
+
+2014-10-27 14:48  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  tools/src/parsers/XfstCompiler.cc: Added functions for merge
+	  operation in HfstTransitionGraph.
+
+2014-10-24 11:03  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  libhfst/src/implementations/HfstTropicalTransducerTransitionData.h:
+	  Modified weight handling in HfstTransitionGraph::intersect.
+
+2014-10-24 08:38  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/transducer.cc: When
+	  the alphabet is constructed from a symbol table, set identity to
+	  NO_SYM
+	  this was supposed to always happen anyway but didn't matter until
+	  recently
+
+2014-10-22 13:16  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  libhfst/src/implementations/HfstTropicalTransducerTransitionData.h:
+	  Modified intersection algorithms in HfstTransitionGraph.
+
+2014-10-22 10:21  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h: Added
+	  functions to be used in xerox's merge operation.
+
+2014-10-22 10:20  eaxelson
+
+	* tools/src/parsers/XfstCompiler.cc,
+	  tools/src/parsers/xfst-parser.yy: Small fixes to list definitions
+	  in hfst-xfst.
+
+2014-10-17 15:59  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  tools/src/parsers/XfstCompiler.cc,
+	  tools/src/parsers/XfstCompiler.h: Added an implementation for
+	  compile-replace in hfst-xfst, it still needs lot of testing.
+
+2014-10-16 14:36  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  tools/src/parsers/XfstCompiler.cc: Yet some more functions added
+	  to compile-replace.
+
+2014-10-16 11:48  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h,
+	  tools/src/parsers/XfstCompiler.cc: Added more functions for
+	  compile-replace.
+
+2014-10-15 14:23  eaxelson
+
+	* libhfst/src/implementations/HfstTransitionGraph.h: Tentatively
+	  added functions in HfstTransitionGraph to be used in
+	  compile-replace.
+
+2014-10-15 13:26  eaxelson
+
+	* tools/src/parsers/XfstCompiler.cc: Added function
+	  is_well_formed_for_compile_replace to be used in compile-replace
+	  command.
+
+2014-10-15 11:30  eaxelson
+
+	* test/libhfst/test_transducer_functions.cc: Forgot to comment out
+	  debugging prints in tests.
+
+2014-10-15 11:28  eaxelson
+
+	* back-ends/openfst/src/include/fst/interval-set.h,
+	  test/libhfst/test_transducer_functions.cc: Added brackets around
+	  member calls 'Interval.end' and 'Interval.begin' to avoid them
+	  getting confused with std::end() and std::begin() templates in
+	  C++11.
+
+2014-10-15 10:21  eaxelson
+
+	* back-ends/openfst/src/include/fst/interval-set.h: Rolled back
+	  earlier revision in interval-set.h
+
+2014-10-15 09:56  eaxelson
+
+	* back-ends/openfst/src/include/fst/interval-set.h,
+	  libhfst/src/implementations/ConvertTransducerFormat.h,
+	  libhfst/src/implementations/FomaTransducer.h: Made small
+	  modifications for better c++11/c++0x support.
+
+2014-10-15 09:32  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/transducer.cc: Fix
+	  bug where identity wasn't being set to NO_SYMBOL when absent from
+	  alphabet
+
+2014-10-15 08:46  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h: Refuse
+	  to enter flag loops more than once
+	  fixes bug #250 - arguably this could be applied to epsilon
+	  loops too,
+	  pending discussion
+
+2014-10-15 08:09  eaxelson
+
+	* man/Makefile.am: Fixed a typo in Makefile.
+
+2014-10-14 15:01  eaxelson
+
+	* tools/src/parsers/XfstCompiler.cc: Now hfst-xfst gives a warning
+	  (or exits) if a binary command tries to access a stack with less
+	  than 2 transducers.
+
+2014-10-14 14:51  eaxelson
+
+	* tools/src/parsers/XfstCompiler.cc,
+	  tools/src/parsers/XfstCompiler.h: Now hfst-xfst exits if a
+	  command tries to access an empty stack if quit-on-fail is ON and
+	  hfst-xfst is not in interactive mode.
+
+2014-10-14 13:44  mpsilfve
+
+	* scripts/hfst-fst2tesseract.xfst: Comment explaining usage of
+	  hfst-fst2tesseract.xfst.
+
+2014-10-14 13:40  mpsilfve
+
+	* scripts/hfst-fst2tesseract.xfst: Added script for converting
+	  morphological analyzers to Tesseract word models.
+
+2014-10-14 12:27  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h:
+	  Improvements to loop finding
+
+2014-10-14 12:08  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h:
+	  Further corrections to loop detection
+	  slowdown back to ~10x but may be improved from here
+
+2014-10-14 11:51  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h: Fix
+	  some cases of overdetecting infinite ambiguity, there's still
+	  some left
+
+2014-10-14 10:51  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc:
+	  Forgot to keep adding the repeated states in the loop detection
+	  phase
+
+2014-10-13 18:15  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h: Only
+	  try to catch infinite ambiguity at epsilon arcs
+	  This is the big speed win and presumably correct.
+
+2014-10-13 17:22  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc:
+	  This order of comparison is a bit faster since sizes never differ
+
+2014-10-13 16:34  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/find_epsilon_loops.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h: Speed
+	  up is_lookup_infinitely_ambiguous() somewhat
+
+2014-10-11 04:10  mie
+
+	* tools/src/hfst-optimized-lookup.cc: Try to avoid using negative
+	  indexes for arrays
+
+2014-10-11 03:59  mie
+
+	* test/tools/Makefile.am,
+	  test/tools/optimized-lookup-functionality.sh: Optimised lookup
+	  tests
+
+2014-10-09 17:22  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/transducer.cc,
+	  libhfst/src/implementations/optimized-lookup/transducer.h:
+	  Runtime handling of identity and unknown
+
+2014-10-09 12:30  hardwick
+
+	* libhfst/src/parsers/pmatch_utils.h: Forgot to remove one thing in
+	  the last commit
+
+2014-10-09 12:22  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc: Take out our own
+	  harmonization hacks now that they're unneeded
+
+2014-10-08 12:45  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Don't use delimiters when
+	  they're not necessary,
+	  also don't insert everything to RTNs anymore and provide the
+	  is_special()
+	  function the previous commit required
+
+2014-10-08 12:29  hardwick
+
+	* libhfst/src/HarmonizeUnknownAndIdentitySymbols.cc: Treat special
+	  pmatch symbols like flag diacritics for harmonization,
+	  also after harmonization add all symbols, including flags, to the
+	  alphabets
+
+2014-10-08 11:51  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Fix bug where delimiters
+	  were shadowing the named transducers' names
+	  also remove extraneous parsing path
+
+2014-10-06 21:33  janiemi
+
+	* test/tools/pmatch-tester.sh, test/tools/pmatch-tests.sh: Updated
+	  pmatch functionality tests.
+	  
+	  Current pmatch syntax: string literals in {...}, symbols in
+	  double quotes.
+	  Added tests: Ins maximizing globally; Difference and character
+	  sets in
+	  named expressions; Named expressions in OptCap, ToUpper; Named
+	  expressions
+	  in replace; Long input lines; Ins should not throw
+	  std::out_of_range;
+	  Disjunction of two Ins expressions.
+	  Added options: --include-tests, --exclude-tests,
+	  --no-number-tests,
+	  --truncate-lines, --truncate-log-lines. Minor new features in
+	  test runner.
+
+2014-10-06 17:27  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll: Add string literal syntax for
+	  standalone %-escaped chars
+	  (they used to be considered symbols which now have to be
+	  defined or cause an error)
+
+2014-10-06 16:44  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc,
+	  libhfst/src/implementations/optimized-lookup/pmatch.h,
+	  libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Use minimization guards to
+	  keep multiple negative contexts separate in disjunctions
+
+2014-10-06 15:36  eaxelson
+
+	* libhfst/src/HfstInputStream.h,
+	  libhfst/src/implementations/HfstTransitionGraph.h,
+	  libhfst/src/parsers/XreCompiler.h, swig/doc/libhfst.py,
+	  swig/hfstBot.py, swig/test/test_examples.py: Fixed some more
+	  spelling errors noticed by lintian.
+
+2014-10-06 14:45  eaxelson
+
+	* tools/src/hfst-lexc-wrapper.cc,
+	  tools/src/hfst-twolc/src/commandline_src/CommandLine.cc: Fixed
+	  spelling errors found by lintian.
+
+2014-10-06 14:33  eaxelson
+
+	* man/hfst-train-tagger.1: Added again hfst-train-tagger man page
+	  which is no more a symlink.
+
+2014-10-06 14:31  eaxelson
+
+	* man/Makefile.am, man/hfst-build-tagger.1,
+	  man/hfst-foma-wrapper.1, man/hfst-open-input-file-for-tagger.1,
+	  man/hfst-reweight-tagger.1, man/hfst-train-tagger.1,
+	  man/hfst-twolc-loc.1, man/hfst-twolc-system.1, man/hfst-twolc.1,
+	  man/hfst_tagger_compute_data_statistics.py.1, man/htwolcpre1.1,
+	  man/htwolcpre2.1, man/htwolcpre3.1: Added missing man pages.
+
+2014-10-06 14:00  eaxelson
+
+	* tools/src/hfst-tagger/src/hfst-reweight-tagger.cc: Now
+	  hfst-reweight-tagger --help returns EXIT_SUCCESS before trying to
+	  access uninitialized values.
+
+2014-10-06 13:24  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: Warn about shadowing
+	  definitions
+
+2014-10-06 13:05  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy: Alternate syntaxes regex for
+	  Define TOP and .#. for #
+
+2014-10-06 12:47  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Revamp LABEL parsing and
+	  introduce curly literal pairs
+
+2014-10-06 11:08  eaxelson
+
+	* man/Makefile.am, man/hfst-affix-guessify.1,
+	  man/hfst-apertium-proc.1, man/hfst-calculate.1,
+	  man/hfst-compare.1, man/hfst-compose-intersect.1,
+	  man/hfst-compose.1, man/hfst-concatenate.1, man/hfst-conjunct.1,
+	  man/hfst-determinise.1, man/hfst-determinize.1,
+	  man/hfst-disjunct.1, man/hfst-edit-metadata.1,
+	  man/hfst-expand-equivalences.1, man/hfst-expand.1,
+	  man/hfst-format.1, man/hfst-fst2fst.1, man/hfst-fst2strings.1,
+	  man/hfst-fst2txt.1, man/hfst-grep.1, man/hfst-guess.1,
+	  man/hfst-guessify.1, man/hfst-head.1, man/hfst-info.1,
+	  man/hfst-intersect.1, man/hfst-invert.1, man/hfst-lexc-wrapper.1,
+	  man/hfst-lexc.1, man/hfst-lookup.1, man/hfst-minimise.1,
+	  man/hfst-minimize.1, man/hfst-minus.1, man/hfst-multiply.1,
+	  man/hfst-name.1, man/hfst-open-input-file-for-tagger.1,
+	  man/hfst-optimised-lookup.1, man/hfst-optimized-lookup.1,
+	  man/hfst-pair-test.1, man/hfst-pmatch.1, man/hfst-pmatch2fst.1,
+	  man/hfst-proc2.1, man/hfst-project.1, man/hfst-prune-alphabet.1,
+	  man/hfst-push-weights.1, man/hfst-regexp2fst.1,
+	  man/hfst-remove-epsilons.1, man/hfst-repeat.1,
+	  man/hfst-reverse.1, man/hfst-reweight.1, man/hfst-sfstpl2fst.1,
+	  man/hfst-shuffle.1, man/hfst-split.1, man/hfst-strings2fst.1,
+	  man/hfst-substitute.1, man/hfst-subtract.1, man/hfst-summarise.1,
+	  man/hfst-summarize.1, man/hfst-tag.1, man/hfst-tail.1,
+	  man/hfst-train-tagger-loc.1, man/hfst-train-tagger-system.1,
+	  man/hfst-train-tagger.1, man/hfst-traverse.1, man/hfst-txt2fst.1,
+	  man/hfst-union.1, man/hfst-xfst.1: Updated and added man pages.
+
+2014-10-06 11:06  eaxelson
+
+	* tools/src/hfst-expand-equivalences.cc: Moved option checking
+	  after possible returning from program so that option --help will
+	  not generate error messages.
+
+2014-10-05 11:06  hardwick
+
+	* libhfst/src/parsers/pmatch_utils.cc: Require backslash character
+	  to be escaped as \\ in curly literals
+
+2014-10-05 10:53  hardwick
+
+	* libhfst/src/parsers/pmatch_utils.cc: Fix bug in unescaping
+	  function
+
+2014-10-03 12:28  eaxelson
+
+	* ChangeLog, ChangeLog.old, NEWS, configure.ac,
+	  libhfst/src/Makefile.am: Ready for release 3.8.0.
+
+2014-10-03 11:55  eaxelson
+
+	* tools/src/parsers/XfstCompiler.cc: Added variable
+	  'lexc-rename-flags' to hfst-xfst.
+
+2014-10-03 11:48  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Desperately fiddle with the
+	  way the minus operation expands things
+
+2014-10-01 16:29  hardwick
+
+	* libhfst/src/parsers/pmatch_lex.ll,
+	  libhfst/src/parsers/pmatch_parse.yy: Add lambda-like anonymous
+	  definitions for controlling subexpression boundaries
+
+2014-10-01 12:46  eaxelson
+
+	* tools/src/hfst-lexc-compiler.cc, tools/src/hfst-regexp2fst.cc,
+	  tools/src/parsers/XfstCompiler.cc: Changed the flag handling
+	  behavior of hfst-xfst and hfst-lexc. Now both tools by default
+	  use Xerox's way when composing, i.e. flag diacritics match
+	  unknown and identity symbols. This can be controlled with
+	  variable 'xerox-composition' (the default is ON) in hfst-xfst and
+	  with option '--xerox-composition={ON,OFF}' (the default is also
+	  ON) in hfst-lexc. hfst-regexp2fst also has the option
+	  '--xerox-composition' which by default is OFF, as it was earlier.
+
+2014-10-01 11:28  eaxelson
+
+	* tools/src/hfst-regexp2fst.cc: Fixed a typo in hfst-regexp2fst
+	  option handling.
+
+2014-09-30 15:38  eaxelson
+
+	* libhfst/src/HfstTransducer.cc: Now one-sided flag diacritics are
+	  allowed in composition when flag-is-epsilon is used.
+
+2014-09-30 13:54  eaxelson
+
+	* tools/src/hfst-strings2fst.cc: Added option --log10 for 10-based
+	  logarithmic weights in hfst-strings2fst.
+
+2014-09-30 13:18  eaxelson
+
+	* libhfst/src/HfstTransducer.cc, libhfst/src/parsers/xre_parse.yy,
+	  tools/src/parsers/XfstCompiler.cc: Now an error is thrown if
+	  flags are not twosided in composition when xerox composition is
+	  used.
+
+2014-09-30 13:05  eaxelson
+
+	* libhfst/src/HfstExceptionDefs.cc,
+	  libhfst/src/HfstExceptionDefs.h: Added exception class
+	  FlagDiacriticsAreNotIdentitesException.
+
+2014-09-29 11:22  hardwick
+
+	* test/tools/Makefile.am, test/tools/pmatch-functionality.sh,
+	  test/tools/pmatch-tester.sh, test/tools/pmatch-tests.sh: Add
+	  pmatch functionality test suite
+
+2014-09-29 10:49  eaxelson
+
+	* tools/src/hfst-regexp2fst.cc: Added option -X flag-is-epsilon to
+	  hfst-regexp2fst.
+
+2014-09-25 10:38  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Fix another symbol-leaking
+	  issue
+
+2014-09-25 10:17  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Allow nested logical
+	  operations on contexts
+
+2014-09-23 07:15  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc,
+	  libhfst/src/parsers/pmatch_utils.h: We need to avoid symbol
+	  pollution for more than just special symbols
+	  (this doesn't completely resolve pollution issues, just some
+	  urgent ones)
+
+2014-09-23 06:47  hardwick
+
+	* libhfst/src/implementations/optimized-lookup/pmatch.cc: Don't
+	  forget to pop the rtn stack when there's nothing matched
+
+2014-09-23 06:36  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Minimize after adding
+	  delimiters, not before
+
+2014-09-23 05:14  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Revert bracket-bounding
+	  behaviour
+
+2014-09-22 15:47  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy: Use brackets for extra
+	  delimiters to control tag and context boundaries more
+
+2014-09-22 15:28  hardwick
+
+	* libhfst/src/parsers/pmatch_parse.yy,
+	  libhfst/src/parsers/pmatch_utils.cc: add_delimiters was happening
+	  in the wrong place since recent syntax changes
+
 2014-09-22 12:57  eaxelson
 
 	* libhfst/src/implementations/HfstOlTransducer.cc: Fixed a too
diff --git a/NEWS b/NEWS
index 5c1c381..e937203 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,18 @@
 This file contains all noteworthy changes in HFST development between releases.
 For full listing of changes see ChangeLog.
 
+Noteworthy changes in 3.8.2
+---------------------------
+
+* Runtime speed improvements in pmatch
+
+* Speedup in conversion to optimized lookup format
+
+* Merge and compile-replace operations supported in hfst-xfst
+
+* Added option --print-symbol-pair-statistics(=N) to hfst-summarize
+
+
 Noteworthy changes in 3.8.1
 ---------------------------
 
diff --git a/check_installation/copy-tool-tests.sh b/check_installation/copy-tool-tests.sh
index d91adfa..72881c5 100755
--- a/check_installation/copy-tool-tests.sh
+++ b/check_installation/copy-tool-tests.sh
@@ -38,6 +38,8 @@ do
 	sed -i 's/-loc / /g' $file
     fi
 done
+# this file uses 'tooldir' instead of 'TOOLDIR'
+sed -i 's/$\tooldir\//$1/g' pmatch-tester.sh
 
 # These tests are rewritten in directory check_installation
 rm empty-input.sh
diff --git a/configure.ac b/configure.ac
index 9089334..f4c8c58 100644
--- a/configure.ac
+++ b/configure.ac
@@ -19,7 +19,7 @@
 HFST_NAME=hfst
 HFST_MAJOR=3
 HFST_MINOR=8
-HFST_EXTENSION=1
+HFST_EXTENSION=2
 HFST_VERSION=$HFST_MAJOR.$HFST_MINOR.$HFST_EXTENSION
 
 ### When the VERSION is INCREMENTED, REMEMBER to increment the LONGVERSION too.
@@ -28,10 +28,10 @@ HFST_VERSION=$HFST_MAJOR.$HFST_MINOR.$HFST_EXTENSION
 LIBHFST_NAME=hfst
 LIBHFST_MAJOR=3
 LIBHFST_MINOR=8
-LIBHFST_EXTENSION=1
+LIBHFST_EXTENSION=2
 LIBHFST_VERSION=$LIBHFST_MAJOR.$LIBHFST_MINOR.$LIBHFST_EXTENSION
 
-AC_INIT([hfst], [3.8.1], [hfst-bugs at helsinki.fi], [hfst])
+AC_INIT([hfst], [3.8.2], [hfst-bugs at helsinki.fi], [hfst])
 AC_CONFIG_AUX_DIR([build-aux])
 AM_INIT_AUTOMAKE([-Wall std-options foreign check-news])
 
@@ -44,8 +44,8 @@ AC_CONFIG_HEADERS([config.h libhfst/src/hfst.hpp])
 
 AC_SUBST([LIBHFST_MAJOR],     [3])
 AC_SUBST([LIBHFST_MINOR],     [8])
-AC_SUBST([LIBHFST_EXTENSION], [1])
-AC_SUBST([LIBHFST_VERSION],   [3.8.1])
+AC_SUBST([LIBHFST_EXTENSION], [2])
+AC_SUBST([LIBHFST_VERSION],   [3.8.2])
 AC_SUBST([LIBHFST_NAME],      [hfst])
 
 # long version = version vector cast in base 10000, for automatic comparisons
@@ -56,9 +56,9 @@ AC_SUBST([LIBHFST_NAME],      [hfst])
 # $LIBHFST_MINOR * 10000 + $LIBHFST_EXTENSION + "L"
 # NB! It turned out to be not portable, and can't be used!
 
-AC_DEFINE([HFST_LONGVERSION], [300080001L],
+AC_DEFINE([HFST_LONGVERSION], [300080002L],
           [Define to hfst version vector as long in base 10000])
-AC_DEFINE([HFST_REVISION], ["$Revision: 4088 $"],
+AC_DEFINE([HFST_REVISION], ["$Revision: 4145 $"],
           [Automatically substitute to configure.ac revision])
 AC_DEFINE_UNQUOTED([HFST_STRING], ["$PACKAGE_STRING"],
                    [Define to libhfst pretty name for programs to print])
@@ -570,7 +570,7 @@ AM_CONDITIONAL([CAN_DOXYGEN], [test x$DOXYGEN != xno])
 
 # Checks for libraries
 
-AC_CHECK_LIB([xml2], [main])
+#AC_CHECK_LIB([xml2], [main])
 AC_LANG_PUSH([C++])
 AS_IF([test "x$with_openfst" != "xno" -a "x$enable_mingw" == "xno"], 
       [AC_CHECK_LIB([dl], [main])])
diff --git a/libhfst/src/HfstTokenizer.cc b/libhfst/src/HfstTokenizer.cc
index 27c1260..8dd27b3 100644
--- a/libhfst/src/HfstTokenizer.cc
+++ b/libhfst/src/HfstTokenizer.cc
@@ -11,7 +11,9 @@
 //       along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 #include "HfstTokenizer.h"
+#include "HfstFlagDiacritics.h"
 #include <string>
+#include <cassert>
 
 #ifndef MAIN_TEST
 
@@ -111,7 +113,9 @@ const
 
 void
 HfstTokenizer::add_multichar_symbol(const string& symbol)
-{  multi_char_symbols.add(symbol.c_str()); }
+{  if (symbol == "")
+    { return; }
+  multi_char_symbols.add(symbol.c_str()); }
 
 void
 HfstTokenizer::add_skip_symbol(const std::string &symbol)
@@ -232,6 +236,130 @@ StringPairVector HfstTokenizer::tokenize
     }
   return spv;
 }
+
+StringPairVector HfstTokenizer::tokenize  // pairs input tokens with output tokens position by position, padding the shorter side
+(const string& input_string,const string& output_string,
+ void (*warn_about_pair)(const std::pair<std::string, std::string> &symbol_pair)) const  // callback is called for every produced pair without a NULL check
+{
+  check_utf8_correctness(input_string);
+  check_utf8_correctness(output_string);
+
+  StringPairVector spv;  // result: one (input token, output token) pair per position
+  
+  StringPairVector input_spv = tokenize(input_string.c_str());  // only the .first member of each tokenized pair is used below
+  StringPairVector output_spv = tokenize(output_string.c_str());
+
+  if (input_spv.size() < output_spv.size())  // fewer input tokens: the input side gets padded
+    {
+      StringPairVector::iterator jt = output_spv.begin();
+      for (StringPairVector::iterator it = input_spv.begin();
+           it != input_spv.end();
+           ++it)
+        { 
+          StringPair sp(it->first, jt->first);
+          warn_about_pair(sp);
+          spv.push_back(sp);
+          ++jt; }
+      for ( ; jt != output_spv.end(); ++jt)  // remaining output tokens are paired with internal_epsilon on the input side
+        { StringPair sp(internal_epsilon,jt->first);
+          warn_about_pair(sp);
+          spv.push_back(sp); }
+    }
+  else  // at least as many input tokens as output tokens: the output side gets padded
+    {
+      StringPairVector::iterator it = input_spv.begin();
+      for (StringPairVector::iterator jt = output_spv.begin();
+           jt != output_spv.end();
+           ++jt)
+        { StringPair sp(it->first, jt->first);
+          warn_about_pair(sp);
+          spv.push_back(sp);
+          ++it; }
+      for ( ; it != input_spv.end(); ++it)  // remaining input tokens are paired with internal_epsilon on the output side
+        { StringPair sp(it->first,internal_epsilon);
+          warn_about_pair(sp);
+          spv.push_back(sp); }
+    }
+  return spv;
+}
+
+StringPairVector HfstTokenizer::tokenize_and_align_flag_diacritics  // pairs input and output tokens, keeping flag diacritics identical on both sides
+(const string& input_string,const string& output_string,
+ void (*warn_about_pair)(const std::pair<std::string, std::string> &symbol_pair)) const  // callback is called only for pairs that had to be split (see below)
+{
+  check_utf8_correctness(input_string);
+  check_utf8_correctness(output_string);
+
+  StringPairVector spv;  // result vector
+  
+  StringPairVector input_spv = tokenize(input_string.c_str());
+  StringPairVector output_spv = tokenize(output_string.c_str());
+  
+  assert(input_spv.size() > 0 && output_spv.size() > 0);  // both token vectors are expected to be non-empty
+  StringPairVector::const_iterator it = input_spv.begin();
+  StringPairVector::const_iterator jt = output_spv.begin();
+
+  // proceed until both token vectors are exhausted
+  while(it != input_spv.end() || jt != output_spv.end())
+    {
+      StringPair sp("", "");  // string pair to push back to the result
+      StringPair sp_cont("", "");  // possible continuation in case of misaligned flags
+
+      if (it == input_spv.end()) 
+        {
+          if (FdOperation::is_diacritic(jt->first)) // copy diacritic to other side
+            {
+              sp = StringPair(jt->first, jt->first);
+            }
+          else // pad input with epsilons
+            {
+              sp = StringPair(internal_epsilon, jt->first);
+            }
+          jt++;
+        }
+      else if (jt == output_spv.end()) 
+        {
+          if (FdOperation::is_diacritic(it->first)) // copy diacritic to other side
+            {
+              sp = StringPair(it->first, it->first);
+            }
+          else // pad output with epsilons
+            {
+              sp = StringPair(it->first, internal_epsilon);
+            }
+          it++;
+        }
+      else
+        {
+          // take from both vectors (cases foo:bar, foo:foo, flag1:flag1)
+          if ((!FdOperation::is_diacritic(it->first) && !FdOperation::is_diacritic(jt->first)) || 
+              *it == *jt)
+            {
+              sp = StringPair(it->first, jt->first);
+            }
+          // take first from first vector and then from second
+          // (cases flag1:flag2, flag1:bar, foo:flag2)
+          else
+            {
+              StringPair wrong_pair(it->first, jt->first);  // the pair as written, reported to the caller
+              warn_about_pair(wrong_pair);
+              sp = StringPair(it->first, it->first);  // input token becomes an identity pair
+              sp_cont = StringPair(jt->first, jt->first);  // output token follows as a second identity pair
+            }
+          it++;
+          jt++;
+        }
+      
+      spv.push_back(sp);      
+      if (sp_cont.first.size() != 0 && sp_cont.second.size() != 0)  // a continuation pair was set in this round
+        {
+          spv.push_back(sp_cont);
+        }      
+    }
+
+  return spv;
+}
+
   
   void 
   HfstTokenizer::check_utf8_correctness(const std::string &input_string)
diff --git a/libhfst/src/HfstTokenizer.h b/libhfst/src/HfstTokenizer.h
index 4174d99..d8e0cf5 100644
--- a/libhfst/src/HfstTokenizer.h
+++ b/libhfst/src/HfstTokenizer.h
@@ -128,6 +128,15 @@ namespace hfst
     StringPairVector tokenize(const std::string &input_string,
                               const std::string &output_string) const;
 
+    StringPairVector tokenize(const std::string &input_string,
+                              const std::string &output_string,
+                              void (*warn_about_pair)(const std::pair<std::string, std::string> &symbol_pair)) const;
+
+    StringPairVector tokenize_and_align_flag_diacritics
+      (const std::string &input_string,
+       const std::string &output_string,
+       void (*warn_about_pair)(const std::pair<std::string, std::string> &symbol_pair)) const;
+
     //! \brief If @a input_String is not valid utf-8, throw an
     //! @a IncorrectUtf8CodingException.
     //!
diff --git a/libhfst/src/HfstTransducer.cc b/libhfst/src/HfstTransducer.cc
index 2806202..d7c0083 100644
--- a/libhfst/src/HfstTransducer.cc
+++ b/libhfst/src/HfstTransducer.cc
@@ -3556,6 +3556,49 @@ bool substitute_unknown_identity_pairs
 }
 
 
+HfstTransducer &HfstTransducer::merge  // replaces *this with the filtered merge of *this and 'another', then returns *this
+(const HfstTransducer &another, const struct hfst::xre::XreConstructorArguments & args)  // args supplies list_definitions and configures the regex compiler
+{
+  HfstBasicTransducer this_basic(*this);
+  HfstBasicTransducer another_basic(another);
+  std::set<std::string> markers_added;  // passed to the basic-level merge; iterated below as the set of marker symbols
+  HfstBasicTransducer result = hfst::implementations::HfstBasicTransducer::merge(this_basic, another_basic, args.list_definitions, markers_added);
+  HfstTransducer initial_merge(result, this->get_type());  // back to this transducer's own type
+  initial_merge.minimize();
+
+  // filter non-optimal paths
+  // [ ? | #V ?:? ]* %#V:V ?:0 [ ? | #V ?:? | %#V:V ?:0 ]*
+  hfst::xre::XreCompiler xre_(args);
+  xre_.set_verbosity(false, NULL);
+
+  for (std::set<std::string>::const_iterator it = markers_added.begin(); it != markers_added.end(); it++)  // one filtering round per marker
+    {
+      std::string marker = *it;
+      std::string symbol(1, it->at(1)); // @X@ -> X
+      std::string worsener_string("[ ? | \"" + marker +  "\" ?:? ]* \"" + marker + "\":" + symbol + " ?:0 [ ? | \"" + marker + "\" ?:? | \"" + marker + "\":" + symbol + " ?:0 ]* ;");  // the pattern above with this marker and symbol filled in
+
+      HfstTransducer * worsener = xre_.compile(worsener_string);
+      assert(worsener != NULL);
+      worsener->minimize();
+      HfstTransducer cp(initial_merge);  // work on a copy so initial_merge stays intact for the subtraction
+      cp.compose(*worsener).output_project().minimize();
+      delete worsener;  // the compiled transducer is freed here
+
+      initial_merge.subtract(cp).minimize();  // remove everything in cp from the merge result
+      initial_merge.substitute(marker, internal_epsilon);  // the marker itself is replaced by epsilon
+
+      HfstBasicTransducer fsm(initial_merge);
+      StringSet symbols = fsm.symbols_used();
+      if (symbols.find(symbol) == symbols.end())  // symbol is not among fsm.symbols_used()
+        {
+          initial_merge.remove_from_alphabet(symbol);
+        }
+    }
+
+  *this = initial_merge;
+  return *this;
+}
+
 HfstTransducer &HfstTransducer::compose
 (const HfstTransducer &another,
  bool harmonize)
diff --git a/libhfst/src/HfstTransducer.h b/libhfst/src/HfstTransducer.h
index d1835c3..8fad045 100644
--- a/libhfst/src/HfstTransducer.h
+++ b/libhfst/src/HfstTransducer.h
@@ -1249,6 +1249,10 @@ ccc : ddd
     HfstTransducer &compose(const HfstTransducer &another, 
                             bool harmonize=true);
 
+    HfstTransducer &merge(const HfstTransducer &another, const std::map<std::string, std::set<std::string> > & list_symbols);
+
+    HfstTransducer &merge(const HfstTransducer &another, const struct hfst::xre::XreConstructorArguments & args);
+
     /** \brief Compose this transducer with the intersection of
         transducers in \a v. If \a invert is true, then compose the
         intersection of the transducers in \a v with this transducer.
diff --git a/libhfst/src/Makefile.am b/libhfst/src/Makefile.am
index 93f21d9..3da3c4e 100644
--- a/libhfst/src/Makefile.am
+++ b/libhfst/src/Makefile.am
@@ -97,7 +97,7 @@ HFST_HDRS = \
 
 hfstinclude_HEADERS = $(HFST_HDRS)
 
-libhfst_la_LDFLAGS = -no-undefined -version-info 39:0:0
+libhfst_la_LDFLAGS = -no-undefined -version-info 40:0:0
 
 LIBHFST_TSTS=HfstApply HfstInputStream HfstTransducer \
 		HfstOutputStream HfstXeroxRules HfstRules HfstSymbolDefs \
diff --git a/libhfst/src/implementations/ConvertOlTransducer.cc b/libhfst/src/implementations/ConvertOlTransducer.cc
index ad46db3..365d8ef 100644
--- a/libhfst/src/implementations/ConvertOlTransducer.cc
+++ b/libhfst/src/implementations/ConvertOlTransducer.cc
@@ -171,23 +171,17 @@ void get_states_and_symbols(
     StringSet * flag_diacritics = new StringSet();
     StringSet * other_symbols = new StringSet();
     
-    std::map<unsigned int, unsigned int> * relabeled_states =
-        new std::map<unsigned int, unsigned int>();
     unsigned int first_transition = 0;
-    unsigned int source_state=0;
+    unsigned int state_number = 0;
     for (HfstBasicTransducer::const_iterator it = t->begin(); 
          it != t->end(); ++it) {
-        unsigned int state_number = state_placeholders.size();
-        if (state_number != source_state) {
-            relabeled_states->operator[](source_state) = state_number;
-        }
         hfst_ol::Weight final_w = 0.0;
-        if (t->is_final_state(source_state)) {
-            final_w = t->get_final_weight(source_state);    
+        if (t->is_final_state(state_number)) {
+            final_w = t->get_final_weight(state_number);    
         }
         state_placeholders.push_back(hfst_ol::StatePlaceholder(
                                          state_number,
-                                         t->is_final_state(source_state),
+                                         t->is_final_state(state_number),
                                          first_transition,
                                          final_w));
         ++first_transition; // there's a padding entry between states
@@ -207,7 +201,7 @@ void get_states_and_symbols(
                 other_symbols->insert(tr_it->get_output_symbol());
             }
         }
-        source_state++;
+        ++state_number;
     }
 
     std::map<std::string, SymbolNumber> string_symbol_map;
@@ -265,41 +259,31 @@ void get_states_and_symbols(
     delete input_symbols;
     delete flag_diacritics;
     delete other_symbols;
-        
+
     // Do a second pass over the transitions, figuring out everything
     // about the states except starting indices
 
-    source_state=0;
+    state_number = 0;
     for (HfstBasicTransducer::const_iterator it = t->begin(); 
          it != t->end(); ++it) {
         for (HfstBasicTransducer::HfstTransitions::const_iterator tr_it 
                = it->begin();
              tr_it != it->end(); ++tr_it) {
-        unsigned int state_number = source_state;
-        if (relabeled_states->count(state_number) != 0) {
-        state_number = relabeled_states->operator[](state_number);
-        }
-            // check for previously unseen inputs
-            if (state_placeholders[state_number].inputs.count(
-                    string_symbol_map[tr_it->get_input_symbol()]) == 0) {
-                state_placeholders[state_number].inputs[
-                    string_symbol_map[tr_it->get_input_symbol()]] =
-                    std::vector<hfst_ol::TransitionPlaceholder>();
-            }
-        unsigned int target = tr_it->get_target_state();
-        if (relabeled_states->count(target) != 0) {
-        target = relabeled_states->operator[](target);
-        }
+            // add input in case we're seeing it the first time
+            state_placeholders[state_number].add_input(
+                string_symbol_map[tr_it->get_input_symbol()],
+                flag_symbols);
+            unsigned int target = tr_it->get_target_state();
             hfst_ol::TransitionPlaceholder trans(
                 target,
+                string_symbol_map[tr_it->get_input_symbol()],
                 string_symbol_map[tr_it->get_output_symbol()],
                 tr_it->get_weight());
-            state_placeholders[state_number]
-                .inputs[string_symbol_map[tr_it->get_input_symbol()]].push_back(trans);
+            SymbolNumber input_sym = string_symbol_map[tr_it->get_input_symbol()];
+            state_placeholders[state_number].add_transition(trans);
         }
-    source_state++;
+        ++state_number;
     }
-    delete relabeled_states;
 }
 
   /* Create an hfst_ol::Transducer equivalent to HfstBasicTransducer \a t.
@@ -333,12 +317,12 @@ void get_states_and_symbols(
                              flag_symbols,
                              harmonizer_ol);
 
-    // For determining the index table we first sort the states (excepting
+      // For determining the index table we first sort the states (excepting
     // the starting state) by number of different input symbols.
-    if (state_placeholders.begin() != state_placeholders.end()) {
-    std::sort(state_placeholders.begin() + 1, state_placeholders.end(),
-          hfst_ol::compare_states_by_input_size);
-    }
+    // if (state_placeholders.begin() != state_placeholders.end()) {
+    // std::sort(state_placeholders.begin() + 1, state_placeholders.end(),
+    //       hfst_ol::compare_states_by_input_size);
+    // }
 
     hfst_ol::IndexPlaceholders * used_indices =
         new hfst_ol::IndexPlaceholders();
@@ -360,107 +344,85 @@ void get_states_and_symbols(
     for (std::vector<hfst_ol::StatePlaceholder>::iterator it =
              state_placeholders.begin();
          it != state_placeholders.end(); ++it) {
-        if (it->is_simple(flag_symbols) and it->state_number != 0) {
+        if (it->is_simple()) {
             continue;
         }
         unsigned int i = first_available_index;
 
         // While this index is not suitable for a starting index, keep looking
-    if (!quick) {
         while (!used_indices->fits(*it, flag_symbols, i)) {
-        ++i;
+            ++i;
         }
-    }
         it->start_index = i;
-    previous_successful_index = i;
+        previous_successful_index = i;
         // Once we've found a starting index, insert a finality marker and
-    // mark all the used indices
-    used_indices->operator[](i) =
-        std::pair<unsigned int, SymbolNumber>(
-        it->state_number, NO_SYMBOL_NUMBER);
-        for (std::map<SymbolNumber,
-                 std::vector<hfst_ol::TransitionPlaceholder> >
-                 ::iterator sym_it = it->inputs.begin();
-             sym_it != it->inputs.end(); ++sym_it) {
-            SymbolNumber index_offset = sym_it->first;
+        // mark all the used indices
+        used_indices->assign(i, it->state_number, NO_SYMBOL_NUMBER);
+        for (std::vector<std::vector<hfst_ol::TransitionPlaceholder> >
+                 ::const_iterator tr_it = it->transition_placeholders.begin();
+             tr_it != it->transition_placeholders.end(); ++tr_it) {
+            SymbolNumber index_offset = tr_it->at(0).input;
             if (flag_symbols.count(index_offset) != 0) {
                 index_offset = 0;
             }
-            used_indices->operator[](i + index_offset + 1) =
-                std::pair<unsigned int, SymbolNumber>
-                (it->state_number, index_offset);
+            used_indices->assign(i + index_offset + 1, it->state_number, index_offset);
         }
-    if (quick) {
-        first_available_index = used_indices->rbegin()->first + 1;
-        continue;
-    }
-    while (used_indices->unsuitable(
-           first_available_index, seen_input_symbols,
-           packing_aggression)) {
-        ++first_available_index;
-    }
-    if (first_available_index == previous_first_index) {
-        if (floor_stuck_counter > floor_jump_threshold) {
-        SymbolNumber index_offset = it->inputs.rbegin()->first;
-        if (flag_symbols.count(index_offset) != 0) {
-            index_offset = 0;
-        }
-        first_available_index =
-            previous_successful_index + 1 + index_offset;
-        while (used_indices->unsuitable(
-               first_available_index,
-               seen_input_symbols, packing_aggression)) {
+
+        while (used_indices->unsuitable(first_available_index, seen_input_symbols, packing_aggression)) {
             ++first_available_index;
         }
-        floor_stuck_counter = 0;
-        previous_first_index = first_available_index;
+        if (first_available_index == previous_first_index) {
+            if (floor_stuck_counter > floor_jump_threshold) {
+                first_available_index = previous_successful_index + 1;
+                floor_stuck_counter = 0;
+                previous_first_index = first_available_index;
+            } else {
+                ++floor_stuck_counter;
+            }
         } else {
-        ++floor_stuck_counter;
+            previous_first_index = first_available_index;
+            floor_stuck_counter = 0;
         }
-    } else {
-        previous_first_index = first_available_index;
-        floor_stuck_counter = 0;
-    }
     }
 
     // Now resort by state number for the rest
     // (this could definitely be neater...)
-    if (state_placeholders.begin() != state_placeholders.end()) {
-    std::sort(state_placeholders.begin() + 1, state_placeholders.end(),
-          hfst_ol::compare_states_by_state_number);
-    }
+   // if (state_placeholders.begin() != state_placeholders.end()) {
+   // std::sort(state_placeholders.begin() + 1, state_placeholders.end(),
+   //       hfst_ol::compare_states_by_state_number);
+   // }
 
     // Now for each index entry we write its input symbol and target
 
     hfst_ol::TransducerTable<hfst_ol::TransitionWIndex> windex_table;
     
     unsigned int greatest_index = 0;
-    if (used_indices->size() != 0) {
-        greatest_index = used_indices->rbegin()->first;
+    if (used_indices->indices.size() != 0) {
+        greatest_index = used_indices->indices.size() - 1;
     }
 
     for(unsigned int i = 0; i <= greatest_index; ++i) {
-        if (used_indices->count(i) == 0) { // blank entries
+        if (!used_indices->used(i)) { // blank entries
             windex_table.append(hfst_ol::TransitionWIndex());
-        } else if (used_indices->operator[](i).second ==
-           NO_SYMBOL_NUMBER) { // finality markers
-        if (state_placeholders[used_indices->operator[](i).first].final) {
-        windex_table.append(
-            hfst_ol::TransitionWIndex::create_final(
-            state_placeholders[
-                used_indices->operator[](i).first].final_weight));
-        } else {
-        windex_table.append(hfst_ol::TransitionWIndex());
-        }
-    } else { // actual entries
-            unsigned int idx = used_indices->operator[](i).first;
-            SymbolNumber sym = used_indices->operator[](i).second;
+        } else if (used_indices->get_target(i).second ==
+                   NO_SYMBOL_NUMBER) { // finality markers
+            if (state_placeholders[used_indices->get_target(i).first].final) {
+                windex_table.append(
+                    hfst_ol::TransitionWIndex::create_final(
+                        state_placeholders[
+                            used_indices->get_target(i).first].final_weight));
+            } else {
+                windex_table.append(hfst_ol::TransitionWIndex());
+            }
+        } else { // actual entries
+            unsigned int idx = used_indices->get_target(i).first;
+            SymbolNumber sym = used_indices->get_target(i).second;
             windex_table.append(
-        hfst_ol::TransitionWIndex(
-            sym,
-            state_placeholders[idx].first_transition +
-            state_placeholders[idx].symbol_offset(
-            sym, flag_symbols) + TA_OFFSET));
+                hfst_ol::TransitionWIndex(
+                    sym,
+                    state_placeholders[idx].first_transition +
+                    state_placeholders[idx].symbol_offset(
+                        sym, flag_symbols) + TA_OFFSET));
         }
     }
 
diff --git a/libhfst/src/implementations/HfstTransitionGraph.h b/libhfst/src/implementations/HfstTransitionGraph.h
index 24096f2..8641699 100644
--- a/libhfst/src/implementations/HfstTransitionGraph.h
+++ b/libhfst/src/implementations/HfstTransitionGraph.h
@@ -276,7 +276,7 @@
 
      /* Check that all symbols that occur in the transitions of the graph
         are also in the alphabet. */
-     bool check_alphabet() 
+     bool check_alphabet()
      {
            for (iterator it = begin(); it != end(); it++)
              {
@@ -2732,7 +2732,7 @@
                  {
                    HfstState new_state = add_state();
                    std::string marker = weight2marker(IT->get_weight());
-                   std::cerr << "got marker '" << marker << "'" << std::endl;
+                   //std::cerr << "got marker '" << marker << "'" << std::endl;
                    HfstTransition <C> marker_transition(IT->get_target_state(),
                                                         marker,
                                                         marker,
@@ -2949,7 +2949,7 @@
                    if ( (!marker2weight(data.get_input_symbol(), weight)) && 
                         marker2weight(data.get_output_symbol(), weight) )
                      {
-                       std::cerr << "got weight '" << weight << "'" << std::endl;
+                       //std::cerr << "got weight '" << weight << "'" << std::endl;
                        // schedule a substitution
                        new_transitions.push_back
                          (HfstTransition <C> (tr_it->get_target_state(), 
@@ -2963,7 +2963,7 @@
                    else if (marker2weight(data.get_input_symbol(), weight) &&
                             marker2weight(data.get_output_symbol(), weight) )
                      {
-                       std::cerr << "got weight '" << weight << "'" << std::endl;
+                       //std::cerr << "got weight '" << weight << "'" << std::endl;
                        // schedule the old transition to be deleted
                        old_transitions.push(tr_it);
                      }
@@ -3869,24 +3869,24 @@
          }
 
          // Returns whether tr is "^]":"^]". If tr is not allowed, throws an error message.
-         bool check_regexp_transition_end(const HfstBasicTransition & tr)
+         bool check_regexp_transition_end(const HfstBasicTransition & tr, bool input_side)
          {
            std::string istr = tr.get_input_symbol();
            std::string ostr = tr.get_output_symbol();
-           if (is_special_symbol(istr) || is_special_symbol(ostr))
+           if ((input_side && is_special_symbol(istr)) || (!input_side && is_special_symbol(ostr)))
              {
                throw "error: special symbol detected in compile-replace regular expression";
              } 
-           if (("^[" == istr) || ("^[" == ostr))
+           if ((input_side && ("^[" == istr)) || (!input_side && ("^[" == ostr)))
              {
                throw "error: ^[ detected inside compile-replace regular expression";
              }
-           if (("^]" == istr) || ("^]" == ostr))
+           if ((input_side && ("^]" == istr)) || (!input_side && ("^]" == ostr)))
              {
-               if (istr != ostr)
+               /*if (istr != ostr)
                  {
                    throw "error: ^] detected on only one side of transition inside compile-replace regular expression";
-                 }
+                   }*/
                return true;
              }
            return false;
@@ -3904,7 +3904,7 @@
            (HfstState s, 
             std::set<HfstState> & states_visited, 
             std::vector<std::pair<std::string, std::string> > & path, 
-            HfstReplacements & full_paths)
+            HfstReplacements & full_paths, bool input_side)
            {
              // no cycles allowed inside "^[" and "^]"
              check_regexp_state_for_cycle(s, states_visited);
@@ -3918,11 +3918,13 @@
                   it != transitions.end(); it++)
                {
                  // closing bracket..
-                 if (check_regexp_transition_end(*it)) // throws error message if *it is not a valid transition
+                 if (check_regexp_transition_end(*it, input_side)) // throws error message if *it is not a valid transition
                    {
                      // ..cannot lead to a state already visited..
                      check_regexp_state_for_cycle(it->get_target_state(), states_visited);
                      // ..but else we can add the expression that it ends to the results
+                     path.push_back(std::pair<std::string, std::string>(it->get_input_symbol(), it->get_output_symbol()));
                      full_paths.push_back
                        (HfstReplacement(it->get_target_state(), path));
+                     path.pop_back(); // path is shared by reference: drop the closing "^]" again so sibling transitions and the caller's own pop_back() see an unchanged path
                    }
@@ -3934,7 +3935,7 @@
                        (it->get_target_state(),
                         states_visited,
                         path,
-                        full_paths);
+                        full_paths, input_side);
                      path.pop_back();
                    }   
                }
@@ -3953,7 +3954,8 @@
          // Weights are currently ignored.
          void find_regexp_paths
            (HfstState s,
-            std::vector<std::pair<HfstState, std::vector<std::pair<std::string, std::string> > > > & full_paths)
+            std::vector<std::pair<HfstState, std::vector<std::pair<std::string, std::string> > > > & full_paths, 
+            bool input_side)
          {
            // go through all transitions
            const HfstBasicTransducer::HfstTransitions &transitions 
@@ -3964,17 +3966,18 @@
                {
                  std::string istr = it->get_input_symbol();
                  std::string ostr = it->get_output_symbol();
-                 if ("^[" == istr || "^[" == ostr)
+                 if ((input_side && ("^[" == istr)) || (!input_side && ("^[" == ostr)))
                    {
-                     if (istr != ostr)
+                     /*if (istr != ostr)
                        {
                          throw "error: ^[ detected on only one side of transition";
-                       }
+                         }*/
                      std::set<HfstState> states_visited;
                      states_visited.insert(s);
                      std::vector<std::pair<std::string, std::string> > path; 
-                     find_regexp_paths(it->get_target_state(), states_visited, path, full_paths);
-                     fprintf(stderr, "%u regexp paths found for state %u\n", (unsigned int)full_paths.size(), s);
+                     path.push_back(std::pair<std::string, std::string>(istr, ostr));
+                     find_regexp_paths(it->get_target_state(), states_visited, path, full_paths, input_side);
+                     //fprintf(stderr, "%u regexp paths found for state %u\n", (unsigned int)full_paths.size(), s); // debug
                    }
                }
          }
@@ -3982,15 +3985,15 @@
          // Find all subpaths of form "^[" [x:y]* "^]" (x and y cannot be "^[" or "^]") and return them.
          // retval[start_state] == vector(pair(end_state, vector(pair(isymbol,osymbol) ) ) )
          // Weights are currently ignored.
-         HfstReplacementsMap find_replacements()
+         HfstReplacementsMap find_replacements(bool input_side)
          {
            HfstReplacementsMap replacements;
            unsigned int state = 0;
            for (iterator it = begin(); it != end(); it++)
              {
-               fprintf(stderr, "state %u......\n", state);
+               //fprintf(stderr, "state %u......\n", state); // debug
                HfstReplacements full_paths;
-               find_regexp_paths(state, full_paths);
+               find_regexp_paths(state, full_paths, input_side);
                if (full_paths.size() > 0)
                  {
                    replacements[state] = full_paths;
@@ -4184,7 +4187,7 @@
            graph1.sort_arcs();
            graph2.sort_arcs();
            state_map[StatePair(0, 0)] = 0;   // initial states
-
+           
            if (graph1.is_final_state(0) && graph2.is_final_state(0))
              {
                float final_weight = std::min(graph1.get_final_weight(0), graph2.get_final_weight(0));
@@ -4198,6 +4201,9 @@
 
 
 
+
+         // HERE BEGINS
+
            // A function used by find_matches_for_merge
            // Copy matching transition graph_tr/merger_tr to state \a result_state in \a result and return
            // the target state of that transition. Also make that state final, if needed.
@@ -4229,8 +4235,7 @@
            // the target state of that transition. Also make that state final, if needed.
            static HfstState handle_list_match(const HfstTransitionGraph & graph, const HfstTransition <C> & graph_transition,
                                               const HfstTransitionGraph & merger, const HfstTransition <C> & merger_transition,
-                                              HfstTransitionGraph & result, HfstState result_state, StateMap & state_map)
-                                    
+                                              HfstTransitionGraph & result, HfstState result_state, StateMap & state_map, std::set<std::string> & markers_added)
            {
              HfstState graph_target = graph_transition.get_target_state();
              HfstState merger_target = merger_transition.get_target_state();
@@ -4239,8 +4244,16 @@
                (graph_target, merger_target, state_map, result, was_new_state);
              // The sum of weight is copied to the resulting intersection.
              float transition_weight = graph_transition.get_weight() + merger_transition.get_weight();
+             
+             // testing: add a marker
+             HfstState extra_state = result.add_state();
              result.add_transition
                (result_state, HfstTransition <C> 
+                (extra_state, "@" + graph_transition.get_input_symbol() + "@", "@" + graph_transition.get_output_symbol() + "@", 0));
+             markers_added.insert("@" + graph_transition.get_input_symbol() + "@");
+
+             result.add_transition
+               (extra_state /*result_state*/, HfstTransition <C> 
                 (retval, merger_transition.get_input_symbol(), merger_transition.get_output_symbol(), transition_weight)); 
              // For each new state added, check if the corresponding states in \a graph1 and \a graph2
              // are final. If they are, make the new state final with the sum of final weights.
@@ -4251,17 +4264,55 @@
                }
              return retval;
            }
+           
 
-
-           static bool is_list_symbol(const C & transition_data)
+           // Whether transition_data carries a list symbol, i.e. a symbol that is a key of list_symbols.
+           static bool is_list_symbol(const C & transition_data, const std::map<std::string, std::set<std::string> > & list_symbols)
            {
-             return false;
+             const std::string input_sym = transition_data.get_input_symbol();
+             // Only transitions with identical input and output symbols are accepted;
+             // anything else is reported to the caller by throwing a C string.
+             if (input_sym != transition_data.get_output_symbol())
+               {
+                 throw "is_list_symbol: input and output symbols must be the same";
+               }
+             return list_symbols.count(input_sym) != 0;
            }
 
-           static bool is_list_match(const C & graph_transition_data, const C & merger_transition_data)
+           /*
+           // @pre \a transition_data is a list symbol
+           // @pre list symbols cannot contain '_' or '@'
+           static std::set<std::string> get_list_symbols(const std::string & list_symbol)
            {
-             return false;
-           }
+             std::set<std::string> result;
+             unsigned int i = 6;
+
+             // skip list name
+             while(list_symbol[i] != '_')
+               {
+                 i++;
+               }
+             i++;
+
+             // extract symbols
+             std::string symbol("");
+             while (list_symbol[i] != '@')
+               {
+                 if (list_symbol[i] == '_')
+                   {
+                     result.insert(symbol);
+                     symbol = std::string("");
+                   }
+                 else
+                   {
+                     symbol.append(1, list_symbol[i]);
+                   }
+                 i++;
+               }
+             result.insert(symbol);
+
+             return result;
+             }*/
 
            // A recursive function used by function intersect.
            //
@@ -4278,7 +4329,8 @@
            // @pre \a graph and \a merger must be deterministic. (todo: handle equivalent transitions, maybe even epsilons?)
            static void find_matches_for_merge
              (HfstTransitionGraph & graph, HfstState graph_state, HfstTransitionGraph & merger, HfstState merger_state,
-              HfstTransitionGraph & result, HfstState result_state, StateMap & state_map, std::set<HfstState> & agenda)
+              HfstTransitionGraph & result, HfstState result_state, StateMap & state_map, std::set<HfstState> & agenda,
+              const std::map<std::string, std::set<std::string> > & list_symbols, std::set<std::string> & markers_added)
            {
              agenda.insert(result_state);  // do not handle \a result_state twice
              HfstTransitions & graph_transitions = graph.state_vector[graph_state]; // transitions of graph
@@ -4296,22 +4348,30 @@
                  const C & graph_transition_data = graph_transition.get_transition_data();
 
                  // List symbols must be checked separately
-                 if (is_list_symbol(graph_transition_data))
+                 if (is_list_symbol(graph_transition_data, list_symbols))
                    {
+                     const std::set<std::string> & symbol_list = list_symbols.find(graph_transition_data.get_input_symbol())->second;
                      bool list_match_found=false;
                      // Find all matches
                      for(unsigned int j=0; j < merger_transitions.size(); j++)
                        {
                          HfstTransition <C> & merger_transition = merger_transitions[j];
                          const C & merger_transition_data = merger_transition.get_transition_data();
+                         const std::string & isymbol = merger_transition_data.get_input_symbol();
+                         const std::string & osymbol = merger_transition_data.get_output_symbol();
+
+                         if (isymbol != osymbol)
+                           {
+                             throw "find_matches_for_merge: input and output symbols must be the same";
+                           }
 
-                         if (is_list_match(graph_transition_data, merger_transition_data))
+                         if (symbol_list.find(isymbol) != symbol_list.end())
                            {
                              list_match_found=true;
-                             HfstState target = handle_list_match(graph, graph_transition, merger, merger_transition, result, result_state, state_map);
+                             HfstState target = handle_list_match(graph, graph_transition, merger, merger_transition, result, result_state, state_map, markers_added);
                              if (agenda.find(target) == agenda.end())
                                {
-                                 find_matches_for_merge(graph, graph_transition.get_target_state(), merger, merger_transition.get_target_state(), result, target, state_map, agenda);
+                                 find_matches_for_merge(graph, graph_transition.get_target_state(), merger, merger_transition.get_target_state(), result, target, state_map, agenda, list_symbols, markers_added);
                                }
                            }
                        }
@@ -4325,7 +4385,7 @@
                  HfstState target = handle_non_list_match(graph, graph_transition, merger, merger_state, result, result_state, state_map);
                  if (agenda.find(target) == agenda.end())
                    {
-                     find_matches_for_merge(graph, graph_transition.get_target_state(), merger, /*merger_transition.get_target_state()*/ merger_state, result, target, state_map, agenda);
+                     find_matches_for_merge(graph, graph_transition.get_target_state(), merger, /*merger_transition.get_target_state()*/ merger_state, result, target, state_map, agenda, list_symbols, markers_added);
                    }
                  // --- A transition in graph compared for all corresponding transitions in merger, compare next transition. --- 
                }
@@ -4334,7 +4394,7 @@
            }
 
          static HfstTransitionGraph merge
-           (HfstTransitionGraph & graph, HfstTransitionGraph & merger)
+           (HfstTransitionGraph & graph, HfstTransitionGraph & merger, const std::map<std::string, std::set<std::string> > & list_symbols, std::set<std::string> & markers_added)
          {
            HfstTransitionGraph result;
            StateMap state_map;
@@ -4349,7 +4409,14 @@
                result.set_final_weight(0, final_weight);
              }
            
-           find_matches_for_merge(graph, 0, merger, 0, result, 0, state_map, agenda);
+           try 
+             {
+               find_matches_for_merge(graph, 0, merger, 0, result, 0, state_map, agenda, list_symbols, markers_added);
+             }
+           catch (const char * msg)
+             {
+               HFST_THROW_MESSAGE(TransducersAreNotAutomataException, std::string(msg));
+             }
 
            return result;
          }
@@ -4400,8 +4467,8 @@
       HfstFastTransducer;
 
  
-       }
+   }
    
-}
+ }
 
 #endif // #ifndef _HFST_TRANSITION_GRAPH_H_
diff --git a/libhfst/src/implementations/optimized-lookup/convert.cc b/libhfst/src/implementations/optimized-lookup/convert.cc
index 90b5da6..a7f3363 100644
--- a/libhfst/src/implementations/optimized-lookup/convert.cc
+++ b/libhfst/src/implementations/optimized-lookup/convert.cc
@@ -36,36 +36,36 @@ void write_transitions_from_state_placeholders(
         // Then we iterate through the symbols each state has.
     // First we do a pass for epsilon and flags (they have to come
     // first), then everything else.
-    if (it->inputs.count(0) != 0) {
-        add_transitions_with(0, it->inputs[0],
-                 transition_table,
-                 state_placeholders,
-                 flag_symbols);
+        if (it->input_present(0)) {
+            add_transitions_with(0, it->get_transition_placeholders(0),
+                                 transition_table,
+                                 state_placeholders,
+                                 flag_symbols);
     }
     for (std::set<hfst_ol::SymbolNumber>::iterator flag_it =
          flag_symbols.begin(); flag_it != flag_symbols.end();
          ++flag_it) {
-        if (it->inputs.count(*flag_it) != 0) {
-        hfst_ol::add_transitions_with(*flag_it,
-                          it->inputs[*flag_it],
-                          transition_table,
-                          state_placeholders,
-                          flag_symbols);
-        
+        if (it->input_present(*flag_it)) {
+            hfst_ol::add_transitions_with(
+                *flag_it,
+                it->get_transition_placeholders(*flag_it),
+                transition_table,
+                state_placeholders,
+                flag_symbols);
+            
         }
     }
-        for (std::map<SymbolNumber,
-         std::vector<TransitionPlaceholder> >::iterator sym_it =
-                 it->inputs.begin(); 
-             sym_it != it->inputs.end(); ++sym_it) {
-            if (sym_it->first == 0 or flag_symbols.count(sym_it->first) != 0) {
+    for (unsigned int i = 1; i < it->symbol_to_transition_placeholder_v.size();
+         ++i) {
+        if (!it->input_present(i) ||
+            flag_symbols.count(i) != 0) {
         continue;
         }
-        hfst_ol::add_transitions_with(sym_it->first,
-                      it->inputs[sym_it->first],
-                      transition_table,
-                      state_placeholders,
-                      flag_symbols);
+        hfst_ol::add_transitions_with(i,
+                                      it->get_transition_placeholders(i),
+                                      transition_table,
+                                      state_placeholders,
+                                      flag_symbols);
     }
     }
 
@@ -88,7 +88,7 @@ void add_transitions_with(SymbolNumber symbol,
     // before writing each transition, find out whether its
     // target is simple (ie. should point directly to TA entry)
     unsigned int target;
-    if (state_placeholders[it->target].is_simple(flag_symbols)) {
+    if (state_placeholders[it->target].is_simple()) {
         target = state_placeholders[it->target].first_transition + 
         TRANSITION_TARGET_TABLE_START - 1;
     } else {
@@ -106,7 +106,7 @@ bool compare_states_by_input_size(
     const StatePlaceholder & lhs, const StatePlaceholder & rhs)
 {
     // descending by input size
-    return lhs.inputs.size() > rhs.inputs.size();
+    return lhs.inputs > rhs.inputs;
 }
 
 bool compare_states_by_state_number(
@@ -504,7 +504,7 @@ void ConvertFstState::set_transition_indices(void)
         previous_symbol = input_symbol;
       }
     }
-    if(input_symbol == 0) { zero_transitions = true; }		
+    if(input_symbol == 0) { zero_transitions = true; }
     ++position;
   }
 }
@@ -724,9 +724,9 @@ PlaceHolderVector::size_type ConvertTransitionTableIndices::add_state(
   {
     // Only try the first 100 indices.
     //    if (index > lower_bound+100000)
-    //	{
-    //	  index = last_full_index()+1;
-    //	}
+    //{
+    //  index = last_full_index()+1;
+    //}
     if((index + number_of_input_symbols + 1) >= indices.size())
       get_more_space();
     
@@ -872,7 +872,7 @@ void ConvertTransducerHeader::compute_header(TransducerHeader& header,
   if(!header.weighted)
     header.has_unweighted_input_epsilon_cycles =
     header.has_input_epsilon_cycles;
-}	
+}
 
 
 ConvertTransducer* ConvertTransducer::constructing_transducer = NULL;
diff --git a/libhfst/src/implementations/optimized-lookup/convert.h b/libhfst/src/implementations/optimized-lookup/convert.h
index b68e4ef..58d137d 100644
--- a/libhfst/src/implementations/optimized-lookup/convert.h
+++ b/libhfst/src/implementations/optimized-lookup/convert.h
@@ -27,25 +27,32 @@ namespace hfst_ol {
 
 struct TransitionPlaceholder {
     unsigned int target;
+    SymbolNumber input; // input symbol number; StatePlaceholder::add_transition() uses it to select the per-symbol bucket
     SymbolNumber output;
     float weight;
 
-TransitionPlaceholder(unsigned int t, SymbolNumber o, float w):
+    TransitionPlaceholder(unsigned int t, SymbolNumber i, SymbolNumber o, float w): // t: target, i: input symbol, o: output symbol, w: weight
     target(t),
+    input(i),
     output(o),
     weight(w)
     {}
 };
 
-typedef std::map<SymbolNumber, std::vector<TransitionPlaceholder> >
-    SymbolTransitionsMap;
+//typedef std::map<SymbolNumber, std::vector<TransitionPlaceholder> >
+//    SymbolTransitionsMap;
 
 
 struct StatePlaceholder {
+    enum indexing_type {empty, simple_zero_index, simple_nonzero_index, nonsimple};
+    
     unsigned int state_number;
     unsigned int start_index;
     unsigned int first_transition;
-    SymbolTransitionsMap inputs;
+    std::vector<unsigned int> symbol_to_transition_placeholder_v;
+    std::vector<std::vector<TransitionPlaceholder> > transition_placeholders;
+    indexing_type type;
+    SymbolNumber inputs;
     bool final;
     float final_weight;
     StatePlaceholder (unsigned int state, bool finality, unsigned int first,
@@ -54,101 +61,136 @@ struct StatePlaceholder {
     start_index(UINT_MAX),
     first_transition(first),
     final(finality),
-    final_weight(final_weight)
-    {}
+    final_weight(final_weight),
+    type(state == 0 ? nonsimple: empty),
+    inputs(0)
+        { }
     StatePlaceholder ():
     state_number(UINT_MAX),
     start_index(UINT_MAX),
     first_transition(UINT_MAX),
     final(false),
-    final_weight(0.0)
+    final_weight(0.0),
+    type(empty),
+    inputs(0)
     { }
     
-    bool is_simple(std::set<SymbolNumber> const & flag_symbols) const
+    bool is_simple(void) const
     {
-        if (state_number == 0) {
-        return false;
-        }
-        if (flag_symbols.size() == 0) {
-        return inputs.size() < 2;
-        }
-        bool have_zero = false;
-        SymbolNumber input_symbols = 0;
-        for(SymbolTransitionsMap::const_iterator it = inputs.begin();
-        it != inputs.end(); ++it) {
-        if ((it->first == 0) or (flag_symbols.count(it->first) != 0)) {
-            if (!have_zero) {
-            have_zero = true;
-            ++input_symbols;
-            }
-        } else {
-            ++input_symbols;
-        }
-        if (input_symbols > 1) {
-            return false;
-        }
-        }
-        return true;
+        return type != nonsimple;
     }
     
     unsigned int number_of_transitions(void) const {
-    unsigned int count = 0;
-    for(SymbolTransitionsMap::const_iterator it = inputs.begin();
-        it != inputs.end(); ++it) {
-        count += it->second.size();
+        unsigned int count = 0;
+        for(std::vector<std::vector<TransitionPlaceholder> >::const_iterator it
+                = transition_placeholders.begin();
+            it != transition_placeholders.end(); ++it) {
+            count += it->size();
+        }
+        return count;
     }
-    return count;
+
+    bool input_present(SymbolNumber input) const { // true once add_input() has registered this symbol
+        if (input >= symbol_to_transition_placeholder_v.size()) { return false; } // table never grew this far
+        return symbol_to_transition_placeholder_v[input] != UINT_MAX; // UINT_MAX marks an unused slot
     }
+
+    // Register \a input as an input symbol of this state (no-op if already present)
+    // and update the indexing type. Epsilon (0) and flag symbols both index to 0.
+    void add_input(SymbolNumber input, std::set<SymbolNumber> const & flag_symbols)
+        {
+            if (input_present(input)) {
+                return;
+            }
+            // Grow the lookup table with "unused" markers up to and including input.
+            if (symbol_to_transition_placeholder_v.size() <= input) {
+                symbol_to_transition_placeholder_v.resize(input + 1, UINT_MAX);
+            }
+            // The new symbol gets the next free (still empty) transition bucket.
+            symbol_to_transition_placeholder_v[input] = transition_placeholders.size();
+            transition_placeholders.push_back(std::vector<TransitionPlaceholder>());
+            ++inputs;
+            const bool zero_indexed = (input == 0) || (flag_symbols.count(input) == 1);
+            switch (type) {
+            case nonsimple: // once nonsimple, always nonsimple
+                break;
+            case empty: // the first input decides which kind of simple state this is
+                type = zero_indexed ? simple_zero_index : simple_nonzero_index;
+                break;
+            case simple_zero_index: // further epsilons/flags keep the state simple
+                if (!zero_indexed) {
+                    type = nonsimple;
+                }
+                break;
+            case simple_nonzero_index: // a second input of any kind makes the state nonsimple
+                if (inputs > 1 || zero_indexed) { type = nonsimple; }
+                break;
+            }
+        }
+
+    SymbolNumber get_largest_index(void) const // largest input symbol registered via add_input(); 0 if the state has no inputs
+        {   // add_input() grows the lookup table only up to the newest input, so the last slot is the largest symbol. NOTE(review): assumes add_input() is the table's only writer; avoids back()/[0] on empty vectors.
+            return static_cast<SymbolNumber>(symbol_to_transition_placeholder_v.empty() ? 0 : symbol_to_transition_placeholder_v.size() - 1);
+        }
+
+    void add_transition(TransitionPlaceholder & trans) { // @pre add_input() has been called for trans.input
+        std::vector<TransitionPlaceholder> & bucket = transition_placeholders[symbol_to_transition_placeholder_v[trans.input]];
+        bucket.push_back(trans); // stored by copy in the bucket of its input symbol
+    }
+
+    std::vector<TransitionPlaceholder> & get_transition_placeholders(SymbolNumber input) { // @pre input_present(input)
+        const unsigned int bucket = symbol_to_transition_placeholder_v[input]; // bucket number assigned by add_input()
+        return transition_placeholders[bucket];
+    }
     
     unsigned int symbol_offset(
         SymbolNumber const symbol,
-        std::set<SymbolNumber> const & flag_symbols) const {
+        std::set<SymbolNumber> const & flag_symbols) {
         if (symbol == 0) {
             return 0;
         }
         unsigned int offset = 0;
-        if (flag_symbols.size() == 0) {
-            for(SymbolTransitionsMap::const_iterator it = inputs.begin();
-                it!= inputs.end(); ++it) {
-                if (symbol == it->first) {
-                    return offset;
-                }
-                offset += it->second.size();
-            }
-            
-        } else {
-            if (inputs.count(0) != 0) { // if there are epsilons
-                offset = inputs.find(0)->second.size();
-            }
-            for(std::set<SymbolNumber>::iterator flag_it = flag_symbols.begin();
-                flag_it != flag_symbols.end(); ++flag_it) {
-                if (inputs.count(*flag_it) != 0) { // if this flag is present
-                    if (symbol == *flag_it) {
-                        // Flags go to 0 (even if there's no epsilon)
-                        return 0;
-                    }
-                    offset += inputs.find(*flag_it)->second.size();
+        // if (flag_symbols.size() == 0) {
+        //     for(int i = 0; i < symbol_to_transition_placeholder_v.size(); ++i) {
+        //         if (symbol_to_transition_placeholder_v[i] != UINT_MAX) {
+        //             if (symbol == i) {
+        //                 return offset;
+        //             }
+        //             offset += get_transition_placeholders(i).size();
+        //         }
+        //     }
+        // } else {
+        if (input_present(0)) { // if there are epsilons
+            offset = get_transition_placeholders(0).size();
+        }
+        for(std::set<SymbolNumber>::iterator flag_it = flag_symbols.begin();
+            flag_it != flag_symbols.end(); ++flag_it) {
+            if (input_present(*flag_it)) {
+                if (symbol == *flag_it) {
+                    // Flags go to 0 (even if there's no epsilon)
+                    return 0;
                 }
+                offset += get_transition_placeholders(*flag_it).size();
             }
-            for(SymbolTransitionsMap::const_iterator it = inputs.begin();
-                it!= inputs.end(); ++it) {
-                if (it->first == 0 || flag_symbols.count(it->first) != 0) {
+        }
+        for(unsigned int i = 1; i < symbol_to_transition_placeholder_v.size(); ++i) {
+            if (input_present(i)) {
+                if (flag_symbols.count(i) != 0) {
+                    // already counted
                     continue;
                 }
-                if (symbol == it->first) {
+                if (symbol == i) {
                     return offset;
                 }
-                offset += it->second.size();
+                offset += get_transition_placeholders(i).size();
             }
-            std::string message("error in conversion between optimized lookup "
-                                "format and HfstTransducer;\ntried to calculate "
-                                "symbol_offset for symbol not present in state");
-            HFST_THROW_MESSAGE
-                (HfstFatalException,
-                 message);
         }
-        std::string message("error in function StatePlaceholder::symbol_offset");
-        HFST_THROW_MESSAGE(HfstFatalException, message);
+        std::string message("error in conversion between optimized lookup "
+                            "format and HfstTransducer;\ntried to calculate "
+                            "symbol_offset for symbol not present in state");
+        HFST_THROW_MESSAGE
+            (HfstFatalException,
+             message);
     }
 };
 
@@ -157,35 +199,55 @@ bool compare_states_by_input_size(
 bool compare_states_by_state_number(
     const StatePlaceholder & lhs, const StatePlaceholder & rhs);
 
-class IndexPlaceholders: public std::map<unsigned int,
-        std::pair<unsigned int, SymbolNumber> >
+struct IndexPlaceholders
 {
-public:
+    std::vector<unsigned int> indices;
+    std::vector<std::pair<unsigned int, SymbolNumber> > targets;
+
+    bool used(unsigned int const position) const { // true if assign() has filled this table position
+        if (position >= indices.size()) { return false; } // table never grew this far
+        return indices[position] != NO_TABLE_INDEX; // NO_TABLE_INDEX marks a free slot
+    }
+
+    void assign(unsigned int const position, unsigned int target, SymbolNumber sym)
+        {   // Point table position \a position at a newly appended (target, sym) entry.
+            if (position >= indices.size()) { // pad with free-slot markers up to and including position
+                indices.resize(static_cast<size_t>(position) + 1, NO_TABLE_INDEX);
+            }
+            indices[position] = targets.size();
+            targets.push_back(std::make_pair(target, sym));
+        }
+
+    std::pair<unsigned int, SymbolNumber> get_target(unsigned int index) { // @pre used(index)
+        const unsigned int slot = indices[index]; // position of the entry inside targets
+        return targets[slot];
+    }
+    
     bool fits(StatePlaceholder const & state,
               std::set<SymbolNumber> const & flag_symbols,
               unsigned int const position) const
-    {
-    if (count(position) != 0) {
-        return false;
-    }
-    for (SymbolTransitionsMap::const_iterator it = state.inputs.begin();
-         it != state.inputs.end(); ++it) {
-        SymbolNumber index_offset = it->first;
-        if (flag_symbols.count(index_offset) != 0) {
-        index_offset = 0;
-        }
-        if (count(index_offset + position + 1) != 0) {
-        return false;
+        {
+            if (used(position)) {
+                return false;
+            }
+            for (std::vector<std::vector<TransitionPlaceholder> >::const_iterator it = state.transition_placeholders.begin();
+                 it != state.transition_placeholders.end(); ++it) {
+                SymbolNumber index_offset = it->at(0).input;
+                if (flag_symbols.count(index_offset) != 0) {
+                    index_offset = 0;
+                }
+                if (used(index_offset + position + 1)) {
+                    return false;
+                }
+            }
+            return true;
         }
-    }
-    return true;
-    }
 
     bool unsuitable(unsigned int const index,
                     SymbolNumber const symbols,
                     float const packing_aggression) const
     {
-    if (count(index) != 0) {
+    if (used(index)) {
         return true;
     }
     
@@ -200,9 +262,9 @@ public:
 
     unsigned int filled = 0;
     for (unsigned int i = 0; i < symbols; ++i) {
-        filled += count(index + i + 1);
+        filled += used(index + i + 1);
         if (filled >= (packing_aggression*symbols)) {
-        return true; // too full
+            return true; // too full
         }
     }
     return false;
diff --git a/libhfst/src/implementations/optimized-lookup/pmatch.cc b/libhfst/src/implementations/optimized-lookup/pmatch.cc
index 5f7d1e7..cd7383e 100644
--- a/libhfst/src/implementations/optimized-lookup/pmatch.cc
+++ b/libhfst/src/implementations/optimized-lookup/pmatch.cc
@@ -5,22 +5,20 @@ namespace hfst_ol {
 
 PmatchAlphabet::PmatchAlphabet(std::istream & inputstream,
                                SymbolNumber symbol_count):
-    TransducerAlphabet(inputstream, symbol_count, false)
-{
-    special_symbols[entry] = NO_SYMBOL_NUMBER;
-    special_symbols[exit] = NO_SYMBOL_NUMBER;
-    special_symbols[LC_entry] = NO_SYMBOL_NUMBER;
-    special_symbols[LC_exit] = NO_SYMBOL_NUMBER;
-    special_symbols[RC_entry] = NO_SYMBOL_NUMBER;
-    special_symbols[RC_exit] = NO_SYMBOL_NUMBER;
-    special_symbols[NLC_entry] = NO_SYMBOL_NUMBER;
-    special_symbols[NLC_exit] = NO_SYMBOL_NUMBER;
-    special_symbols[NRC_entry] = NO_SYMBOL_NUMBER;
-    special_symbols[NRC_exit] = NO_SYMBOL_NUMBER;
-    special_symbols[Pmatch_passthrough] = NO_SYMBOL_NUMBER;
-    special_symbols[boundary] = NO_SYMBOL_NUMBER;
+    TransducerAlphabet(inputstream, symbol_count, false),
+    special_symbols(12, NO_SYMBOL_NUMBER) // SpecialSymbols enum
+{
+    symbol2lists = SymbolNumberVector(orig_symbol_count, NO_SYMBOL_NUMBER);
+    list2symbols = SymbolNumberVector(orig_symbol_count, NO_SYMBOL_NUMBER);
+    rtns = RtnVector(orig_symbol_count, NULL);
+    // We initialize the vector of which symbols have a printable representation
+    // with false, then flip those that actually do to true
+    printable_vector = std::vector<bool>(orig_symbol_count, false);
     for (SymbolNumber i = 1; i < symbol_table.size(); ++i) {
         add_special_symbol(symbol_table[i], i);
+        if (is_flag_diacritic(i)) {
+            printable_vector[i] = false;
+        }
     }
 }
 
@@ -28,19 +26,18 @@ PmatchAlphabet::PmatchAlphabet(void):
     TransducerAlphabet()
 {}
 
+// Append `symbol` to the alphabet and give it one entry in each per-symbol
+// side table (symbol2lists, list2symbols, rtns, printable_vector), so those
+// tables keep one slot per symbol. The new symbol starts out printable,
+// belonging to no list, not being a list itself, and having no RTN attached.
+void PmatchAlphabet::add_symbol(const std::string & symbol)
+{
+    TransducerAlphabet::add_symbol(symbol);
+    symbol2lists.push_back(NO_SYMBOL_NUMBER);
+    list2symbols.push_back(NO_SYMBOL_NUMBER);
+    rtns.push_back(NULL);
+    printable_vector.push_back(true);
+}
+
 bool PmatchAlphabet::is_printable(SymbolNumber symbol)
 {
-    if (symbol == 0 || symbol == NO_SYMBOL_NUMBER ||
-        is_flag_diacritic(symbol) || is_end_tag(symbol) || is_guard(symbol)) {
-        return false;
-    }
-    for (std::map<SpecialSymbol, SymbolNumber>::const_iterator it = special_symbols.begin();
-         it != special_symbols.end(); ++it) {
-        if (it->second == symbol) {
-            return false;
-        }
-    }
-    return true;
+    return symbol < printable_vector.size() && printable_vector[symbol];
 }
 
 void PmatchAlphabet::add_special_symbol(const std::string & str,
@@ -79,24 +76,90 @@ void PmatchAlphabet::add_special_symbol(const std::string & str,
         rtn_names[name_from_insertion(str)] = symbol_number;
     } else if (is_guard(str)) {
         guards.push_back(symbol_number);
+    } else if (is_list(str)) {
+        process_symbol_list(str, symbol_number);
+    } else if (is_counter(str)) {
+        process_counter(str, symbol_number);
+    } else {
+        printable_vector[symbol_number] = true;
+    }
+}
+
+// Register the list symbol `sym`, whose name `str` has the form
+// "@PMATCH_LIST_" + members joined by '_' + "@".
+//
+// Every member is looked up in the alphabet (and added to it when missing).
+// Afterwards symbol_list_members holds the members of this list (reachable
+// through list2symbols[sym]) and, for every member, symbol_lists holds the
+// lists that contain it (reachable through symbol2lists[member]).
+//
+// Members are treated as a set: a member that is written more than once is
+// only registered the first time it is seen.
+void PmatchAlphabet::process_symbol_list(std::string str, SymbolNumber sym)
+{
+    SymbolNumberVector list_symbols;
+    StringSymbolMap ss = build_string_symbol_map();
+    size_t begin = strlen("@PMATCH_LIST_");
+    size_t stop;
+    std::vector<std::string> collected_symbols;
+    while ((stop = str.find('_', begin)) != std::string::npos) {
+// For each underscore after the prelude, grab the substring
+        std::string symbol = str.substr(begin, stop - begin);
+        if (symbol.size() == 0) {
+// If the symbol _is_ an underscore it looks like we got an empty string
+            symbol = "_";
+            begin = stop + 2;
+        } else {
+            begin = stop + 1;
+        }
+        collected_symbols.push_back(symbol);
+    }
+    // One at the end. When the last member is an underscore (or the list is
+    // empty) nothing is left between `begin` and the closing '@'; that empty
+    // remainder is not a member and must not be registered as a symbol.
+    std::string last_symbol = str.substr(begin, str.size() - begin - strlen("@"));
+    if (!last_symbol.empty()) {
+        collected_symbols.push_back(last_symbol);
+    }
+    // Process the symbols we found
+    std::set<std::string> seen;
+    for (std::vector<std::string>::const_iterator it = collected_symbols.begin();
+         it != collected_symbols.end(); ++it) {
+        if (!seen.insert(*it).second) {
+// Repeated member: `ss` is a snapshot taken before any add_symbol() call, so
+// handling it again would add an unknown symbol twice (or register this list
+// twice for a known one)
+            continue;
+        }
+        SymbolNumber str_sym;
+        if (ss.count(*it) == 0) {
+// This symbol isn't mentioned elsewhere in the alphabet
+            add_symbol(*it);
+            str_sym = orig_symbol_count;
+            ++orig_symbol_count;
+        } else {
+            str_sym = ss[*it];
+        }
+        list_symbols.push_back(str_sym);
+        if (symbol2lists[str_sym] == NO_SYMBOL_NUMBER) {
+            symbol2lists[str_sym] = symbol_lists.size();
+            symbol_lists.push_back(SymbolNumberVector(1, sym));
+        } else {
+            symbol_lists[symbol2lists[str_sym]].push_back(sym);
+        }
     }
+    list2symbols[sym] = symbol_list_members.size();
+    symbol_list_members.push_back(list_symbols);
 }
 
 SymbolNumberVector PmatchAlphabet::get_specials(void) const
 {
     SymbolNumberVector v;
-    for (std::map<SpecialSymbol, SymbolNumber>::const_iterator it =
+    for (SymbolNumberVector::const_iterator it =
              special_symbols.begin(); it != special_symbols.end(); ++it) {
-        if (it->second != NO_SYMBOL_NUMBER) {
-            v.push_back(it->second);
+        if (*it != NO_SYMBOL_NUMBER) {
+            v.push_back(*it);
         }
     }
     return v;
 }
 
-PmatchContainer::PmatchContainer(std::istream & inputstream,
-                                 bool _verbose, bool _extract_tags):
-    verbose(_verbose),
+// Mark symbol `sym` as a counter and start its count at zero.
+// `counters` is indexed by symbol number; slots that belong to symbols which
+// are not counters hold NO_COUNTER. `str` (the symbol's name) is not used.
+void PmatchAlphabet::process_counter(std::string str, SymbolNumber sym)
+{
+    // Fill up non-counter spots in the counter vector with blanks
+    if (counters.size() <= sym) {
+        counters.resize(static_cast<size_t>(sym) + 1, NO_COUNTER);
+    }
+    // Assign by index rather than appending, so the slot is the right one
+    // even if counters are not registered in increasing symbol order
+    counters[sym] = 0;
+}
+
+// Bump the traversal count of `sym`; symbols that are not counters are
+// silently ignored.
+void PmatchAlphabet::count(SymbolNumber sym)
+{
+    if (!is_counter(sym)) {
+        return;
+    }
+    ++counters[sym];
+}
+
+PmatchContainer::PmatchContainer(std::istream & inputstream):
+    verbose(false),
     locate_mode(false),
+    // profile_mode is tested while following input-epsilon transitions and is
+    // otherwise only assigned by set_profile(), so it needs a defined value.
+    profile_mode(false),
     recursion_depth_left(PMATCH_MAX_RECURSION_DEPTH),
     entry_stack()
@@ -107,11 +170,11 @@ PmatchContainer::PmatchContainer(std::istream & inputstream,
     // for once more established
 
     TransducerHeader header(inputstream);
-    orig_symbol_count = symbol_count = header.symbol_count();
     alphabet = PmatchAlphabet(inputstream, header.symbol_count());
-    alphabet.extract_tags = _extract_tags;
+    orig_symbol_count = symbol_count = alphabet.get_orig_symbol_count();
+    alphabet.extract_tags = locate_mode;
     line_number = 0;
-    encoder = new Encoder(alphabet.get_symbol_table(), header.input_symbol_count());
+    encoder = new Encoder(alphabet.get_symbol_table(), orig_symbol_count);
     toplevel = new hfst_ol::PmatchTransducer(
         inputstream,
         header.index_table_size(),
@@ -158,9 +221,9 @@ PmatchContainer::PmatchContainer(std::istream & inputstream,
             alphabet.get_rtn(*it)->collect_possible_first_symbols();
             std::set<SymbolNumber> rtn_firsts =
                 alphabet.get_rtn(*it)->possible_first_symbols;
-            for (RtnMap::iterator it = alphabet.rtns.begin();
-                 it != alphabet.rtns.end(); ++it) {
-                if (rtn_firsts.count(it->first) == 1) {
+            for (RtnNameMap::const_iterator it = alphabet.rtn_names.begin();
+                 it != alphabet.rtn_names.end(); ++it) {
+                if (rtn_firsts.count(it->second) == 1) {
                     // For now we are very conservative:
                     // if we can go through two levels of rtns
                     // without any input, we just assume the full
@@ -181,9 +244,9 @@ PmatchContainer::PmatchContainer(std::istream & inputstream,
             }
         }
     }
-    for (RtnMap::iterator it = alphabet.rtns.begin();
-         it != alphabet.rtns.end(); ++it) {
-        possible_firsts.erase(it->first);
+    for (RtnNameMap::const_iterator it = alphabet.rtn_names.begin();
+         it != alphabet.rtn_names.end(); ++it) {
+        possible_firsts.erase(it->second);
     }
     if (!possible_firsts.empty() &&
         alphabet.get_special(boundary) != NO_SYMBOL_NUMBER) {
@@ -227,6 +290,16 @@ bool PmatchAlphabet::is_guard(const std::string & symbol)
     return symbol.find("@PMATCH_GUARD_") == 0 && symbol.rfind("@") == symbol.size() - 1;
 }
 
+// True when `symbol` is spelled like a counter symbol, i.e. it starts with
+// "@PMATCH_COUNTER_" and its last character is '@'.
+bool PmatchAlphabet::is_counter(const std::string & symbol)
+{
+    const char prefix[] = "@PMATCH_COUNTER_";
+    const size_t prefix_len = sizeof(prefix) - 1;
+    if (symbol.compare(0, prefix_len, prefix) != 0) {
+        return false;
+    }
+    // The prefix matched, so the string is non-empty here
+    return symbol[symbol.size() - 1] == '@';
+}
+
+// True when `symbol` is spelled like a symbol list, i.e. it starts with
+// "@PMATCH_LIST_" and its last character is '@'.
+bool PmatchAlphabet::is_list(const std::string & symbol)
+{
+    const char prefix[] = "@PMATCH_LIST_";
+    const size_t prefix_len = sizeof(prefix) - 1;
+    if (symbol.compare(0, prefix_len, prefix) != 0) {
+        return false;
+    }
+    // The prefix matched, so the string is non-empty here
+    return symbol[symbol.size() - 1] == '@';
+}
+
 bool PmatchAlphabet::is_special(const std::string & symbol)
 {
     if (symbol.size() == 0) {
@@ -251,6 +324,11 @@ bool PmatchAlphabet::is_guard(const SymbolNumber symbol) const
     return false;
 }
 
+// True when `symbol` has a slot in `counters` that is not the NO_COUNTER
+// placeholder.
+bool PmatchAlphabet::is_counter(const SymbolNumber symbol) const
+{
+    if (symbol >= counters.size()) {
+        return false;
+    }
+    return counters[symbol] != NO_COUNTER;
+}
+
 std::string PmatchAlphabet::name_from_insertion(const std::string & symbol)
 {
     return symbol.substr(sizeof("@I.") - 1, symbol.size() - (sizeof("@I.@") - 1));
@@ -283,9 +361,10 @@ PmatchContainer::~PmatchContainer(void)
 
 PmatchAlphabet::~PmatchAlphabet(void)
 {
-    for (RtnMap::iterator it = rtns.begin();
+    for (RtnVector::iterator it = rtns.begin();
          it != rtns.end(); ++it) {
-        delete it->second;
+        delete *it;
+        *it = NULL;
     }
 
 }
@@ -357,17 +436,17 @@ std::string PmatchContainer::parse_name_from_hfst3_header(std::istream & f)
 void PmatchAlphabet::add_rtn(PmatchTransducer * rtn, std::string const & name)
 {
     SymbolNumber symbol = rtn_names[name];
-    rtns.insert(std::pair<SymbolNumber, PmatchTransducer *>(symbol, rtn));
+    rtns[symbol] = rtn;
 }
 
 bool PmatchAlphabet::has_rtn(std::string const & name) const
 {
-    return rtns.count(rtn_names.at(name)) != 0;
+    return rtn_names.at(name) < rtns.size() && rtns[rtn_names.at(name)] != NULL;
 }
 
 bool PmatchAlphabet::has_rtn(SymbolNumber symbol) const
 {
-    return rtns.count(symbol) != 0;
+    return symbol < rtns.size() && rtns[symbol] != NULL;
 }
 
 PmatchTransducer * PmatchAlphabet::get_rtn(SymbolNumber symbol)
@@ -375,6 +454,19 @@ PmatchTransducer * PmatchAlphabet::get_rtn(SymbolNumber symbol)
     return rtns[symbol];
 }
 
+// Return the name part of a counter symbol, i.e. the symbol's string with the
+// leading "@PMATCH_COUNTER_" and the trailing '@' removed. If `symbol` is
+// outside the symbol table, or its string is not spelled like a counter,
+// "INVALID_COUNTER" is returned instead.
+std::string PmatchAlphabet::get_counter_name(SymbolNumber symbol)
+{
+    const size_t prefix_len = strlen("@PMATCH_COUNTER_");
+    if (symbol >= symbol_table.size()) {
+        return "INVALID_COUNTER";
+    }
+    std::string full_name = symbol_table[symbol];
+    if (is_counter(full_name)) {
+        // Keep what lies between the prefix and the closing '@'
+        return full_name.substr(prefix_len,
+                                full_name.size() - prefix_len - 1);
+    }
+    return "INVALID_COUNTER";
+}
+
 SymbolNumber PmatchAlphabet::get_special(SpecialSymbol special) const
 {
     return special_symbols.at(special);
@@ -440,6 +532,47 @@ LocationVectorVector PmatchContainer::locate(std::string & input)
     return locations;
 }
 
+// A utility comparing function for get_profiling_info: orders
+// (name, count) pairs by count, largest first. The name is not looked at.
+// The pairs are taken by const reference so that sorting does not copy the
+// name strings on every comparison.
+bool counter_comp(const std::pair<std::string, unsigned long> & l,
+                  const std::pair<std::string, unsigned long> & r)
+{
+    // Descending order
+    return l.second > r.second;
+}
+
+// Build a plain-text report of the alphabet's counters: one line per counter
+// symbol, ordered by counter_comp (largest count first), with the counts
+// lined up in a column to the right of the names.
+std::string PmatchContainer::get_profiling_info(void)
+{
+    typedef std::pair<std::string, unsigned long> NamedCount;
+
+    // Collect (name, count) for every slot that really is a counter
+    std::vector<NamedCount> named_counts;
+    size_t widest_name = 0;
+    for (SymbolNumber sym = 0; sym < alphabet.counters.size(); ++sym) {
+        if (alphabet.counters[sym] == NO_COUNTER) {
+            continue;
+        }
+        std::string name = alphabet.get_counter_name(sym);
+        widest_name = std::max(widest_name, name.size());
+        named_counts.push_back(NamedCount(name, alphabet.counters[sym]));
+    }
+    std::sort(named_counts.begin(), named_counts.end(), counter_comp);
+
+    std::stringstream report;
+    report << "Profiling information:\n";
+    report << "  Traversals of Counter() positions:\n";
+    for (std::vector<NamedCount>::const_iterator entry = named_counts.begin();
+         entry != named_counts.end(); ++entry) {
+        // Pad so that every count starts 8 columns past the longest name
+        report << "    " << entry->first
+               << std::string(widest_name + 8 - entry->first.size(), ' ')
+               << entry->second << "\n";
+    }
+    return report.str();
+}
+
 void PmatchContainer::copy_to_output(const DoubleTape & best_result)
 {
     for (DoubleTape::const_iterator it = best_result.begin();
@@ -486,11 +619,9 @@ std::string PmatchAlphabet::stringify(const DoubleTape & str)
             }
             retval.insert(pos, start_tag(output));
             retval.append(end_tag(output));
-        } else if (output == special_symbols[boundary]
-                   || is_guard(output)) {
-            continue;
         } else {
-            if (!extract_tags || start_tag_pos.size() != 0) {
+            if ((!extract_tags || start_tag_pos.size() != 0)
+                && is_printable(output)) {
                 retval.append(string_from_symbol(output));
             }
         }
@@ -582,6 +713,7 @@ PmatchTransducer::PmatchTransducer(std::istream & is,
     is.read(indextab, TransitionWIndex::size * index_table_size);
     is.read(transitiontab, TransitionW::size * transition_table_size);
     char * orig_p = indextab;
+    index_table.reserve(index_table_size);
     while(index_table_size) {
         // index_table.push_back(
         //     SimpleIndex(*(SymbolNumber *) indextab,
@@ -593,6 +725,7 @@ PmatchTransducer::PmatchTransducer(std::istream & is,
     }
     free(orig_p);
     orig_p = transitiontab;
+    transition_table.reserve(transition_table_size);
     while(transition_table_size) {
         transition_table.push_back(TransitionW(transitiontab));
             // SimpleTransition(*(SymbolNumber *) transitiontab,
@@ -714,7 +847,16 @@ void PmatchTransducer::collect_first_transition(TransitionTableIndex i,
                     container->reset_recursion();
                     throw true;
                 }
-                possible_first_symbols.insert(*it);
+                if (alphabet.list2symbols[*it] != NO_SYMBOL_NUMBER) {
+// If this is a list, collect everything in the list
+                    for (SymbolNumberVector::const_iterator sym_it =
+                            alphabet.symbol_list_members[alphabet.list2symbols[*it]].begin();
+                        sym_it != alphabet.symbol_list_members[alphabet.list2symbols[*it]].end(); ++sym_it) {
+                        possible_first_symbols.insert(*sym_it);
+                    }
+                } else {
+                    possible_first_symbols.insert(*it);
+                }
             } else {
                 // faking through a context check
                 collect_first(transition_table[i].get_target(),
@@ -893,9 +1035,9 @@ void PmatchTransducer::note_analysis(unsigned int input_pos,
         rtn_stack.top().candidate_tape_pos = tape_pos;
         rtn_stack.top().candidate_input_pos = input_pos;
         rtn_stack.top().best_weight = local_stack.top().running_weight;
-    } else if (container->is_verbose() &&
-                 input_pos == rtn_stack.top().candidate_input_pos &&
-                 rtn_stack.top().best_weight == local_stack.top().running_weight) {
+    } else if (container->verbose &&
+               input_pos == rtn_stack.top().candidate_input_pos &&
+               rtn_stack.top().best_weight == local_stack.top().running_weight) {
         DoubleTape discarded(container->tape.extract_slice(
                                  rtn_stack.top().tape_entry, tape_pos));
         std::cerr << "\n\tline " << container->line_number << ": conflicting equally weighted matches found, keeping:\n\t"
@@ -937,6 +1079,9 @@ void PmatchTransducer::take_epsilons(unsigned int input_pos,
         Weight weight = transition_table[i].get_weight();
         // We handle paths where we're checking contexts here
         if (input == 0) {
+            if (container->profile_mode) {
+                alphabet.count(output);
+            }
             if (!checking_context()) {
                 if (!try_entering_context(output)) {
                     // no context to enter, regular input epsilon
@@ -950,7 +1095,6 @@ void PmatchTransducer::take_epsilons(unsigned int input_pos,
                     } else if (output == alphabet.get_special(exit)) {
                         container->entry_stack.pop();
                     }
-
                     
                     get_analyses(input_pos, tape_pos + 1, target);
 
@@ -1081,8 +1225,9 @@ void PmatchTransducer::take_transitions(SymbolNumber input,
         } else if (this_input == input) {
             if (!checking_context()) {
                 if (this_output == alphabet.get_identity_symbol() ||
-                    (this_output == alphabet.get_unknown_symbol())) {
-                // we got here via identity or unknown, so look back in the
+                    (this_output == alphabet.get_unknown_symbol()) ||
+                    (alphabet.list2symbols[this_output] != NO_SYMBOL_NUMBER)) {
+                // we got here via a meta-arc, so look back in the
                 // input tape to find the symbol we want to write
                     this_output = container->input[input_pos];
                 }
@@ -1113,7 +1258,7 @@ void PmatchTransducer::get_analyses(unsigned int input_pos,
                                     TransitionTableIndex i)
 {
     if (!container->try_recurse()) {
-        if (container->is_verbose()) {
+        if (container->verbose) {
             std::cerr << "pmatch: out of stack space, truncating result\n";
         }
         return;
@@ -1142,6 +1287,15 @@ void PmatchTransducer::get_analyses(unsigned int input_pos,
     } else {
         input = container->input[input_pos];
     }
+    
+    // `input` is not guaranteed to be a valid subscript here (the check
+    // against orig_symbol_count only happens further down), so make sure it
+    // is inside symbol2lists before looking it up.
+    if (input < alphabet.symbol2lists.size() &&
+        alphabet.symbol2lists[input] != NO_SYMBOL_NUMBER) {
+// At least one symbol list contains this symbol
+        for(SymbolNumberVector::const_iterator it =
+                alphabet.symbol_lists[alphabet.symbol2lists[input]].begin();
+            it != alphabet.symbol_lists[alphabet.symbol2lists[input]].end(); ++it) {
+            take_transitions(*it, input_pos, tape_pos, i+1);
+        }
+    }
 
     if (input < orig_symbol_count) {
         take_transitions(input, input_pos, tape_pos, i+1);
diff --git a/libhfst/src/implementations/optimized-lookup/pmatch.h b/libhfst/src/implementations/optimized-lookup/pmatch.h
index 3bf523d..b97f2dd 100644
--- a/libhfst/src/implementations/optimized-lookup/pmatch.h
+++ b/libhfst/src/implementations/optimized-lookup/pmatch.h
@@ -16,7 +16,8 @@ namespace hfst_ol {
 
     const unsigned int PMATCH_MAX_RECURSION_DEPTH = 5000;
     
-    typedef std::map<SymbolNumber, PmatchTransducer *> RtnMap;
+    typedef std::vector<PmatchTransducer *> RtnVector;
+    typedef std::map<std::string, SymbolNumber> RtnNameMap;
     typedef std::vector<Location> LocationVector;
     typedef std::vector<LocationVector> LocationVectorVector;
     typedef std::vector<WeightedDoubleTape> WeightedDoubleTapeVector;
@@ -86,13 +87,22 @@ namespace hfst_ol {
 
     class PmatchAlphabet: public TransducerAlphabet {
     protected:
-        RtnMap rtns;
-        std::map<SpecialSymbol, SymbolNumber> special_symbols;
+        RtnVector rtns;
+        SymbolNumberVector special_symbols;
         std::map<SymbolNumber, std::string> end_tag_map;
-        std::map<std::string, SymbolNumber> rtn_names;
+        RtnNameMap rtn_names;
+// For each symbol, either NO_SYMBOL_NUMBER for "no corresponding list" or an index into symbol_lists
+        SymbolNumberVector symbol2lists;
+// For each symbol, either NO_SYMBOL_NUMBER for "this is not a list" or an index into symbol_list_members
+        SymbolNumberVector list2symbols;
+        std::vector<SymbolNumberVector> symbol_lists;
+        std::vector<SymbolNumberVector> symbol_list_members;
+        std::vector<unsigned long int> counters;
         SymbolNumberVector guards;
+        std::vector<bool> printable_vector;
         bool is_end_tag(const SymbolNumber symbol) const;
         bool is_guard(const SymbolNumber symbol) const;
+        bool is_counter(const SymbolNumber symbol) const;
         std::string end_tag(const SymbolNumber symbol);
         std::string start_tag(const SymbolNumber symbol);
         bool extract_tags;
@@ -101,18 +111,25 @@ namespace hfst_ol {
         PmatchAlphabet(std::istream& is, SymbolNumber symbol_count);
         PmatchAlphabet(void);
         ~PmatchAlphabet(void);
+        virtual void add_symbol(const std::string & symbol);
         static bool is_end_tag(const std::string & symbol);
         static bool is_insertion(const std::string & symbol);
         static bool is_guard(const std::string & symbol);
+        static bool is_list(const std::string & symbol);
+        static bool is_counter(const std::string & symbol);
         static bool is_special(const std::string & symbol);
         static std::string name_from_insertion(
             const std::string & symbol);
         bool is_printable(SymbolNumber symbol);
         void add_special_symbol(const std::string & str, SymbolNumber symbol_number);
+        void process_symbol_list(std::string str, SymbolNumber sym);
+        void process_counter(std::string str, SymbolNumber sym);
+        void count(SymbolNumber sym);
         void add_rtn(PmatchTransducer * rtn, std::string const & name);
         bool has_rtn(std::string const & name) const;
         bool has_rtn(SymbolNumber symbol) const;
         PmatchTransducer * get_rtn(SymbolNumber symbol);
+        std::string get_counter_name(SymbolNumber symbol);
         SymbolNumber get_special(SpecialSymbol special) const;
         SymbolNumberVector get_specials(void) const;
         std::string stringify(const DoubleTape & str);
@@ -140,16 +157,16 @@ namespace hfst_ol {
         std::vector<char> possible_first_symbols;
         bool verbose;
         bool locate_mode;
+        bool profile_mode;
         unsigned int recursion_depth_left;
 
     public:
 
-        PmatchContainer(std::istream & is, bool verbose = false,
-                        bool extract_tags = false);
+        PmatchContainer(std::istream & is);
         PmatchContainer(void);
         ~PmatchContainer(void);
 
-        long line_number;
+        unsigned long line_number;
 
         void initialize_input(const char * input);
         bool has_unsatisfied_rtns(void) const;
@@ -157,6 +174,7 @@ namespace hfst_ol {
         void process(std::string & input);
         std::string match(std::string & input);
         LocationVectorVector locate(std::string & input);
+        std::string get_profiling_info(void);
         bool has_queued_input(unsigned int input_pos);
         bool not_possible_first_symbol(SymbolNumber sym)
         {
@@ -171,8 +189,12 @@ namespace hfst_ol {
         std::string stringify_output(void);
 //        LocationVector locatefy_output(void);
         static std::string parse_name_from_hfst3_header(std::istream & f);
-        void be_verbose(void) { verbose = true; }
-        bool is_verbose(void) { return verbose; }
+        void set_verbose(bool b) { verbose = b; }
+        void set_locate_mode(bool b) {
+            locate_mode = b;
+            alphabet.extract_tags = b;
+        }
+        void set_profile(bool b) { profile_mode = b; }
         bool try_recurse(void)
         {
             if (recursion_depth_left > 0) {
diff --git a/libhfst/src/implementations/optimized-lookup/transducer.cc b/libhfst/src/implementations/optimized-lookup/transducer.cc
index 5ca763e..3bdcf7d 100644
--- a/libhfst/src/implementations/optimized-lookup/transducer.cc
+++ b/libhfst/src/implementations/optimized-lookup/transducer.cc
@@ -60,6 +60,11 @@ void TransducerAlphabet::add_symbol(char * symbol)
     symbol_table.push_back(symbol);
 }
 
+void TransducerAlphabet::add_symbol(const std::string & symbol)
+{
+    symbol_table.push_back(symbol);
+}
+
 TransducerAlphabet::TransducerAlphabet(const SymbolTable& st):
     symbol_table(st)
 {
diff --git a/libhfst/src/implementations/optimized-lookup/transducer.h b/libhfst/src/implementations/optimized-lookup/transducer.h
index 25dd01b..4e6990b 100644
--- a/libhfst/src/implementations/optimized-lookup/transducer.h
+++ b/libhfst/src/implementations/optimized-lookup/transducer.h
@@ -78,6 +78,7 @@ typedef std::set<TraversalState> TraversalStates;
 const SymbolNumber NO_SYMBOL_NUMBER = std::numeric_limits<SymbolNumber>::max();
 const TransitionTableIndex NO_TABLE_INDEX =
     std::numeric_limits<TransitionTableIndex>::max();
+const unsigned long NO_COUNTER = std::numeric_limits<unsigned long>::max();
 const Weight INFINITE_WEIGHT = static_cast<float>(NO_TABLE_INDEX);
 
 enum HeaderFlag {Weighted, Deterministic, Input_deterministic, Minimized,
@@ -419,7 +420,9 @@ public:
         { return identity_symbol; }
     SymbolNumber get_orig_symbol_count(void) const
         { return orig_symbol_count; }
-    void add_symbol(char * symbol);
+    virtual void add_symbol(char * symbol);
+    virtual void add_symbol(const std::string & symbol);
+
     
 };
 
diff --git a/libhfst/src/parsers/LexcCompiler.cc b/libhfst/src/parsers/LexcCompiler.cc
index a0b1262..67cd416 100644
--- a/libhfst/src/parsers/LexcCompiler.cc
+++ b/libhfst/src/parsers/LexcCompiler.cc
@@ -85,7 +85,8 @@ LexcCompiler::LexcCompiler() :
     parseErrors_(false),
     with_flags_(false),
     minimize_flags_(false),
-    rename_flags_(false)
+    rename_flags_(false),
+    allow_multiple_sublexicon_definitions_(false)
 {
     xre_.set_expand_definitions(true);
 }
@@ -102,7 +103,8 @@ LexcCompiler::LexcCompiler(ImplementationType impl) :
     parseErrors_(false),
     with_flags_(false),
     minimize_flags_(false),
-    rename_flags_(false)
+    rename_flags_(false),
+    allow_multiple_sublexicon_definitions_(false)
 {
     tokenizer_.add_multichar_symbol("@_EPSILON_SYMBOL_@");
     tokenizer_.add_multichar_symbol("@0@");
@@ -126,7 +128,8 @@ LexcCompiler::LexcCompiler(ImplementationType impl, bool withFlags) :
     parseErrors_(false),
     with_flags_(withFlags),
     minimize_flags_(false),
-    rename_flags_(false)
+    rename_flags_(false),
+    allow_multiple_sublexicon_definitions_(false)
 {
     tokenizer_.add_multichar_symbol("@_EPSILON_SYMBOL_@");
     tokenizer_.add_multichar_symbol("@0@");
@@ -180,12 +183,38 @@ LexcCompiler& LexcCompiler::parse(const char* filename)
     return *this;
 }
 
+bool LexcCompiler::isQuiet()
+{
+  return quiet_;
+}
+
 LexcCompiler&
-LexcCompiler::setVerbosity(bool verbose)
+LexcCompiler::setVerbosity(unsigned int verbose)
 {
-    quiet_ = !verbose;
-    verbose_ = verbose;
-    return *this;
+  //quiet_ = !verbose;
+  //verbose_ = verbose;
+  if (verbose == 0)
+    {
+      quiet_ = true;
+      verbose_ = false;
+    }
+  else if (verbose == 1)
+    {
+      quiet_ = false;
+      verbose_ = false;
+    }
+  else
+    {
+      quiet_ = false;
+      verbose_ = true;
+    }
+  return *this;
+}
+
+bool
+LexcCompiler::areWarningsTreatedAsErrors()
+{
+  return treat_warnings_as_errors_;
 }
 
 LexcCompiler&
@@ -196,6 +225,14 @@ LexcCompiler::setTreatWarningsAsErrors(bool value)
 }
 
 LexcCompiler&
+LexcCompiler::setAllowMultipleSublexiconDefinitions(bool value)
+{
+    allow_multiple_sublexicon_definitions_ = value;
+    return *this;
+}
+
+
+LexcCompiler&
 LexcCompiler::setWithFlags(bool value)
 {
     with_flags_ = value;
@@ -284,6 +321,48 @@ LexcCompiler::addStringEntry(const string& data,
     return *this;
 }
 
+// to handle information to warn_about_one_sided_flags_
+static bool treat_one_sided_flags_as_errors_ = false;
+static bool quiet_one_sided_flags_ = false;
+
+// Callback passed to the tokenizer by addStringPairEntry. Reports a symbol
+// pair in which a flag diacritic stands on one side only: either the first
+// symbol is a flag diacritic and the second differs from it, or only the
+// second symbol is a flag diacritic.
+//
+// With treat_one_sided_flags_as_errors_ set, an error is printed to stderr
+// and the string literal "one-sided flag" is thrown; otherwise a warning is
+// issued through error_at_current_token unless quiet_one_sided_flags_ is set.
+static void warn_about_one_sided_flags(const std::pair<std::string, std::string> & symbol_pair)
+{
+  const std::string & left = symbol_pair.first;
+  const std::string & right = symbol_pair.second;
+
+  bool one_sided;
+  if (FdOperation::is_diacritic(left))
+    {
+      one_sided = (left != right);
+    }
+  else
+    {
+      one_sided = FdOperation::is_diacritic(right);
+    }
+  if (!one_sided)
+    {
+      return;
+    }
+
+  if (treat_one_sided_flags_as_errors_)
+    {
+      // error messages are always printed, even in quiet mode
+      std::cerr << std::endl << "*** ERROR: one-sided flag diacritic: " << left << ":" << right << " [--Werror]" << std::endl;
+      throw "one-sided flag";
+    }
+  if (!quiet_one_sided_flags_)
+    {
+      hfst::lexc::error_at_current_token(0, 0, "Warning: one-sided flag diacritic.");
+    }
+}
+
 LexcCompiler&
 LexcCompiler::addStringPairEntry(const string& upper, const string& lower,
         const string& continuation, double weight)
@@ -352,6 +431,10 @@ LexcCompiler::addStringPairEntry(const string& upper, const string& lower,
 
     StringPairVector newVector;
 
+    // information for function pointer &warn_about_one_sided_flags
+    treat_one_sided_flags_as_errors_ = treat_warnings_as_errors_;
+    quiet_one_sided_flags_ = quiet_;
+
     if ( upperSize > lowerSize)
     {
         std::string epsilons = "";
@@ -362,7 +445,8 @@ LexcCompiler::addStringPairEntry(const string& upper, const string& lower,
 
         }
         newVector = tokenizer_.tokenize(joinerEnc + upper_string + encodedCont,
-                            joinerEnc + lower_string + epsilons + encodedCont);
+                                        joinerEnc + lower_string + epsilons + encodedCont,
+                                        &warn_about_one_sided_flags);
 
     }
     else if (upperSize < lowerSize)
@@ -375,12 +459,14 @@ LexcCompiler::addStringPairEntry(const string& upper, const string& lower,
 
         }
         newVector = tokenizer_.tokenize(joinerEnc + upper_string + epsilons + encodedCont,
-                            joinerEnc + lower_string + encodedCont);
+                                        joinerEnc + lower_string + encodedCont,
+                                        &warn_about_one_sided_flags);
     }
     else
     {
         newVector = tokenizer_.tokenize(joinerEnc + upper_string + encodedCont,
-                    joinerEnc + lower_string + encodedCont);
+                                        joinerEnc + lower_string + encodedCont,
+                                        &warn_about_one_sided_flags);
     }
     stringsTrie_.disjunct(newVector, weight);
 
@@ -504,6 +590,13 @@ LexcCompiler::setCurrentLexiconName(const string& lexiconName)
 {
     static bool firstLexicon = true;
     currentLexiconName_ = lexiconName;
+
+    if (!allow_multiple_sublexicon_definitions_)
+      {
+        if (lexiconNames_.find(lexiconName) != lexiconNames_.end())
+          throw("Lexicon is defined more than once!");
+      }
+
     lexiconNames_.insert(lexiconName);
     if (noFlags_.find(lexiconName) == noFlags_.end())
     {
@@ -575,53 +668,23 @@ LexcCompiler::compileLexical()
         if (!quiet_) fprintf(stderr, "*** ERROR: could not parse lexc file: treating warnings as errors [--Werror] ***\n");
         return 0;
       }
-/*
-    if( with_flags_)
-        fprintf(stderr, "With Flags \n \n");
-    else
-        fprintf(stderr, "no Flags \n \n");
-*/
 
     HfstTransducer lexicons(stringsTrie_, format_);
 
 
     lexicons.minimize();
 
-
-    // DEBUG
-    //fprintf(stderr, "lexicons: \n");
-    //lexicons.write_in_att_format(stderr, 1);
-
-
     // repeat star to overgenerate
     lexicons.repeat_star().minimize();
 
-
-
-    //printf("lexicons: \n");
-    //lexicons.write_in_att_format(stdout, 1);
-
-
-
-
     HfstSymbolSubstitutions smallSubstitutions;
     smallSubstitutions.insert(StringPair("@0@", "@_EPSILON_SYMBOL_@"));
     smallSubstitutions.insert(StringPair("@@ANOTHER_EPSILON@@", "@_EPSILON_SYMBOL_@"));
     smallSubstitutions.insert(StringPair("@ZERO@", "0"));
 
-    /*
-    lexicons.substitute("@0@", "@_EPSILON_SYMBOL_@");
-    lexicons.substitute("@@ANOTHER_EPSILON@@", "@_EPSILON_SYMBOL_@");
-    lexicons.substitute("@ZERO@", "0");
-    */
     lexicons.substitute(smallSubstitutions);
     lexicons.prune_alphabet();
 
-
-    //printf("lexicons: \n");
-    //lexicons.write_in_att_format(stdout, 1);
-
-
     HfstBasicTransducer joinersTrie_;
 
     HfstSymbolSubstitutions allJoinersToEpsilon;
@@ -1122,27 +1185,30 @@ main(int argc, char** argv)
 #if HAVE_SFST
     std::cout << " (SFST)...";
     LexcCompiler lexcSfst(SFST_TYPE);
+    lexcSfst.setAllowMultipleSublexiconDefinitions(true);
 #endif
 #if HAVE_OPENFST
     std::cout << " (OpenFST)...";
     LexcCompiler lexcOfst(TROPICAL_OPENFST_TYPE);
+    lexcOfst.setAllowMultipleSublexiconDefinitions(true);
 #endif
 #if HAVE_FOMA
     std::cout << " (foma)...";
     LexcCompiler lexcFoma(FOMA_TYPE);
+    lexcFoma.setAllowMultipleSublexiconDefinitions(true);
 #endif
     std::cout << std::endl << "set verbose:";
 #if HAVE_SFST
-    lexcSfst.setVerbosity(true);
-    lexcSfst.setVerbosity(false);
+    lexcSfst.setVerbosity(1);
+    lexcSfst.setVerbosity(2);
 #endif
 #if HAVE_OFST
-    lexcOfst.setVerbosity(true);
-    lexcOfst.setVerbosity(false);
+    lexcOfst.setVerbosity(1);
+    lexcOfst.setVerbosity(2);
 #endif
 #if HAVE_FOMA
-    lexcFoma.setVerbosity(true);
-    lexcFoma.setVerbosity(false);
+    lexcFoma.setVerbosity(1);
+    lexcFoma.setVerbosity(2);
 #endif
     FILE* existence_check = fopen("LexcCompiler_test.lexc", "r");
     if (existence_check == NULL)
diff --git a/libhfst/src/parsers/LexcCompiler.h b/libhfst/src/parsers/LexcCompiler.h
index 2bf4818..fd14d60 100644
--- a/libhfst/src/parsers/LexcCompiler.h
+++ b/libhfst/src/parsers/LexcCompiler.h
@@ -65,12 +65,19 @@ class LexcCompiler
   LexcCompiler& parse(const char* filename);
 
   //! @brief set verbosity options.
-  //! When verbose is true, LexcCompiler will output the messages that Xerox
+  //! 0 means quiet, 1 the default and 2 (or bigger) the verbose mode.
+  //! When verbose is 2, LexcCompiler will output the messages that Xerox
   //! lexc compiler does.
-  LexcCompiler& setVerbosity(bool verbose);
+  LexcCompiler& setVerbosity(unsigned int verbose);
+
+  bool isQuiet();
 
   LexcCompiler& setTreatWarningsAsErrors(bool value);
 
+  bool areWarningsTreatedAsErrors();
+
+  LexcCompiler& setAllowMultipleSublexiconDefinitions(bool value);
+
   LexcCompiler& setWithFlags(bool value);
 
   LexcCompiler& setMinimizeFlags(bool value);
@@ -137,6 +144,7 @@ class LexcCompiler
   bool minimize_flags_;
   bool rename_flags_;
   bool treat_warnings_as_errors_;
+  bool allow_multiple_sublexicon_definitions_;
 
   hfst::ImplementationType format_;
   hfst::HfstTokenizer tokenizer_;
diff --git a/libhfst/src/parsers/XreCompiler.cc b/libhfst/src/parsers/XreCompiler.cc
index 0a453e9..41ce1db 100644
--- a/libhfst/src/parsers/XreCompiler.cc
+++ b/libhfst/src/parsers/XreCompiler.cc
@@ -10,7 +10,7 @@
 
 namespace hfst { namespace xre {
 
-    unsigned int cr=0;
+    unsigned int cr=0; // chars read from xre input
     std::set<unsigned int> positions;
     char * position_symbol = NULL;
     std::string error_message;
@@ -19,6 +19,7 @@ XreCompiler::XreCompiler() :
     definitions_(),
     function_definitions_(),
     function_arguments_(),
+    list_definitions_(),
     format_(hfst::TROPICAL_OPENFST_TYPE)
 {}
 
@@ -26,9 +27,18 @@ XreCompiler::XreCompiler(hfst::ImplementationType impl) :
     definitions_(),
     function_definitions_(),
     function_arguments_(),
+    list_definitions_(),
     format_(impl)
 {}
 
+    XreCompiler::XreCompiler(const struct XreConstructorArguments & args) : // build a compiler from bundled state
+    definitions_(args.definitions), // copies the map; the HfstTransducer pointers inside are shared, not cloned
+    function_definitions_(args.function_definitions),
+    function_arguments_(args.function_arguments),
+    list_definitions_(args.list_definitions),
+    format_(args.format)
+{}
+
 void
 XreCompiler::define(const std::string& name, const std::string& xre)
 {
@@ -42,6 +52,12 @@ XreCompiler::define(const std::string& name, const std::string& xre)
   definitions_[name] = compiled;
 }
 
+void 
+XreCompiler::define_list(const std::string& name, const std::set<std::string>& symbol_list)
+{
+  list_definitions_[name] = symbol_list;
+}
+
 void
 XreCompiler::define(const std::string& name, const HfstTransducer & transducer)
 {
@@ -129,13 +145,26 @@ XreCompiler::contained_only_comments()
 HfstTransducer*
 XreCompiler::compile(const std::string& xre)
 {
-  return hfst::xre::compile(xre, definitions_, function_definitions_, function_arguments_, format_);
+  // debug
+  //std::cerr << "XreCompiler: " << this << " : compile(\"" << xre << "\")" << std::endl;
+  unsigned int cr_before = cr; // remember the namespace-level chars-read counter
+  cr = 0; // this compilation starts counting from zero
+  HfstTransducer * retval = hfst::xre::compile(xre, definitions_, function_definitions_, function_arguments_, list_definitions_, format_);
+  cr = cr_before; // put the previous count back; NOTE(review): skipped if the call above throws
+  return retval;
 }
 
 HfstTransducer*
 XreCompiler::compile_first(const std::string& xre, unsigned int & chars_read)
 {
-  return hfst::xre::compile_first(xre, definitions_, function_definitions_, function_arguments_, format_, chars_read);
+  // debug
+  //std::cerr << "XreCompiler: " << this << " : compile_first(\"" << xre << "\"";
+  unsigned int cr_before = cr; // remember the namespace-level chars-read counter
+  cr = 0; // this compilation starts counting from zero
+  HfstTransducer * retval = hfst::xre::compile_first(xre, definitions_, function_definitions_, function_arguments_, list_definitions_, format_, chars_read);
+  //std::cerr << ", " << chars_read << ")" << std::endl;
+  cr = cr_before; // put the previous count back; NOTE(review): skipped if the call above throws
+  return retval;
 }
 
 bool XreCompiler::get_positions_of_symbol_in_xre
@@ -143,9 +172,10 @@ bool XreCompiler::get_positions_of_symbol_in_xre
 {
   position_symbol = strdup(symbol.c_str());
   positions.clear();
-  cr=0;
+  unsigned int cr_before = cr;
+  cr = 0;
   HfstTransducer * compiled = 
-    hfst::xre::compile(xre, definitions_, function_definitions_, function_arguments_, format_);
+    hfst::xre::compile(xre, definitions_, function_definitions_, function_arguments_, list_definitions_, format_);
   free(position_symbol);
   position_symbol = NULL;
   if (compiled == NULL)
@@ -156,6 +186,7 @@ bool XreCompiler::get_positions_of_symbol_in_xre
       return false;
     }
   positions_ = positions;
+  cr = cr_before;
   return true;
 }
 
diff --git a/libhfst/src/parsers/XreCompiler.h b/libhfst/src/parsers/XreCompiler.h
index b1b2153..665f52f 100644
--- a/libhfst/src/parsers/XreCompiler.h
+++ b/libhfst/src/parsers/XreCompiler.h
@@ -38,6 +38,31 @@ namespace hfst {
 //! @brief hfst::xre namespace is used for all functions related to Xerox 
 //! Regular Expresisions (XRE) parsing.
 namespace xre {
+
+// Bundle of the state an XreCompiler is built from; needed for merge operation.
+struct XreConstructorArguments
+{
+  std::map<std::string,hfst::HfstTransducer*> definitions; // raw pointers, copied shallowly
+  std::map<std::string, std::string> function_definitions;
+  std::map<std::string, unsigned int > function_arguments;
+  std::map<std::string, std::set<std::string> > list_definitions;
+  hfst::ImplementationType format;
+
+  XreConstructorArguments // takes const references and copies each argument exactly once
+  (const std::map<std::string,hfst::HfstTransducer*> & definitions_,
+   const std::map<std::string, std::string> & function_definitions_,
+   const std::map<std::string, unsigned int > & function_arguments_,
+   const std::map<std::string, std::set<std::string> > & list_definitions_,
+   hfst::ImplementationType format_)
+  : definitions(definitions_),
+    function_definitions(function_definitions_),
+    function_arguments(function_arguments_),
+    list_definitions(list_definitions_),
+    format(format_)
+  {
+  }
+};
+
 //! @brief A compiler holding information needed to compile XREs.
 class XreCompiler
 {
@@ -46,12 +71,16 @@ class XreCompiler
   XreCompiler();
   //! @brief Create compiler for @a impl format transducers
   XreCompiler(hfst::ImplementationType impl);
+  //! @brief Create compiler from the definitions, functions, lists and format in @a args
+  XreCompiler(const struct XreConstructorArguments & args);
 
   //! @brief Add a definition macro.
   //!        Compilers will replace arcs labeled @a name, with the transducer
   //!        defined by @a xre in later phases of compilation.
   void define(const std::string& name, const std::string& xre);
 
+  void define_list(const std::string& name, const std::set<std::string>& symbol_list);
+
   //! @brief Add a function macro.
   //!        Compilers will replace call to function \a name with the transducer
   //!        defined by \a xre when the function is called.
@@ -126,6 +155,7 @@ class XreCompiler
   std::map<std::string,hfst::HfstTransducer*> definitions_;
   std::map<std::string, std::string> function_definitions_;
   std::map<std::string, unsigned int > function_arguments_;
+  std::map<std::string, std::set<std::string> > list_definitions_;
   hfst::ImplementationType format_;
 
 }
diff --git a/libhfst/src/parsers/lexc-lexer.ll b/libhfst/src/parsers/lexc-lexer.ll
index 6ff7d23..156566f 100644
--- a/libhfst/src/parsers/lexc-lexer.ll
+++ b/libhfst/src/parsers/lexc-lexer.ll
@@ -29,7 +29,6 @@
 #include <assert.h>
 
 extern void hlexcerror(const char *text);
-
 %}
 
 /* c.f. Unicode Standard 5.1 D92 Table 3-7 */
@@ -126,7 +125,6 @@ LWSP [\r\n\t ]
     hlexclval.name = hfst::lexc::strdup_nonconst_part(lexicon_start, "Lexicon",
                                           NULL, true);
     free(lexicon_start);
-    hlexcerror("Titlecase Lexicon parsed as LEXICON");
     return LEXICON_START_WRONG_CASE;
 }
 
@@ -196,7 +194,6 @@ LWSP [\r\n\t ]
     lexicon_start = hfst::lexc::strstrip(hlexctext);
     hlexclval.name = hfst::lexc::strdup_nonconst_part(lexicon_start, "Lexicon", 0, true);
     free(lexicon_start);
-    hlexcerror("Titlecase Lexicon parsed as LEXICON");
     return LEXICON_START_WRONG_CASE;
 }
 
diff --git a/libhfst/src/parsers/lexc-parser.yy b/libhfst/src/parsers/lexc-parser.yy
index 2bf2cb2..74cba6b 100644
--- a/libhfst/src/parsers/lexc-parser.yy
+++ b/libhfst/src/parsers/lexc-parser.yy
@@ -64,10 +64,18 @@ handle_definition(const string& variable_name, const string& reg_exp)
 }
 
 static
-void
+bool
 handle_lexicon_name(const string& lexiconName)
 {
+  try // setCurrentLexiconName may throw a C string
+  {
     hfst::lexc::lexc_->setCurrentLexiconName(lexiconName);
+  }
+  catch(const char * msg) // the thrown text is not used here
+  {
+    return false; // tell the caller the name was rejected
+  }
+  return true;
 }
 
 static
@@ -116,7 +124,7 @@ handle_string_entry(const string& data, const string& cont, const string& gloss)
 }
 
 static
-void
+bool
 handle_string_pair_entry(const string& upper, const string& lower,
                                 const string& cont, const string& gloss)
 {
@@ -127,7 +135,11 @@ handle_string_pair_entry(const string& upper, const string& lower,
     // handle epsilon "0"
     if (upper != "0" && lower != "0")
     {
-       hfst::lexc::lexc_->addStringPairEntry(upper, lower, cont, weight);
+       try {
+         hfst::lexc::lexc_->addStringPairEntry(upper, lower, cont, weight);
+       } catch(const char * msg) {
+         return false;
+       } 
     }
     else
     {
@@ -137,8 +149,13 @@ handle_string_pair_entry(const string& upper, const string& lower,
          upper_ = std::string("");
        if (lower == "0")
          lower_ = std::string("");
-       hfst::lexc::lexc_->addStringPairEntry(upper_, lower_, cont, weight);
+       try {
+         hfst::lexc::lexc_->addStringPairEntry(upper_, lower_, cont, weight);
+       } catch(const char * msg) {
+         return false;
+       }
     }
+    return true;
 }
 
 static
@@ -155,6 +172,15 @@ handle_regexp_entry(const string& reg_exp, const string& cont,
 
 static
 void
+hlexcwarn(const char* text) // report text at the current token unless the compiler is quiet
+{
+  if (! hfst::lexc::lexc_->isQuiet()) // quiet mode suppresses the message
+    { hfst::lexc::error_at_current_token(0, 0, text); }
+}
+
+
+static
+void
 handle_eof()
 {
 }
@@ -255,12 +281,20 @@ LEXICONS: LEXICONS LEXICON2 LEXICON_LINES
           ;
 
 LEXICON2: LEXICON_START {
-            handle_lexicon_name($1);
+            bool retval = handle_lexicon_name($1); // false when the name was rejected
             free($1);
+            if (!retval)
+              { hlexcerror("Sublexicon defined more than once."); YYABORT; }
           }
           | LEXICON_START_WRONG_CASE {
-            handle_lexicon_name($1);
+            if (hfst::lexc::lexc_->areWarningsTreatedAsErrors()) // titlecase keyword is fatal under --Werror
+              { hlexcerror("Keyword 'Lexicon' used instead of 'LEXICON'. [--Werror]"); free($1); YYABORT; }
+            else
+              { hlexcwarn("Titlecase Lexicon parsed as LEXICON"); }
+            bool retval = handle_lexicon_name($1); // false when the name was rejected
             free($1);
+            if (!retval)
+              { hlexcerror("Sublexicon defined more than once."); YYABORT; }
           }
           ;
 
@@ -275,24 +309,30 @@ LEXICON_LINE: ULSTRING LEXICON_NAME ';' {
               }
               | ULSTRING ':' ULSTRING
                 LEXICON_NAME ';' {
-                handle_string_pair_entry($1, $3, $4, "");
+                bool retval = handle_string_pair_entry($1, $3, $4, "");
                 free( $1);
                 free( $3);
                 free( $4);
+                if (!retval)
+                  { hlexcerror("Erroneous string pair entry."); YYABORT; }
               }
               | LEXICON_NAME ';' {
                 handle_string_entry("", $1, "");
                 free( $1);
               }
               | ULSTRING ':' LEXICON_NAME ';' {
-                handle_string_pair_entry($1, "", $3, "");
+                bool retval = handle_string_pair_entry($1, "", $3, "");
                 free( $1);
                 free( $3);
+                if (!retval)
+                  { hlexcerror("Erroneous string pair entry."); YYABORT; }
               }
               | ':' ULSTRING LEXICON_NAME ';' {
-                handle_string_pair_entry("", $2, $3, "");
+                bool retval = handle_string_pair_entry("", $2, $3, "");
                 free( $2);
                 free( $3);
+                if (!retval)
+                  { hlexcerror("Erroneous string pair entry."); YYABORT; }
               }
               | ':' LEXICON_NAME ';' {
                 handle_string_entry("", $2, "");
@@ -306,11 +346,13 @@ LEXICON_LINE: ULSTRING LEXICON_NAME ';' {
               }
               | ULSTRING ':' ULSTRING
                 LEXICON_NAME ENTRY_GLOSS ';' {
-                handle_string_pair_entry($1, $3, $4, $5);
+                bool retval = handle_string_pair_entry($1, $3, $4, $5);
                 free( $1);
                 free( $3);
                 free( $4);
                 free( $5);
+                if (!retval)
+                  { hlexcerror("Erroneous string pair entry."); YYABORT; }
               }
               | LEXICON_NAME ENTRY_GLOSS ';' {
                 handle_string_entry("", $1, $2);
@@ -318,16 +360,20 @@ LEXICON_LINE: ULSTRING LEXICON_NAME ';' {
                 free( $2);
               }
               | ULSTRING ':' LEXICON_NAME ENTRY_GLOSS ';' {
-                handle_string_pair_entry($1, "", $3, $4);
+                bool retval = handle_string_pair_entry($1, "", $3, $4);
                 free( $1);
                 free( $3);
                 free( $4);
+                if (!retval)
+                  { hlexcerror("Erroneous string pair entry."); YYABORT; }
               }
               | ':' ULSTRING LEXICON_NAME ENTRY_GLOSS ';' {
-                handle_string_pair_entry("", $2, $3, $4);
+                bool retval = handle_string_pair_entry("", $2, $3, $4);
                 free( $2);
                 free( $3);
                 free( $4);
+                if (!retval)
+                  { hlexcerror("Erroneous string pair entry."); YYABORT; }
               }
               | ':' LEXICON_NAME ENTRY_GLOSS ';' {
                 handle_string_entry("", $2, $3);
@@ -363,7 +409,7 @@ void
 hlexcerror(const char* text)
 {
     hfst::lexc::error_at_current_token(0, 0, text);
+    hlexcnerrs++;
 }
 
-
 // vim: set ft=yacc:
diff --git a/libhfst/src/parsers/pmatch_lex.ll b/libhfst/src/parsers/pmatch_lex.ll
index ae88028..e13e3d7 100644
--- a/libhfst/src/parsers/pmatch_lex.ll
+++ b/libhfst/src/parsers/pmatch_lex.ll
@@ -53,6 +53,9 @@ UINTEGER [1-9][0-9]*
 INTEGER -?[1-9][0-9]*
 WSP [\t ]
 LWSP [\t\r\n ]
+
+HEXCHAR [0-9]|[a-f]
+UNICODE_ESCAPE ("\\u"{HEXCHAR}{HEXCHAR}{HEXCHAR}{HEXCHAR})|("\\U00"{HEXCHAR}{HEXCHAR}{HEXCHAR}{HEXCHAR}{HEXCHAR}{HEXCHAR})
 %%
 
 [Dd]"efine" {
@@ -63,6 +66,10 @@ LWSP [\t\r\n ]
 return REGEX;
 }
 
+"list" {
+    return DEFINED_LIST;
+}
+
 "DefIns" {
     return DEFINS;
 }
@@ -107,6 +114,14 @@ return REGEX;
     return TOUPPER_LEFT;
 }
 
+"UpCase(" {
+    return TOUPPER_LEFT; /* UpCase( yields the to-upper token, DownCase( the to-lower one */
+}
+
+"DownCase(" {
+    return TOLOWER_LEFT; /* see UpCase( above; the two tokens were previously swapped */
+}
+
 "Ins(" {
     return INS_LEFT;
 }
@@ -147,6 +162,22 @@ return REGEX;
     return MAP_LEFT;
 }
 
+"Lit(" {
+    return LIT_LEFT;
+}
+
+"Lst(" {
+    return LST_LEFT;
+}
+
+"Sigma(" {
+    return SIGMA_LEFT;
+}
+
+"Counter(" {
+    return COUNTER_LEFT;
+}
+
 "~"   { return COMPLEMENT; }
 "\\"  { return TERM_COMPLEMENT; }
 "&"   { return INTERSECTION; }
@@ -285,6 +316,11 @@ return REGEX;
     return CURLY_LITERAL;
 }
 
+"\""(({UNICODE_ESCAPE}|{U8C})"-"({UNICODE_ESCAPE}|{U8C}))+"\"" {
+    pmatchlval.transducer = hfst::pmatch::parse_range(pmatchtext);
+    return CHARACTER_RANGE;
+}
+
 "\""([^"\""]|"\\\"")+"\"" {
     pmatchlval.label = hfst::pmatch::parse_quoted(pmatchtext); 
     return QUOTED_LITERAL;
diff --git a/libhfst/src/parsers/pmatch_parse.yy b/libhfst/src/parsers/pmatch_parse.yy
index e3d5b40..b64c70c 100644
--- a/libhfst/src/parsers/pmatch_parse.yy
+++ b/libhfst/src/parsers/pmatch_parse.yy
@@ -49,6 +49,7 @@
          std::pair<std::string, hfst::HfstTransducer*>* transducerDefinition;
          std::map<std::string, hfst::HfstTransducer>* transducerDefinitions;
          hfst::pmatch::PmatchAstNode* ast_node;
+         std::vector<hfst::pmatch::PmatchAstNode *>* ast_node_vector;
          std::vector<std::string>* string_vector;
     
          std::pair<hfst::xeroxRules::ReplaceArrow, std::vector<hfst::xeroxRules::Rule> >* replaceRuleVectorWithArrow;
@@ -75,7 +76,8 @@
 REGEXP8 REGEXP9 REGEXP10 REGEXP11 REGEXP12 LABEL_PAIR
 REPLACE REGEXP3 FUNCALL MAP FUNCALL_ARG
 %type <label> LABEL
-%type <ast_node> FUNCBODY1 FUNCBODY2 FUNCBODY3 FUNCBODY4 FUNCBODY5 FUNCBODY6 FUNC_LABEL_PAIR
+%type <ast_node> FUNCBODY1 FUNCBODY2 FUNCBODY3 FUNCBODY4 FUNCBODY5 FUNCBODY6 FUNC_LABEL_PAIR FUN_OR_CONTEXT FUN_AND_CONTEXT FUN_CONTEXT_CONDITION FUN_CONTEXT
+%type <ast_node_vector> FUN_CONTEXT_CONDITIONS
 %type <string_vector> ARGLIST
 %type <transducerVector> FUNCALL_ARGLIST
 
@@ -96,6 +98,7 @@ REPLACE REGEXP3 FUNCALL MAP FUNCALL_ARG
 
 %nonassoc <weight> WEIGHT END_OF_WEIGHTED_EXPRESSION
 %nonassoc <label> QUOTED_LITERAL CURLY_LITERAL SYMBOL SYMBOL_WITH_LEFT_PAREN
+%nonassoc <transducer> CHARACTER_RANGE
 
 %left  CROSS_PRODUCT COMPOSITION LENIENT_COMPOSITION INTERSECTION
 %left  CENTER_MARKER MARKUP_MARKER
@@ -140,9 +143,10 @@ PAIR_SEPARATOR_WO_RIGHT PAIR_SEPARATOR_WO_LEFT
 %token EPSILON_TOKEN ANY_TOKEN BOUNDARY_MARKER
 %token LEXER_ERROR
 
-%nonassoc DEFINE REGEX DEFINS DEFFUN ALPHA LOWERALPHA UPPERALPHA NUM PUNCT WHITESPACE
-OPTCAP_LEFT TOLOWER_LEFT TOUPPER_LEFT INS_LEFT DEFINE_LEFT ENDTAG_LEFT LC_LEFT
-RC_LEFT NLC_LEFT NRC_LEFT MAP_LEFT SYM_LEFT OR_LEFT AND_LEFT
+%nonassoc DEFINE REGEX DEFINED_LIST DEFINS DEFFUN ALPHA LOWERALPHA UPPERALPHA
+NUM PUNCT WHITESPACE OPTCAP_LEFT TOLOWER_LEFT TOUPPER_LEFT INS_LEFT DEFINE_LEFT
+ENDTAG_LEFT LC_LEFT RC_LEFT NLC_LEFT NRC_LEFT MAP_LEFT LIT_LEFT LST_LEFT
+SIGMA_LEFT COUNTER_LEFT OR_LEFT AND_LEFT
 %%
 
 
@@ -211,6 +215,12 @@ DEFINITION: DEFINE BINDING { $$ = $2; }
     $2->minimize();
     $$ = new std::pair<std::string, hfst::HfstTransducer*>("TOP", $2);
  }
+| DEFINED_LIST BINDING {
+    $$ = new std::pair<std::string, hfst::HfstTransducer*>($2->first,
+                                                           hfst::pmatch::make_sigma($2->second));
+    delete $2->second;
+    delete $2;
+ }
 | DEFFUN FUNCTION { $$ = $2; }
 ;
 
@@ -297,20 +307,9 @@ FUNCBODY1: FUNCBODY2 { }
 | FUNCBODY1 LENIENT_COMPOSITION FUNCBODY2 {
     $$ = new PmatchAstNode($1, $3, hfst::pmatch::AstLenientCompose);
  }
-| FUNCBODY1 FUN_RIGHT_CONTEXT {
-     $$ = new PmatchAstNode($1, $2, hfst::pmatch::AstConcatenate);
- }
-| FUNCBODY1 FUN_LEFT_CONTEXT {
-    $$ = new PmatchAstNode($2, $1, hfst::pmatch::AstConcatenate);
- }
-| FUNCBODY1 FUN_NEGATIVE_RIGHT_CONTEXT {
-    $$ = new PmatchAstNode($1, $2, hfst::pmatch::AstConcatenate);
- }
-| FUNCBODY1 FUN_NEGATIVE_LEFT_CONTEXT {
-    $$ = new PmatchAstNode($2, $1, hfst::pmatch::AstConcatenate);
- }
 ;
 
+
 FUNCBODY2: FUNCBODY3 { }
 | FUNCBODY2 UNION FUNCBODY3 {
     $$ = new PmatchAstNode($1, $3, hfst::pmatch::AstDisjunct);
@@ -321,30 +320,28 @@ FUNCBODY2: FUNCBODY3 { }
 | FUNCBODY2 MINUS FUNCBODY3 {
     $$ = new PmatchAstNode($1, $3, hfst::pmatch::AstSubtract);
  }
-// | REGEXP5 UPPER_MINUS REGEXP6 {
+// | REGEXP3 UPPER_MINUS REGEXP4 {
 //     pmatcherror("No upper minus");
 //     $$ = $1;
 //     delete $3;
 //  }
-// | REGEXP5 LOWER_MINUS REGEXP6 {
+// | REGEXP3 LOWER_MINUS REGEXP4 {
 //     pmatcherror("No lower minus");
 //     $$ = $1;
 //     delete $3;
 //  }
-// | REGEXP5 UPPER_PRIORITY_UNION REGEXP6 {
+// | REGEXP3 UPPER_PRIORITY_UNION REGEXP4 {
 //     pmatcherror("No upper priority union");
 //     $$ = $1;
 //     delete $3;
 //  }
-// | REGEXP5 LOWER_PRIORITY_UNION REGEXP6 {
+// | REGEXP3 LOWER_PRIORITY_UNION REGEXP3 {
 //     pmatcherror("No lower priority union");
 //     $$ = $1;
 //     delete $3;
 //  }
 ;
 
-
-
 FUNCBODY3: FUNCBODY4 { }
 | FUNCBODY3 FUNCBODY4 {
     $$ = new PmatchAstNode($1, $2, hfst::pmatch::AstConcatenate);
@@ -376,6 +373,18 @@ FUNCBODY4: FUNCBODY5
     $$ = new PmatchAstNode(hfst::pmatch::get_utils()->latin1_whitespace_acceptor);
  }
 | INSERT { $$ = new PmatchAstNode($1); }
+| ANONYMOUS_DEFINITION {
+$$ = new PmatchAstNode($1, hfst::pmatch::AstAddDelimiters);
+// Fixme: no funargs inside this
+}
+| FUNCALL {
+    $$ = new PmatchAstNode($1);
+// Fixme: no funargs inside this
+  }
+| MAP {
+    $$ = new PmatchAstNode($1);
+// Fixme: no funargs inside this
+}
 | FUN_OPTCAP { }
 | FUN_TOUPPER { }
 | FUN_TOLOWER { }
@@ -428,9 +437,6 @@ FUNCBODY6: FUNC_LABEL_PAIR { }
     $$ = new PmatchAstNode($1);
     free($1);
  }
-| BOUNDARY_MARKER {
-    $$ = new PmatchAstNode(new HfstTransducer("@BOUNDARY@", "@BOUNDARY@", hfst::pmatch::format));
-  }
 | ENDTAG_LEFT SYMBOL RIGHT_PARENTHESIS {
     $$ = new PmatchAstNode(hfst::pmatch::make_end_tag($2));
     hfst::pmatch::need_delimiters = true;
@@ -468,20 +474,181 @@ FUNCBODY6: FUNC_LABEL_PAIR { }
     $$ = new PmatchAstNode(hfst::HfstTransducer::read_lexc_ptr($1, hfst::TROPICAL_OPENFST_TYPE, hfst::pmatch::verbose));
     free($1);
   }
+| FUN_CONTEXT_CONDITION { }
+
+
+
+
+;
+
+FUN_CONTEXT_CONDITION: // one context, or an OR_LEFT / AND_LEFT combination of conditions
+FUN_CONTEXT { $$ = $1; hfst::pmatch::need_delimiters = true; } // a bare context sets the need_delimiters flag
+| FUN_OR_CONTEXT { } // empty action: Bison's default $$ = $1 applies
+| FUN_AND_CONTEXT { } // empty action: Bison's default $$ = $1 applies
 ;
 
+FUN_CONTEXT:
+FUN_RIGHT_CONTEXT { }
+| FUN_NEGATIVE_RIGHT_CONTEXT { }
+| FUN_LEFT_CONTEXT { }
+| FUN_NEGATIVE_LEFT_CONTEXT { };
+
+FUN_OR_CONTEXT: OR_LEFT FUN_CONTEXT_CONDITIONS RIGHT_PARENTHESIS
+{
+    // Fold the collected conditions, in vector order, into nested AstDisjunct
+    // nodes; a single condition is passed through, an empty vector gives NULL.
+    PmatchAstNode * disjunction = NULL;
+    for (std::vector<PmatchAstNode *>::iterator cond = $2->begin();
+         cond != $2->end(); ++cond) {
+        disjunction = (disjunction == NULL)
+            ? *cond
+            : new PmatchAstNode(disjunction, *cond, hfst::pmatch::AstDisjunct);
+    }
+    $$ = disjunction;
+    delete $2;
+    // The disjunction is complete: reset the counter used when making
+    // minimization guards for disjuncted negative contexts.
+    hfst::pmatch::zero_minimization_guard();
+};
+
+FUN_AND_CONTEXT: AND_LEFT FUN_CONTEXT_CONDITIONS RIGHT_PARENTHESIS
+{
+    $$ = NULL; // fold the conditions, in vector order, into nested AstConcatenate nodes
+    for (std::vector<PmatchAstNode *>::iterator it = $2->begin();
+         it != $2->end(); ++it) {
+        if ($$ == NULL) { // comparison, not assignment: the first condition seeds the chain
+            $$ = *it;
+        } else {
+            PmatchAstNode * tmp = $$;
+            $$ = new PmatchAstNode(tmp, *it, hfst::pmatch::AstConcatenate);
+        }
+    }
+    delete $2; // only the vector is freed; the nodes it held are referenced from $$
+};
+
+FUN_CONTEXT_CONDITIONS: // comma-separated context conditions, collected in a heap-allocated vector
+FUN_CONTEXT_CONDITION {
+    $$ = new std::vector<PmatchAstNode *>(1, $1); // last (or only) condition starts the vector
+ }
+| FUN_CONTEXT_CONDITION COMMA FUN_CONTEXT_CONDITIONS {
+    $3->push_back($1); // the tail is reduced first, so the vector ends up in reverse source order
+    $$ = $3; // reuse the tail's vector; whoever consumes this rule deletes it
+ };
+
+
+
+
  FUNC_LABEL_PAIR:
 LABEL {
     $$ = new PmatchAstNode(new HfstTransducer($1, hfst::pmatch::format));
     free($1);
-} |
-CURLY_LITERAL {
+}
+| CURLY_LITERAL {
     HfstTokenizer tok;
     $$ = new PmatchAstNode(new HfstTransducer($1, tok, hfst::pmatch::format));
     free($1);
 }
+| LABEL PAIR_SEPARATOR LABEL {
+    $$ = new PmatchAstNode(new HfstTransducer($1, $3, hfst::pmatch::format));
+    free($1); free($3);
+}
+| ANY_TOKEN PAIR_SEPARATOR ANY_TOKEN {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_unknown, hfst::pmatch::format));
+}
+| LABEL PAIR_SEPARATOR ANY_TOKEN {
+    $$ = new PmatchAstNode(new HfstTransducer($1, hfst::internal_unknown, hfst::pmatch::format));
+    free($1);
+}
+| ANY_TOKEN PAIR_SEPARATOR LABEL {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_unknown, $3, hfst::pmatch::format));
+    free($3);
+}
+| LABEL PAIR_SEPARATOR_WO_RIGHT {
+    $$ = new PmatchAstNode(new HfstTransducer($1, hfst::internal_unknown, hfst::pmatch::format));
+    free($1);
+ }
+| ANY_TOKEN PAIR_SEPARATOR_WO_RIGHT {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_unknown, hfst::internal_unknown,
+                                          hfst::pmatch::format));
+}
+| PAIR_SEPARATOR_WO_LEFT LABEL {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_unknown, $2, hfst::pmatch::format));
+    free($2);
+}
+| PAIR_SEPARATOR_WO_LEFT ANY_TOKEN {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_unknown, hfst::internal_unknown,
+                                              hfst::pmatch::format));
+}
+| PAIR_SEPARATOR_SOLE {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_unknown,
+                                              hfst::pmatch::format));
+  }
+| ANY_TOKEN {
+    $$ = new PmatchAstNode(new HfstTransducer(hfst::internal_identity,
+                                              hfst::pmatch::format));
+  }
+| CURLY_LITERAL PAIR_SEPARATOR CURLY_LITERAL {
+    HfstTokenizer tok;
+    HfstTransducer * left = new HfstTransducer($1, tok, hfst::pmatch::format);
+    HfstTransducer right($3, tok, hfst::pmatch::format);
+    HfstTransducer destroy(hfst::internal_unknown, hfst::internal_epsilon, hfst::pmatch::format);
+    HfstTransducer construct(hfst::internal_epsilon, hfst::internal_unknown, hfst::pmatch::format);
+    left->compose(destroy.repeat_star());
+    left->compose(construct.repeat_star());
+    left->compose(right);
+    $$ = new PmatchAstNode(left);
+    free($1); free($3);
+}
+| LABEL PAIR_SEPARATOR CURLY_LITERAL {
+    HfstTokenizer tok;
+    HfstTransducer * left = new HfstTransducer(
+        $1, hfst::internal_epsilon, hfst::pmatch::format);
+    HfstTransducer right($3, tok, hfst::pmatch::format);
+    HfstTransducer construct(hfst::internal_epsilon, hfst::internal_unknown, hfst::pmatch::format);
+    left->compose(construct.repeat_star());
+    left->compose(right);
+    $$ = new PmatchAstNode(left);
+    free($1); free($3);
+}
+| CURLY_LITERAL PAIR_SEPARATOR LABEL {
+    HfstTokenizer tok;
+    HfstTransducer * left = new HfstTransducer($1, tok, hfst::pmatch::format);
+    HfstTransducer right(hfst::internal_epsilon, $3, hfst::pmatch::format);
+    HfstTransducer destroy(hfst::internal_unknown, hfst::internal_epsilon, hfst::pmatch::format);
+    left->compose(destroy.repeat_star());
+    left->compose(right);
+    $$ = new PmatchAstNode(left);
+    free($1); free($3);
+}
+
 ;
 
+// MAP: MAP_LEFT SYMBOL COMMA READ_TEXT RIGHT_PARENTHESIS {
+//     if (hfst::pmatch::functions.count($2) == 0) {
+//         std::string errstring = "Function not defined: " + std::string($2);
+//         pmatcherror(errstring.c_str());
+//     }
+//     std::vector<string> & callee_args = hfst::pmatch::functions[$2].args;
+//     std::vector<std::vector<std::string> > caller_strings =
+//         hfst::pmatch::read_args($4, callee_args.size());
+//     std::map<std::string, HfstTransducer*> caller_args;
+//     HfstTokenizer tok;
+//     $$ = new HfstTransducer(hfst::pmatch::format);
+//     for (std::vector<std::vector<std::string> >::iterator it =
+//              caller_strings.begin(); it != caller_strings.end(); ++it) {
+//         for (int i = 0; i < it->size(); ++i) {
+//             caller_args[callee_args[i]] = new HfstTransducer(it->at(i), tok, hfst::pmatch::format);
+//         }
+//         $$->disjunct(*hfst::pmatch::functions[$2].evaluate(caller_args));
+//         // Clean up the string transducers we allocated each time 
+//         for (std::map<std::string, HfstTransducer *>::iterator it = caller_args.begin();
+//              it != caller_args.end(); ++it) {
+//             delete it->second;
+//         }
+//         caller_args.clear();
+//     }
+//     $$->minimize();
+// };
 
 REGEXP2: REPLACE
 { }
@@ -1147,8 +1314,13 @@ LABEL_PAIR: LABEL PAIR_SEPARATOR LABEL {
         if (strlen($1) == 0) {
             $$ = new HfstTransducer(hfst::pmatch::format);
         } else {
-            std::string errstring = "Unknown symbol: " + std::string($1);
-            pmatcherror(errstring.c_str());
+            if (hfst::pmatch::verbose) {
+                std::cerr << "Warning: interpreting undefined symbol \"" << $1
+                          << "\" as label on line " << pmatchlineno << "\n";
+            }
+            $$ = new HfstTransducer($1, hfst::pmatch::format);
+//            std::string errstring = "Unknown symbol: " + std::string($1);
+//            pmatcherror(errstring.c_str());
         }
     }
     free($1);
@@ -1223,12 +1395,32 @@ LABEL_PAIR: LABEL PAIR_SEPARATOR LABEL {
     $$ = hfst::pmatch::make_end_tag($2);
     hfst::pmatch::need_delimiters = true;
  }
-
+| CHARACTER_RANGE { $$ = $1; }
+| LST_LEFT REGEXP2 RIGHT_PARENTHESIS
+{
+    if (!hfst::pmatch::flatten) { // build a list transducer from the operand
+        $$ = hfst::pmatch::make_list($2);
+        delete $2; // $2 is an HfstTransducer*, so it is released with delete, not free()
+    } else {
+        $$ = $2; // when flattening, the operand itself is the result
+    }
+}
+| SIGMA_LEFT REGEXP2 RIGHT_PARENTHESIS
+{
+    $$ = hfst::pmatch::make_sigma($2); // result is a new transducer derived from the operand
+    delete $2; // $2 is an HfstTransducer*, so it is released with delete, not free()
+}
+| COUNTER_LEFT SYMBOL RIGHT_PARENTHESIS
+{
+    $$ = hfst::pmatch::make_counter($2);
+    free($2);
+}
 ;
 
 LABEL: QUOTED_LITERAL { }
 | EPSILON_TOKEN { $$ = strdup(hfst::internal_epsilon.c_str()); }
 | BOUNDARY_MARKER { $$ = strdup("@BOUNDARY@"); }
+| LIT_LEFT SYMBOL RIGHT_PARENTHESIS { $$ = $2; }
 ;
 
 CONTEXT_CONDITION:
@@ -1330,6 +1522,9 @@ SYMBOL {
     $$ = new HfstTransducer($1, hfst::pmatch::format);
     free($1);
   }
+| LEFT_BRACKET REGEXP2 RIGHT_BRACKET {
+    $$ = $2;
+}
 ;
 
 MAP: MAP_LEFT SYMBOL COMMA READ_TEXT RIGHT_PARENTHESIS {
@@ -1469,15 +1664,16 @@ FUN_RIGHT_CONTEXT: RC_LEFT FUNCBODY2 RIGHT_PARENTHESIS {
                                hfst::pmatch::format),
             $2, hfst::pmatch::AstConcatenate);
     $$ = new PmatchAstNode(rc_entry,
-                                         new HfstTransducer(
-                                             hfst::internal_epsilon,
-                                             hfst::pmatch::RC_EXIT_SYMBOL,
-                                             hfst::pmatch::format),
-                                         hfst::pmatch::AstConcatenate);
+                           new HfstTransducer(
+                               hfst::internal_epsilon,
+                               hfst::pmatch::RC_EXIT_SYMBOL,
+                               hfst::pmatch::format),
+                           hfst::pmatch::AstConcatenate);
  }
 ;
 
 FUN_NEGATIVE_RIGHT_CONTEXT: NRC_LEFT FUNCBODY2 RIGHT_PARENTHESIS {
+    HfstTransducer * guard = hfst::pmatch::get_minimization_guard();
     PmatchAstNode * nrc_entry =
         new PmatchAstNode(
             new HfstTransducer(hfst::internal_epsilon,
@@ -1491,11 +1687,12 @@ FUN_NEGATIVE_RIGHT_CONTEXT: NRC_LEFT FUNCBODY2 RIGHT_PARENTHESIS {
                                             hfst::pmatch::NRC_EXIT_SYMBOL,
                                             hfst::pmatch::format),
                                         hfst::pmatch::AstConcatenate);
-    $$ = new PmatchAstNode(
+    PmatchAstNode * context = new PmatchAstNode(
         nrc_main_branch,
         new HfstTransducer("@PMATCH_PASSTHROUGH@",
                            hfst::internal_epsilon, hfst::pmatch::format),
         hfst::pmatch::AstDisjunct);
+    $$ = new PmatchAstNode(guard, context, hfst::pmatch::AstConcatenate);
  }
 ;
 
@@ -1516,6 +1713,7 @@ FUN_LEFT_CONTEXT: LC_LEFT FUNCBODY2 RIGHT_PARENTHESIS {
 ;
 
 FUN_NEGATIVE_LEFT_CONTEXT: NLC_LEFT FUNCBODY2 RIGHT_PARENTHESIS {
+    HfstTransducer * guard = hfst::pmatch::get_minimization_guard();
     PmatchAstNode * reverse = new PmatchAstNode(
         $2, hfst::pmatch::AstReverse);
     
@@ -1529,10 +1727,12 @@ FUN_NEGATIVE_LEFT_CONTEXT: NLC_LEFT FUNCBODY2 RIGHT_PARENTHESIS {
     PmatchAstNode * main_branch = new PmatchAstNode(
         entry, nlc_exit, hfst::pmatch::AstConcatenate);
     
-    $$ = new PmatchAstNode(main_branch,
-                           new HfstTransducer("@PMATCH_PASSTHROUGH@",
-                                              hfst::internal_epsilon, hfst::pmatch::format),
-                           hfst::pmatch::AstDisjunct);
+    PmatchAstNode * context =
+        new PmatchAstNode(main_branch,
+                          new HfstTransducer("@PMATCH_PASSTHROUGH@",
+                                             hfst::internal_epsilon, hfst::pmatch::format),
+                          hfst::pmatch::AstDisjunct);
+    $$ = new PmatchAstNode(guard, context, hfst::pmatch::AstConcatenate);
 }
 ;
 
diff --git a/libhfst/src/parsers/pmatch_utils.cc b/libhfst/src/parsers/pmatch_utils.cc
index 95304fe..4a33c63 100644
--- a/libhfst/src/parsers/pmatch_utils.cc
+++ b/libhfst/src/parsers/pmatch_utils.cc
@@ -12,6 +12,7 @@
 #include "pmatch_utils.h"
 #include "HfstTransducer.h"
 #include "tools/src/HfstUtf8.h"
+#include "implementations/optimized-lookup/pmatch.h"
 
 using std::string;
 using std::map;
@@ -254,6 +255,47 @@ HfstTransducer * make_end_tag(std::string tag)
     return end_tag;
 }
 
+// Return a heap-allocated transducer built from the symbol pair
+// (hfst::internal_epsilon, "@PMATCH_COUNTER_<name>@").
+HfstTransducer * make_counter(std::string name)
+{
+    std::string counter_symbol("@PMATCH_COUNTER_");
+    counter_symbol.append(name).append("@");
+    return new HfstTransducer(hfst::internal_epsilon, counter_symbol, format);
+}
+
+// Encode the non-special symbols of t's alphabet into one list symbol of the
+// form "@PMATCH_LIST_a_b_@" and return a new transducer for that symbol.
+HfstTransducer * make_list(HfstTransducer * t)
+{
+    const hfst::StringSet symbols = t->get_alphabet();
+    std::string list_symbol("@PMATCH_LIST_");
+    for (hfst::StringSet::const_iterator sym = symbols.begin();
+         sym != symbols.end(); ++sym) {
+        if (hfst_ol::PmatchAlphabet::is_special(*sym) ||
+            *sym == hfst::internal_epsilon || *sym == hfst::internal_unknown ||
+            *sym == hfst::internal_identity || *sym == hfst::internal_default)
+            { continue; }
+        list_symbol.append(*sym).append("_");
+    }
+    return new HfstTransducer(list_symbol.append("@"), format);
+}
+
+// Disjoin one single-symbol transducer per non-special symbol of t's alphabet.
+HfstTransducer * make_sigma(HfstTransducer * t)
+{
+    const hfst::StringSet symbols = t->get_alphabet();
+    HfstTransducer * sigma = new HfstTransducer(format);
+    for (hfst::StringSet::const_iterator sym = symbols.begin();
+         sym != symbols.end(); ++sym) {
+        if (hfst_ol::PmatchAlphabet::is_special(*sym) ||
+            *sym == hfst::internal_epsilon || *sym == hfst::internal_unknown ||
+            *sym == hfst::internal_identity || *sym == hfst::internal_default)
+            { continue; }
+        sigma->disjunct(HfstTransducer(*sym, format));
+    }
+    return sigma;
+}
 
 char * get_delimited(const char *s, char delim_left, char delim_right)
 {
@@ -364,55 +406,37 @@ parse_quoted(const char *s)
               case 'u':
                   if (strlen(p) < 6) {
                       // Can't be a valid escape sequence
-                      *r++ = '\\';
-                      *r++ = 'u';
+                      *r++ = *p;
+                      *r++ = *(p+1);
                       p += 2;
                   } else {
                       char buf[5];
                       memcpy(buf, p+2, 4);
                       buf[4] = '\0';
                       unsigned int codepoint = strtol(buf, NULL, 16);
-                      bool u_parse_err = false;
-                      // The following is adapted from an answer at
-                      // http://stackoverflow.com/questions/4607413/c-library-to-convert-unicode-code-points-to-utf8
-                      // My understanding of the magic numbers:
-                      // 0x80 = 128 = 2^7
-                      // 64 = 2^6, 192 = 2^6 + 2^7
-                      // 0x800 = 2048 = 2^11
-                      // 0x1000 = 2^16 etc.
-                      if (codepoint < 0x80) {
-                          buf[0] = codepoint;
-                          buf[1] = '\0';
-                      } else if (codepoint < 0x800) {
-                          buf[0] = 192 + codepoint / 64;
-                          buf[1] = 128 + codepoint % 64;
-                          buf[2] = '\0';
-                      } else if (codepoint - 0xd800u < 0x800) {
-                          u_parse_err = true;
-                      } else if (codepoint < 0x10000) {
-                          buf[0] = 224 + codepoint / 4096;
-                          buf[1] = 128 + codepoint / 64 % 64;
-                          buf[2] = 128 + codepoint % 64;
-                          buf[3] = '\0';
-                      } else if (codepoint < 0x110000) {
-                          buf[0] = 240 + codepoint / 262144;
-                          buf[1] = 128 + codepoint / 4096 % 64;
-                          buf[2] = 128 + codepoint / 64 % 64;
-                          buf[3] = 128 + codepoint % 64;
-                          buf[4] = '\0';
-                      } else {
-                          u_parse_err = true;
-                      }
-                      if (u_parse_err) {
-                          fprintf(stderr, "PMATCH: Failed to parse unicode codepoint\n");
-                          *r++ = '\0';
-                      } else {
-                          strcpy(r, buf);
-                          r += strlen(buf) + 1;
-                      }
+                      std::string utf8_char = codepoint_to_utf8(codepoint);
+                      strcpy(r, utf8_char.c_str());
+                      r += utf8_char.size() + 1;
                       p += 6;
                   }
                   break;
+              case 'U':
+                  if (strlen(p) < 10) {
+                      // Can't be a valid escape sequence
+                      *r++ = *p;
+                      *r++ = *(p+1);
+                      p += 2;
+                  } else {
+                      char buf[9];
+                      memcpy(buf, p+2, 8);
+                      buf[8] = '\0';
+                      unsigned int codepoint = strtol(buf, NULL, 16);
+                      std::string utf8_char = codepoint_to_utf8(codepoint);
+                      strcpy(r, utf8_char.c_str());
+                      r += utf8_char.size() + 1;
+                      p += 10;
+                  }
+                  break;
               case 'v':
                 *r = '\v';
                 r++;
@@ -456,6 +480,141 @@ parse_quoted(const char *s)
     return rv;
 }
 
+// Decode the UTF-8 character at *c and advance *c past it. Returns 0, leaving
+// *c untouched, at the terminating NUL or on an invalid lead/continuation byte.
+unsigned int next_utf8_to_codepoint(unsigned char **c)
+{
+    const unsigned char lead = **c;
+    unsigned int codepoint = lead; // correct as is for a one-byte character
+    int bytes_in_char = 1;
+    // Every mask includes the 0 bit that ends the length prefix; testing only
+    // the leading 1 bits would classify 3- and 4-byte leads as 2-byte ones.
+    if ((lead & 0xE0) == 0xC0) {
+        bytes_in_char = 2; codepoint = lead & 0x1F;
+    } else if ((lead & 0xF0) == 0xE0) {
+        bytes_in_char = 3; codepoint = lead & 0x0F;
+    } else if ((lead & 0xF8) == 0xF0) {
+        bytes_in_char = 4; codepoint = lead & 0x07;
+    } else if (lead == 0 || lead > 0x7F) {
+        return 0; // end of string, stray continuation byte, or 0xF8..0xFF
+    }
+    for (int i = 1; i < bytes_in_char; ++i) {
+        if (((*c)[i] & 0xC0) != 0x80) { return 0; } // also stops at a NUL
+        codepoint = (codepoint << 6) | ((*c)[i] & 0x3F);
+    }
+    *c += bytes_in_char;
+    return codepoint;
+}
+
+// Encode a Unicode code point as UTF-8. Surrogates (0xD800..0xDFFF) and
+// values of 0x110000 or above cannot be encoded; they yield an empty string.
+std::string codepoint_to_utf8(unsigned int codepoint)
+{
+    // Adapted from an answer at
+    // http://stackoverflow.com/questions/4607413/c-library-to-convert-unicode-code-points-to-utf8
+    //   192 = 0xC0, 224 = 0xE0, 240 = 0xF0 mark a 2-, 3- or 4-byte lead byte;
+    //   128 = 0x80 marks a continuation byte, which carries 6 bits (x % 64);
+    //   / 64, / 4096 (2^12) and / 262144 (2^18) shift by 6, 12 and 18 bits;
+    //   0x80 = 2^7, 0x800 = 2^11 and 0x10000 = 2^16 bound each encoded length.
+    char utf8[5] = { '\0', '\0', '\0', '\0', '\0' }; // stays NUL-terminated
+    // One byte: plain ASCII.
+    if (codepoint < 0x80) {
+        utf8[0] = codepoint;
+        return std::string(utf8);
+    }
+    // Two bytes: 5 + 6 payload bits.
+    if (codepoint < 0x800) {
+        utf8[0] = 192 + codepoint / 64;
+        utf8[1] = 128 + codepoint % 64;
+        return std::string(utf8);
+    }
+    // Not encodable: a surrogate half or a value beyond the Unicode range.
+    if (codepoint - 0xd800u < 0x800 || codepoint >= 0x110000) {
+        return "";
+    }
+    // Three bytes: 4 + 6 + 6 payload bits.
+    if (codepoint < 0x10000) {
+        utf8[0] = 224 + codepoint / 4096;
+        utf8[1] = 128 + codepoint / 64 % 64;
+        utf8[2] = 128 + codepoint % 64;
+        return std::string(utf8);
+    }
+    // Four bytes: 3 + 6 + 6 + 6 payload bits.
+    utf8[0] = 240 + codepoint / 262144;
+    utf8[1] = 128 + codepoint / 4096 % 64;
+    utf8[2] = 128 + codepoint / 64 % 64;
+    utf8[3] = 128 + codepoint % 64;
+    return std::string(utf8);
+}
+
+// Read one endpoint of a character range at *c and advance *c past it.
+// An endpoint is a \uXXXX or \UXXXXXXXX escape or one raw UTF-8 character.
+// The whole escape must be present before its digits are copied; a shorter
+// one is decoded as raw text so that nothing past the terminator is read.
+// Returns the code point, or 0 if nothing valid could be read.
+static unsigned int read_range_endpoint(char ** c)
+{
+    if (**c == '\\' && (*(*c + 1) == 'u' || *(*c + 1) == 'U')) {
+        const size_t digits = (*(*c + 1) == 'u') ? 4 : 8;
+        if (strlen(*c) >= digits + 2) {
+            char buf[9];
+            memcpy(buf, *c + 2, digits);
+            buf[digits] = '\0';
+            *c += digits + 2;
+            return strtol(buf, NULL, 16);
+        }
+    }
+    return next_utf8_to_codepoint((unsigned char**) c);
+}
+
+// Compile a range expression into a transducer.
+//
+// s is the token text; its double-quoted part holds one or more FIRST-LAST
+// pairs written back to back, e.g. "a-z0-9" or "\u0041-\u005A". The result
+// is the disjunction of one single-symbol transducer per code point from
+// FIRST to LAST inclusive, over all pairs.
+//
+// A missing '-', an unreadable character, or a pair whose LAST is below its
+// FIRST is reported through pmatcherror(). Returns a heap-allocated
+// transducer.
+HfstTransducer * parse_range(const char * s)
+{
+    char * quoted = get_delimited(s, '"');
+    char * cursor = quoted; // quoted itself is kept for the final free()
+    HfstTransducer * retval = new HfstTransducer(format);
+    while (*cursor != '\0') {
+        unsigned int codepoint1 = read_range_endpoint(&cursor);
+        if (*cursor != '-') {
+            std::string errstring("Could not parse range expression: ");
+            errstring.append(std::string(s));
+            pmatcherror(errstring.c_str());
+            // Should pmatcherror() return, stepping over a character that is
+            // not '-' could move the cursor past the terminating NUL.
+            break;
+        }
+        // Skip the '-' between the two endpoints.
+        cursor += 1;
+        unsigned int codepoint2 = read_range_endpoint(&cursor);
+        if (codepoint1 == 0 || codepoint2 == 0) {
+            std::string errstring("Malformed character in range expression: ");
+            errstring.append(std::string(s));
+            pmatcherror(errstring.c_str());
+            break; // the cursor position cannot be trusted after a bad read
+        }
+        if (codepoint2 < codepoint1) {
+            std::string errstring("Range expression goes from higher to lower: ");
+            errstring.append(std::string(s));
+            pmatcherror(errstring.c_str());
+        }
+        while (codepoint1 <= codepoint2) {
+            retval->disjunct(HfstTransducer(codepoint_to_utf8(codepoint1), format));
+            ++codepoint1;
+        }
+    }
+    free(quoted);
+    return retval;
+}
+
 double
 get_weight(const char *s)
 {
@@ -918,6 +1077,22 @@ HfstTransducer * PmatchAstNode::evaluate(
     } else if (type == AstSymbol) {
         if (funargs.count(symbol) == 1) {
             return new HfstTransducer(*funargs[symbol]);
+        } else if (definitions.count(symbol) == 1) {
+            if (!flatten && def_insed_transducers.count(symbol) == 1) {
+                inserted_transducers.insert(symbol);
+                if (verbose) {
+                    used_definitions.insert(symbol);
+                }
+                return new HfstTransducer(*def_insed_transducers[symbol]);
+            } else {
+                if (verbose) {
+                    std::cerr << "including " <<
+                        definitions[symbol]->get_name() << " with ";
+                    print_size_info(hfst::pmatch::definitions[symbol]);
+                    used_definitions.insert(symbol);
+                }
+                return new HfstTransducer(*hfst::pmatch::definitions[symbol]);
+            }
         } else {
             std::string errstring = "Symbol " + std::string(symbol) + " not found";
             pmatcherror(errstring.c_str());
diff --git a/libhfst/src/parsers/pmatch_utils.h b/libhfst/src/parsers/pmatch_utils.h
index 18f1258..817a76b 100644
--- a/libhfst/src/parsers/pmatch_utils.h
+++ b/libhfst/src/parsers/pmatch_utils.h
@@ -88,6 +88,9 @@ HfstTransducer * add_pmatch_delimiters(HfstTransducer * regex);
  */
 void add_end_tag(HfstTransducer * regex, std::string tag);
 HfstTransducer * make_end_tag(std::string tag);
+HfstTransducer * make_counter(std::string name);
+HfstTransducer * make_list(HfstTransducer * t);
+HfstTransducer * make_sigma(HfstTransducer * t);
 
 /**
  * @brief find first segment from strign @a s delimited by char delim.
@@ -100,6 +103,10 @@ char* unescape_delimited(char *s, char delim);
 
 char* parse_quoted(const char *s);
 
+unsigned int next_utf8_to_codepoint(unsigned char **c);
+std::string codepoint_to_utf8(unsigned int codepoint);
+HfstTransducer * parse_range(const char *s);
+
 int* get_n_to_k(const char* s);
 
 double get_weight(const char* s);
@@ -326,7 +333,7 @@ struct PmatchAstNode {
         transducer(NULL) { }
     
     PmatchAstNode(PmatchAstNode * l,
-              HfstTransducer * r,
+                  HfstTransducer * r,
               PmatchAstOperation o):
     left_child(l),
     right_child(new PmatchAstNode(r)),
diff --git a/libhfst/src/parsers/xre_lex.ll b/libhfst/src/parsers/xre_lex.ll
index 1803eca..79f13cc 100644
--- a/libhfst/src/parsers/xre_lex.ll
+++ b/libhfst/src/parsers/xre_lex.ll
@@ -102,6 +102,8 @@ BRACED      [{]([^}]|[\300-\337].|[\340-\357]..|[\360-\367]...)+[}]
 
 ".o." { CR; return COMPOSITION; }
 ".O." { CR; return LENIENT_COMPOSITION; }
+".m>." { CR; return MERGE_RIGHT_ARROW; }
+".<m." { CR; return MERGE_LEFT_ARROW; }
 ".x." { CR; return CROSS_PRODUCT; }
 ".P." { CR; return UPPER_PRIORITY_UNION; }
 ".p." { CR; return LOWER_PRIORITY_UNION; }
diff --git a/libhfst/src/parsers/xre_parse.yy b/libhfst/src/parsers/xre_parse.yy
index c75168e..2593c0b 100644
--- a/libhfst/src/parsers/xre_parse.yy
+++ b/libhfst/src/parsers/xre_parse.yy
@@ -121,7 +121,7 @@ int xrelex ( YYSTYPE * , yyscan_t );
 %nonassoc <weight> WEIGHT END_OF_WEIGHTED_EXPRESSION
 %nonassoc <label> SYMBOL CURLY_BRACKETS
 
-%left  CROSS_PRODUCT COMPOSITION LENIENT_COMPOSITION INTERSECTION
+%left  CROSS_PRODUCT COMPOSITION LENIENT_COMPOSITION INTERSECTION MERGE_RIGHT_ARROW MERGE_LEFT_ARROW
 %left  CENTER_MARKER MARKUP_MARKER
 %left  SHUFFLE
 %right LEFT_RESTRICTION LEFT_ARROW RIGHT_ARROW LEFT_RIGHT_ARROW
@@ -242,6 +242,23 @@ REGEXP2: REPLACE
             $$ = & $1->lenient_composition(*$3).minimize();
             delete $3;
         }
+       // Both merge operators abort the parse when an operand is not an automaton.
+       | REGEXP2 MERGE_RIGHT_ARROW REPLACE {
+          try { $$ = hfst::xre::merge_first_to_second($1, $3); }
+          catch (const TransducersAreNotAutomataException & e) {
+            xreerror("Error: transducers must be automata in merge operation.");
+            delete $1; delete $3; YYABORT;
+          }
+          delete $1; // the result is the right operand, returned by the merge
+       }
+       | REGEXP2 MERGE_LEFT_ARROW REPLACE {
+          try { $$ = hfst::xre::merge_first_to_second($3, $1); }
+          catch (const TransducersAreNotAutomataException & e) {
+            xreerror("Error: transducers must be automata in merge operation.");
+            delete $1; delete $3; YYABORT;
+          }
+          delete $3; // the result is the left operand, returned by the merge
+       }
         // substitute
        | SUB1 HALFARC PAIR_SEPARATOR HALFARC COMMA HALFARC PAIR_SEPARATOR HALFARC RIGHT_BRACKET {
             $1->substitute(StringPair($2,$4), StringPair($6,$8));
diff --git a/libhfst/src/parsers/xre_utils.cc b/libhfst/src/parsers/xre_utils.cc
index e9edade..996bfe3 100644
--- a/libhfst/src/parsers/xre_utils.cc
+++ b/libhfst/src/parsers/xre_utils.cc
@@ -70,11 +70,12 @@ namespace hfst
 namespace xre 
 {
 
-char* data;
-std::map<std::string,hfst::HfstTransducer*> definitions;
+  char* data;
+  std::map<std::string,hfst::HfstTransducer*> definitions;
   std::map<std::string,std::string>  function_definitions;
   std::map<std::string,unsigned int> function_arguments;
-char* startptr;
+  std::map<std::string,std::set<string> > symbol_lists;
+  char* startptr; // changed this to an internal variable in compile functions
 hfst::HfstTransducer* last_compiled;
 bool contains_only_comments = false;
 hfst::ImplementationType format;
@@ -438,29 +439,32 @@ HfstTransducer*
 compile(const string& xre, map<string,HfstTransducer*>& defs,
         map<string, string>& func_defs,
         map<string, unsigned int > func_args,
+        map<string, std::set<string> >& lists,
         ImplementationType impl)
 {
     // lock here?
     data = strdup(xre.c_str());
-    startptr = data;
+    // use an internal variable startptr_ instead of global startptr
+    char * startptr_ = data;
     len = strlen(data);
     definitions = defs;
     function_definitions = func_defs;
     function_arguments = func_args;
+    symbol_lists = lists;
     format = impl;
 
     contains_only_comments = false;
 
     yyscan_t scanner;
     xrelex_init(&scanner);
-    YY_BUFFER_STATE bs = xre_scan_string(startptr,scanner);
+    YY_BUFFER_STATE bs = xre_scan_string(startptr_,scanner);
     
     int parse_retval = xreparse(scanner);
 
     xre_delete_buffer(bs,scanner);
     xrelex_destroy(scanner);
 
-    free(startptr);
+    free(startptr_);
     data = 0;
     len = 0;
     if (parse_retval == 0 && !contains_only_comments) // if (yynerrs == 0)
@@ -480,23 +484,26 @@ HfstTransducer*
 compile_first(const string& xre, map<string,HfstTransducer*>& defs,
               map<string, string>& func_defs,
               map<string, unsigned int > func_args,
+              map<string, std::set<string> >& lists,
               ImplementationType impl,
               unsigned int & chars_read)
 {
     // lock here?
     data = strdup(xre.c_str());
-    startptr = data;
+    // use an internal variable startptr_ instead of global startptr
+    char * startptr_ = data;
     len = strlen(data);
     definitions = defs;
     function_definitions = func_defs;
     function_arguments = func_args;
+    symbol_lists = lists;
     format = impl;
 
     contains_only_comments = false;
 
     yyscan_t scanner;
     xrelex_init(&scanner);
-    YY_BUFFER_STATE bs = xre_scan_string(startptr,scanner);
+    YY_BUFFER_STATE bs = xre_scan_string(startptr_,scanner);
 
     bool tmp = hfst::xre::allow_extra_text_at_end;
     hfst::xre::allow_extra_text_at_end = true;
@@ -509,7 +516,8 @@ compile_first(const string& xre, map<string,HfstTransducer*>& defs,
     xre_delete_buffer(bs,scanner);
     xrelex_destroy(scanner);
 
-    free(startptr);
+    free(startptr_);
+
     data = 0;
     len = 0;
     if (parse_retval == 0 && !contains_only_comments) // if (yynerrs == 0)
@@ -725,30 +733,49 @@ xfst_label_to_transducer(const char* input, const char* output)
 {
   HfstTransducer * retval = NULL;
 
-  // non-matching definitions
-  if ( (is_definition(input) || is_definition(output)) && 
-       strcmp(input, output) != 0 )
+  bool input_is_definition = is_definition(input);
+  bool output_is_definition = is_definition(output);
+  bool input_is_unknown = (strcmp(input, hfst::internal_unknown.c_str()) == 0);
+  bool output_is_unknown = (strcmp(output, hfst::internal_unknown.c_str()) == 0);
+
+  // definitions -> use cross-product
+  if (input_is_definition || output_is_definition)
     {
-      // TODO, FIX:
-      //char msg[256];
-      //sprintf(msg, "invalid use of definitions in label %s:%s", 
-      //        get_print_format(input), get_print_format(output));
-      //yyerror(msg);
+      HfstTransducer * tmp = NULL; // temporary transducer for cross-product calculation
+      if (input_is_unknown)
+        {
+          retval = new HfstTransducer(hfst::internal_identity, hfst::xre::format);
+          tmp = expand_definition(output);
+        }
+      else if (output_is_unknown)
+        {
+          tmp = new HfstTransducer(hfst::internal_identity, hfst::xre::format);
+          retval = expand_definition(input);
+        }
+      else // neither is unknown
+        {
+          retval = expand_definition(input);
+          tmp = expand_definition(output);
+        }
+      retval->cross_product(*tmp);
+      delete tmp;
+      return retval;
     }
-  if  (strcmp(input, hfst::internal_unknown.c_str()) == 0 && 
-       strcmp(output, hfst::internal_unknown.c_str()) == 0)
+
+  // no definitions
+  if  (input_is_unknown && output_is_unknown)
     {
       retval = new HfstTransducer(hfst::internal_unknown, hfst::internal_unknown, hfst::xre::format);
       HfstTransducer id(hfst::internal_identity, hfst::internal_identity, hfst::xre::format);
       retval->disjunct(id).minimize();
     }
-  else if (strcmp(input, hfst::internal_unknown.c_str()) == 0)
+  else if (input_is_unknown)
     {
       retval = new HfstTransducer(hfst::internal_unknown, output, hfst::xre::format);
       HfstTransducer output_tr(output, output, hfst::xre::format);
       retval->disjunct(output_tr).minimize();
     }
-  else if (strcmp(output, hfst::internal_unknown.c_str()) == 0)
+  else if (output_is_unknown)
     {
       retval = new HfstTransducer(input, hfst::internal_unknown, hfst::xre::format);
       HfstTransducer input_tr(input, input, hfst::xre::format);
@@ -758,10 +785,6 @@ xfst_label_to_transducer(const char* input, const char* output)
     {
       retval = new HfstTransducer(input, output, hfst::xre::format);
     }
-
-  if (is_definition(input))
-    retval = expand_definition(input); // changed
-
   return retval;
 }
 
@@ -914,6 +937,16 @@ xfst_label_to_transducer(const char* input, const char* output)
     return retval;
   }
 
+  HfstTransducer * merge_first_to_second(HfstTransducer * tr1, HfstTransducer * tr2)
+  {
+    // Minimize tr1, merge it into tr2 and return tr2; neither operand is deleted here.
+    // The merge operation creates an XreCompiler that needs the current parser state passed in below; otherwise it would overwrite that state.
+    struct XreConstructorArguments args(hfst::xre::definitions, hfst::xre::function_definitions, hfst::xre::function_arguments, hfst::xre::symbol_lists, hfst::xre::format);
+    tr1->minimize();
+    tr2->merge(*tr1, args);
+    return tr2;
+  }
+
   void warn(const char * msg)
   {
     if (!verbose_)
diff --git a/libhfst/src/parsers/xre_utils.h b/libhfst/src/parsers/xre_utils.h
index f90c579..f8a9226 100644
--- a/libhfst/src/parsers/xre_utils.h
+++ b/libhfst/src/parsers/xre_utils.h
@@ -19,6 +19,7 @@ extern size_t len;
 extern std::map<std::string,hfst::HfstTransducer*> definitions;
 extern std::map<std::string,std::string> function_definitions;
 extern std::map<std::string,unsigned int> function_arguments;
+extern std::map<std::string, std::set<std::string> > symbol_lists;
 extern HfstTransducer* last_compiled;
 extern bool contains_only_comments;
 extern ImplementationType format;
@@ -79,6 +80,7 @@ HfstTransducer* compile(const std::string& xre,
                         std::map<std::string,hfst::HfstTransducer*>& defs,
                         std::map<std::string,std::string>& func_defs,
                         std::map<std::string,unsigned int> func_args,
+                        std::map<std::string, std::set<std::string> >& lists,
                         hfst::ImplementationType type);
 
 /**
@@ -88,6 +90,7 @@ HfstTransducer* compile_first(const std::string& xre,
                               std::map<std::string,hfst::HfstTransducer*>& defs,
                               std::map<std::string,std::string>& func_defs,
                               std::map<std::string,unsigned int> func_args,
+                              std::map<std::string, std::set<std::string> >& lists,
                               hfst::ImplementationType type,
                               unsigned int & chars_read);
 
@@ -125,6 +128,8 @@ bool is_valid_function_call(const char * name, const std::vector<HfstTransducer>
 
  HfstTransducer * contains_once_optional(const HfstTransducer * t);
 
+ HfstTransducer * merge_first_to_second(HfstTransducer * tr1, HfstTransducer * tr2);
+
  void warn(const char * msg);
  void warn_about_special_symbols_in_replace(HfstTransducer *t);
  /* Warn about \a symbol if it is of form "@_.*_@" and verbose mode is on. */
diff --git a/test/tools/Makefile.am b/test/tools/Makefile.am
index 794ade4..2dc396f 100644
--- a/test/tools/Makefile.am
+++ b/test/tools/Makefile.am
@@ -299,6 +299,7 @@ LEXC_TXTS=basic.cat-dog-bird.lexc basic.colons.lexc basic.comments.lexc \
 		  basic.two-lexicons.lexc basic.UTF-8.lexc basic.zeros-epsilons.lexc \
 		  cat.lexc hfst.weights.lexc stress.random-lexicons-100.lexc \
 		  xfail.bogus.lexc xfail.ISO-8859-1.lexc xfail.lexicon-semicolon.lexc \
+		  xfail.sublexicon-defined-more-than-once.lexc \
 		  xre.any-variations.lexc xre.at-file.lexc \
 		  xre.automatic-multichar-symbols.lexc xre.basic.lexc \
 		  xre.definitions.lexc xre.months.lexc xre.nested-definitions.lexc \
@@ -306,7 +307,8 @@ LEXC_TXTS=basic.cat-dog-bird.lexc basic.colons.lexc basic.comments.lexc \
 		  xre.star-plus-optional.lexc basic.lowercase-lexicon-end.lexc \
 		  basic.multichar-flag-with-zero.lexc basic.almost-reserved-words.lexc \
 		  basic.regexps.lexc no-newline-before-sublexicon.lexc \
-		  warn.sublexicon-mentioned-but-not-defined.lexc
+		  warn.sublexicon-mentioned-but-not-defined.lexc \
+		  warn.one-sided-flags.lexc
 
 #XFST_TXTS=cat-regex.xfst
 ALL_SRCS=$(FST_TXTS) $(FST_STRINGS) $(FST_PAIRS) $(FST_PAIRSTRINGS) \
@@ -412,7 +414,9 @@ xre.star-plus-optional.lexc.result \
 no-newline-before-sublexicon.lexc.flag.result \
 no-newline-before-sublexicon.lexc.result \
 warn.sublexicon-mentioned-but-not-defined.lexc.flag.result \
-warn.sublexicon-mentioned-but-not-defined.lexc.result
+warn.sublexicon-mentioned-but-not-defined.lexc.result \
+warn.one-sided-flags.lexc.flag.result \
+warn.one-sided-flags.lexc.result
 
 # files needed for test programs
 EXTRA_DIST=empty-file $(FST_TXTS) $(FST_STRINGS) $(FST_PAIRS) $(FST_PAIRSTRINGS) $(FST_SPACESTRINGS) $(SUBSTITUTE_TXTS) $(XRE_TXTS) $(XFST_TXTS) $(TESTS) $(EXTRA_FILES) $(LEXC_TXTS) $(PMATCH_TXTS) $(PMATCHSCRIPTS) script.xfst lexc2fst-stress.sh lookup-stress.sh proc-stress.sh valgrind.sh \
diff --git a/test/tools/lexc-compiler-functionality.sh b/test/tools/lexc-compiler-functionality.sh
index 25883bc..20d8ff8 100755
--- a/test/tools/lexc-compiler-functionality.sh
+++ b/test/tools/lexc-compiler-functionality.sh
@@ -30,9 +30,9 @@ LEXCTESTS="basic.cat-dog-bird.lexc basic.colons.lexc basic.comments.lexc
           
                     
           
-LEXCXFAIL="xfail.bogus.lexc xfail.ISO-8859-1.lexc xfail.lexicon-semicolon.lexc"
+LEXCXFAIL="xfail.bogus.lexc xfail.ISO-8859-1.lexc xfail.lexicon-semicolon.lexc xfail.sublexicon-defined-more-than-once.lexc"
 
-LEXCWARN="warn.sublexicon-mentioned-but-not-defined.lexc"
+LEXCWARN="warn.sublexicon-mentioned-but-not-defined.lexc warn.one-sided-flags.lexc"
 
 if test "$srcdir" = ""; then
     srcdir="./"
diff --git a/test/tools/warn.one-sided-flags.lexc b/test/tools/warn.one-sided-flags.lexc
new file mode 100644
index 0000000..5183fd5
--- /dev/null
+++ b/test/tools/warn.one-sided-flags.lexc
@@ -0,0 +1,7 @@
+Multichar_Symbols
+  @U.FOO.ON@
+  @U.FOO.OFF@
+
+LEXICON Root
+@U.FOO.ON@foo:bar # ;
+# ;
diff --git a/test/tools/warn.one-sided-flags.lexc.flag.result b/test/tools/warn.one-sided-flags.lexc.flag.result
new file mode 100644
index 0000000..fb4d840
Binary files /dev/null and b/test/tools/warn.one-sided-flags.lexc.flag.result differ
diff --git a/test/tools/warn.one-sided-flags.lexc.result b/test/tools/warn.one-sided-flags.lexc.result
new file mode 100644
index 0000000..4250076
Binary files /dev/null and b/test/tools/warn.one-sided-flags.lexc.result differ
diff --git a/test/tools/xfail.sublexicon-defined-more-than-once.lexc b/test/tools/xfail.sublexicon-defined-more-than-once.lexc
new file mode 100644
index 0000000..aa21b15
--- /dev/null
+++ b/test/tools/xfail.sublexicon-defined-more-than-once.lexc
@@ -0,0 +1,15 @@
+LEXICON Root
+Noun ;
+Verb ;
+
+LEXICON Noun
+cat #;
+dog #;
+
+LEXICON Verb
+mew #;
+bark #;
+
+LEXICON Noun
+cat #;
+dog #;
diff --git a/tools/src/HfstStrings2FstTokenizer.cc b/tools/src/HfstStrings2FstTokenizer.cc
index ef59041..5ea13ab 100644
--- a/tools/src/HfstStrings2FstTokenizer.cc
+++ b/tools/src/HfstStrings2FstTokenizer.cc
@@ -7,8 +7,10 @@ HfstStrings2FstTokenizer::HfstStrings2FstTokenizer
   eps(eps)
 {
   // \: \\ \<space> and eps are special cases.
-  add_multichar_symbol( eps );
-
+  if (!eps.empty())
+    {
+      add_multichar_symbol( eps );
+    }
 
   tokenizer.add_multichar_symbol( BACKSLASH COL );
   tokenizer.add_multichar_symbol( BACKSLASH SPACE );
@@ -122,10 +124,10 @@ StringPairVector HfstStrings2FstTokenizer::make_pair_vector
       std::string output_symbol = unescape(*output_it);
 
       spv.push_back
-	(StringPair(input_symbol.empty() or input_symbol == eps ? 
-		    EPSILON_SYMBOL : input_symbol,
-		    output_symbol.empty() or output_symbol == eps ? 
-		    EPSILON_SYMBOL : output_symbol)); 
+        (StringPair(input_symbol.empty() or input_symbol == eps ? 
+                    EPSILON_SYMBOL : input_symbol,
+                    output_symbol.empty() or output_symbol == eps ? 
+                    EPSILON_SYMBOL : output_symbol)); 
       ++input_it;
       ++output_it;
     }
@@ -173,7 +175,7 @@ std::string HfstStrings2FstTokenizer::unescape(std::string symbol)
 
   pos = 0;
   while ((pos = symbol.find(TAB_ESCAPE)) != std::string::npos)
-    { symbol.replace(pos,strlen(TAB_ESCAPE),"	"); }
+    { symbol.replace(pos,strlen(TAB_ESCAPE),"\t"); } // "\t" escape: a literal tab was expanded to spaces by whitespace cleanup
   
   pos = 0;
   while ((pos = symbol.find(COL_ESCAPE)) != std::string::npos)
@@ -277,7 +279,7 @@ void test_ps
        it != spv.end();
        ++it)
     { 
-      if (it->first != it->second)	
+      if (it->first != it->second)      
     { std::cout << it->first << " : " << it->second << std::endl; }
       else
     { std::cout << it->first << std::endl; }
@@ -295,7 +297,7 @@ void test_sp
        it != spv.end();
        ++it)
     { 
-      if (it->first != it->second)	
+      if (it->first != it->second)      
     { std::cout << it->first << " : " << it->second << std::endl; }
       else
     { std::cout << it->first << std::endl; }
diff --git a/tools/src/hfst-compose.cc b/tools/src/hfst-compose.cc
index f6fec45..86c7208 100644
--- a/tools/src/hfst-compose.cc
+++ b/tools/src/hfst-compose.cc
@@ -139,8 +139,11 @@ compose_streams(HfstInputStream& firststream, HfstInputStream& secondstream,
     size_t transducer_n_first = 0; // transducers read from first stream
     size_t transducer_n_second = 0; // transducers read from second stream
     while (continueReading) {
-        first = new HfstTransducer(firststream);
-        transducer_n_first++;
+        if (firststream.is_good())
+          {
+            first = new HfstTransducer(firststream);
+            transducer_n_first++;
+          }
         if (secondstream.is_good())
           {
             second = new HfstTransducer(secondstream);
@@ -194,19 +197,42 @@ compose_streams(HfstInputStream& firststream, HfstInputStream& secondstream,
                   hfst_strformat(secondstream.get_type()));
           }
 
-        continueReading = firststream.is_good() && 
-          (secondstream.is_good() || transducer_n_second == 1);
+        continueReading = 
+          (firststream.is_good() && secondstream.is_good())  ||
+          (firststream.is_good() && (transducer_n_second == 1))  ||
+          ((transducer_n_first == 1) && secondstream.is_good());
 
-        delete first;
-        first=0;
-        // delete the transducer of second stream, unless we continue reading
-        // the first stream and there is only one transducer in the second 
-        // stream
-        if ((continueReading && secondstream.is_good()) || not continueReading)
+        if (!continueReading)
           {
+            delete first;
             delete second;
-            second=0;
           }
+        else
+          {
+            if (firststream.is_good())
+              {
+                delete first;
+              }
+            if (secondstream.is_good())
+              {
+                delete second;
+              }
+          }
+
+        //continueReading = firststream.is_good() && 
+        //  (secondstream.is_good() || transducer_n_second == 1);
+
+        //delete first;
+        //first=0;
+        // delete the transducer of second stream, unless we continue reading
+        // the first stream and there is only one transducer in the second 
+        // stream
+        //if ((continueReading && secondstream.is_good()) || not continueReading)
+        //  {
+        //    delete second;
+        //    second=0;
+        //  }
+
         free(firstname);
         free(secondname);
     }
@@ -221,12 +247,14 @@ compose_streams(HfstInputStream& firststream, HfstInputStream& secondstream,
       }
 
     if (secondstream.is_good())
-    {
-      error(EXIT_FAILURE, 0, 
-            "first input '%s' contains fewer transducers than second input"
-            " '%s'",
-            firstfilename, secondfilename);
-    }
+      {
+        error(EXIT_FAILURE, 0, 
+              "first input '%s' contains fewer transducers than second input"
+              " '%s'; this is only possible if the first input contains"
+              " exactly one transducer", 
+              firstfilename, secondfilename);
+      }
+
     firststream.close();
     secondstream.close();
     outstream.close();
diff --git a/tools/src/hfst-fst2strings.cc b/tools/src/hfst-fst2strings.cc
index 5d7a985..b2a9ae3 100644
--- a/tools/src/hfst-fst2strings.cc
+++ b/tools/src/hfst-fst2strings.cc
@@ -563,8 +563,8 @@ process_stream(HfstInputStream& instream, std::ostream& outstream)
     verbose_printf("Printed %i random string(s)\n", cb.count);
       }
 
-    if (print_separator_after_each_transducer)
-      outstream << "--" << std::endl;
+    //if (print_separator_after_each_transducer)
+    //  outstream << "--" << std::endl;
   }
     
   instream.close();
diff --git a/tools/src/hfst-lexc-compiler.cc b/tools/src/hfst-lexc-compiler.cc
index bcf6c40..aa64185 100644
--- a/tools/src/hfst-lexc-compiler.cc
+++ b/tools/src/hfst-lexc-compiler.cc
@@ -60,6 +60,8 @@ static bool minimize_flags = false;
 static bool rename_flags = false;
 static bool treat_warnings_as_errors = false;
 static bool xerox_composition = true;  // Compatibility with Xerox tools is the default
+static bool encode_weights = false;
+static bool enc = false;
 
 void
 print_usage()
@@ -73,6 +75,7 @@ print_usage()
                "  -f, --format=FORMAT     compile into FORMAT transducer\n"
                "  -o, --output=OUTFILE    write result into OUTFILE\n");
         fprintf(message_out, "Lexc options:\n"
+               "  -E, --encode-weights    encode weights when minimizing (default is false)\n"
                "  -F, --withFlags         use flags to hyperminimize result\n"
                "  -M, --minimizeFlags     if --withFlags is used, minimize the number of flags\n"
                "  -R, --renameFlags       if --withFlags and --minimizeFlags are used, rename\n"
@@ -122,6 +125,7 @@ parse_options(int argc, char** argv)
         static const struct option long_options[] =
         {
           HFST_GETOPT_COMMON_LONG,
+          {"encode-weights", no_argument, 0, 'E'},
           {"format", required_argument, 0, 'f'},
           {"output", required_argument, 0, 'o'},
           {"withFlags", no_argument,    0, 'F'},
@@ -134,7 +138,7 @@ parse_options(int argc, char** argv)
         };
         int option_index = 0;
         char c = getopt_long(argc, argv, HFST_GETOPT_COMMON_SHORT
-                             "f:o:FMRx:X:W",
+                             "Ef:o:FMRx:X:W",
                              long_options, &option_index);
         if (-1 == c)
         {
@@ -143,6 +147,9 @@ parse_options(int argc, char** argv)
         switch (c)
         {
 #include "inc/getopt-cases-common.h"
+        case 'E':
+          encode_weights = true;
+          break;
         case 'f':
           format = hfst_parse_format_name(optarg);
           break;
@@ -276,6 +283,12 @@ lexc_streams(LexcCompiler& lexc, HfstOutputStream& outstream)
     verbose_printf("done\n");
     delete res;
     outstream.close();
+
+    if (encode_weights)
+      {
+        hfst::set_encode_weights(enc);
+      }
+
     return EXIT_SUCCESS;
 }
 
@@ -304,6 +317,13 @@ int main( int argc, char **argv ) {
     {
         fclose(outfile);
     }
+
+    enc = hfst::get_encode_weights();
+    if (encode_weights)
+      {
+        hfst::set_encode_weights(true);
+      }
+
     verbose_printf("Reading from ");
     for (unsigned int i = 0; i < lexccount; i++)
       {
@@ -321,11 +341,11 @@ int main( int argc, char **argv ) {
    // lexc.with_flags_ = with_flags;
     if (silent)
       {
-        lexc.setVerbosity(false);
+        lexc.setVerbosity(0);
       }
     else
       {
-        lexc.setVerbosity(verbose);
+        lexc.setVerbosity(verbose ? 2 : 1);
       }
     if (treat_warnings_as_errors)
       {
diff --git a/tools/src/hfst-pmatch.cc b/tools/src/hfst-pmatch.cc
index cf223b9..d9e3d1d 100644
--- a/tools/src/hfst-pmatch.cc
+++ b/tools/src/hfst-pmatch.cc
@@ -51,6 +51,7 @@ using std::pair;
 bool blankline_separated = true;
 bool extract_tags = false;
 bool locate_mode = false;
+bool profile = false;
 std::string pmatch_filename;
 
 void
@@ -64,7 +65,8 @@ print_usage()
     fprintf(message_out,
             "  -n  --newline          Newline as input separator (default is blank line)\n"
             "  -x  --extract-tags     Only print tagged parts in output\n"
-            "  -l  --locate           Only print locations of matches\n");
+            "  -l  --locate           Only print locations of matches\n"
+            "  -p  --profile          Produce profiling data\n");
     fprintf(message_out, 
             "Use standard streams for input and output.\n"
             "\n"
@@ -121,6 +123,9 @@ int process_input(hfst_ol::PmatchContainer & container,
     if (blankline_separated && !input_text.empty()) {
         match_and_print(container, outstream, input_text);
     }
+    if (profile) {
+        outstream << "\n" << container.get_profiling_info() << "\n";
+    }
     return EXIT_SUCCESS;
 }
 
@@ -137,6 +142,7 @@ int parse_options(int argc, char** argv)
                 {"newline", no_argument, 0, 'n'},
                 {"extract-tags", no_argument, 0, 'x'},
                 {"locate", no_argument, 0, 'l'},
+                {"profile", no_argument, 0, 'p'},
                 {0,0,0,0}
             };
         int option_index = 0;
@@ -160,6 +166,9 @@ int parse_options(int argc, char** argv)
         case 'l':
             locate_mode = true;
             break;
+        case 'p':
+            profile = true;
+            break;
 #include "inc/getopt-cases-error.h"
         }
 
@@ -212,7 +221,11 @@ int main(int argc, char ** argv)
         std::cerr << "Could not open file " << pmatch_filename << std::endl;
         return EXIT_FAILURE;
     }
-    hfst_ol::PmatchContainer container(instream, verbose, extract_tags);
+    hfst_ol::PmatchContainer container(instream);
+    container.set_verbose(verbose);
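+// the locate_mode bool in this tool only affects its own processing. NOTE(review): extract_tags is what gets passed to set_locate_mode() below - confirm this is the intended setter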
+    container.set_locate_mode(extract_tags);
+    container.set_profile(profile);
 //     if (outfile != stdout) {
 //         std::filebuf fb;
 // fb.open(outfilename, std::ios::out);
diff --git a/tools/src/hfst-proc2.cc b/tools/src/hfst-proc2.cc
index 46c2e9a..8a257f4 100644
--- a/tools/src/hfst-proc2.cc
+++ b/tools/src/hfst-proc2.cc
@@ -213,7 +213,8 @@ int main(int argc, char ** argv)
         std::cerr << "Could not open file " << tokenizer_filename << std::endl;
         return EXIT_FAILURE;
     }
-    hfst_ol::PmatchContainer container(instream, verbose);
+    hfst_ol::PmatchContainer container(instream);
+    container.set_verbose(verbose);
 //     if (outfile != stdout) {
 //         std::filebuf fb;
 // fb.open(outfilename, std::ios::out);
diff --git a/tools/src/hfst-summarize.cc b/tools/src/hfst-summarize.cc
index 63ef7bd..ec4b1a5 100644
--- a/tools/src/hfst-summarize.cc
+++ b/tools/src/hfst-summarize.cc
@@ -54,6 +54,8 @@ using hfst::StringSet;
 #include "inc/globals-unary.h"
 
 // add tools-specific variables here
+static bool print_symbol_pair_statistics = false;
+static int symbol_pair_threshold = -1;
 
 void
 print_usage()
@@ -66,6 +68,9 @@ print_usage()
     print_common_program_options(message_out);
     print_common_unary_program_options(message_out);
     // fprintf(message_out, (tool-specific options and short descriptions)
+    fprintf(message_out, "Summarize options:\n");
+    fprintf(message_out, "  -S, --print-symbol-pair-statistics=N  Print info about symbol pairs that occur\n"); // short option is 'S' in the getopt string and long_options
+    fprintf(message_out, "                                        at most N times (default is infinity)\n");
     fprintf(message_out, "\n");
     print_common_unary_program_parameter_instructions(message_out);
     fprintf(message_out, "\n");
@@ -89,13 +94,14 @@ parse_options(int argc, char** argv)
         {
         HFST_GETOPT_COMMON_LONG,
         HFST_GETOPT_UNARY_LONG,
-          // add tool-specific options here 
-            {0,0,0,0}
+        // add tool-specific options here 
+        {"print-symbol-pair-statistics", optional_argument, 0, 'S'},
+        {0,0,0,0}
         };
         int option_index = 0;
         // add tool-specific options here 
         char c = getopt_long(argc, argv, HFST_GETOPT_COMMON_SHORT
-                             HFST_GETOPT_UNARY_SHORT,
+                             HFST_GETOPT_UNARY_SHORT "S::",
                              long_options, &option_index);
         if (-1 == c)
         {
@@ -107,6 +113,25 @@ parse_options(int argc, char** argv)
 #include "inc/getopt-cases-common.h"
 #include "inc/getopt-cases-unary.h"
           // add tool-specific cases here
+        case 'S':
+          print_symbol_pair_statistics = true ;
+          if (optarg != NULL)
+            {
+              if (optarg[0] == '=')
+                {
+                  optarg++;
+                }
+              symbol_pair_threshold = hfst_strtoul(optarg, 10);
+              if (symbol_pair_threshold < 0)
+                {
+                  error(EXIT_FAILURE, 0, "%u is not a valid argument for option --print-symbol-pair-statistics\n", symbol_pair_threshold);
+                }
+              if (symbol_pair_threshold == 0)
+                {
+                  error(EXIT_FAILURE, 0, "0 is not a valid argument for option --print-symbol-pair-statistics\n");
+                }
+            }
+          break;
 #include "inc/getopt-cases-error.h"
         }
     }
@@ -208,6 +233,8 @@ process_stream(HfstInputStream& instream)
           is_mutable = false;
           break;
         }
+
+      std::map<std::pair<std::string, std::string>,unsigned int> symbol_pairs;
       // iterate states in random order
       HfstState source_state=0;
       for (HfstBasicTransducer::const_iterator it = mutt->begin();
@@ -232,6 +259,13 @@ process_stream(HfstInputStream& instream)
               arcs_here++;
               foundAlphabet.insert(tr_it->get_input_symbol());
               foundAlphabet.insert(tr_it->get_output_symbol());
+
+              // ADDED
+              if (print_symbol_pair_statistics)
+                {
+                  symbol_pairs[std::pair<std::string,std::string>(tr_it->get_input_symbol(), tr_it->get_output_symbol())]++;
+                }
+
               if (tr_it->get_input_symbol() != tr_it->get_output_symbol())
                 {
                   acceptor = false;
@@ -521,6 +555,27 @@ process_stream(HfstInputStream& instream)
                 }
               fprintf(outfile, "\n");
             }
+          // ADDED
+          if (print_symbol_pair_statistics) 
+            {
+              if (symbol_pair_threshold > -1)
+                {
+                  fprintf(outfile, "symbol pairs that occur at most %u times:\n", symbol_pair_threshold);
+                }
+              else
+                {
+                  fprintf(outfile, "symbol pairs:\n");
+                }
+              for (std::map<std::pair<std::string,std::string>,unsigned int>::const_iterator it = symbol_pairs.begin(); it != symbol_pairs.end(); it++)
+                {
+                  if (symbol_pair_threshold < 0 || it->second <= static_cast<unsigned int>(symbol_pair_threshold)) // negative threshold (default -1) means "no limit"; avoid relying on implicit int->unsigned conversion
+                    {
+                      fprintf(outfile, "%s:%s\t%u\n", it->first.first.c_str(), it->first.second.c_str(), it->second);
+                    }
+                }
+              fprintf(outfile, "\n");
+            }
+
           delete trans;
         }
     }
diff --git a/tools/src/parsers/XfstCompiler.cc b/tools/src/parsers/XfstCompiler.cc
index dae3c9e..31344e3 100644
--- a/tools/src/parsers/XfstCompiler.cc
+++ b/tools/src/parsers/XfstCompiler.cc
@@ -20,11 +20,9 @@
 
 #include <string>
 #include <map>
-#include <list>
 #include <queue>
 #include <stack>
 
-using std::list;
 using std::string;
 using std::map;
 using std::queue;
@@ -999,13 +997,13 @@ namespace xfst {
           {
             hfst_fprintf(errorstream_, "unsupported unicode range %s-%s\n", start, end);
           }
-        list<string> l;
+        std::set<string> l;
         for (char c = *start; c < *end; c++)
           {
             char *s = static_cast<char*>(malloc(sizeof(char)*2));
             *s = c;
             *(s+1) = '\0';
-            l.push_back(s);
+            l.insert(s);
           }
         lists_[name] = l;
         return *this;
@@ -1022,16 +1020,17 @@ namespace xfst {
             MAYBE_QUIT;
             PROMPT_AND_RETURN_THIS;
           }
-        list<string> l;
+        std::set<string> l;
         char* p = strdup(values);
         char* token = strtok(p, " ");
         while (token != NULL)
           {
-            l.push_back(token);
+            l.insert(token);
             token = strtok(NULL, " ");
           }
         free(p);
         lists_[name] = l;
+        xre_.define_list(name, l);
         PROMPT_AND_RETURN_THIS;
       }
 
@@ -2528,31 +2527,43 @@ namespace xfst {
   XfstCompiler&
   XfstCompiler::print_list(const char* name, FILE* outfile)
     {
-      list<string> l = lists_[name];
-      hfst_fprintf(outfile, "%10s:", name);
-      for (list<string>::const_iterator s = l.begin();
+      if (lists_.find(name) == lists_.end())
+        {
+          hfst_fprintf(outfile, "No such list defined: %s\n", name);
+          PROMPT_AND_RETURN_THIS;
+        }
+      std::set<string> l = lists_[name];
+      hfst_fprintf(outfile, "%10s: ", name);
+      for (std::set<string>::const_iterator s = l.begin();
            s != l.end();
            ++s)
         {
           hfst_fprintf(outfile, "%s ", s->c_str());
         }
+      hfst_fprintf(outfile, "\n");
       PROMPT_AND_RETURN_THIS;
     }
 
   XfstCompiler& 
   XfstCompiler::print_list(FILE* outfile)
     {
-      for (map<string,list<string> >::const_iterator l = lists_.begin();
+      if (lists_.size() == 0)
+        {
+          hfst_fprintf(outfile, "No lists defined.\n");
+          PROMPT_AND_RETURN_THIS;
+        }
+      for (map<string,std::set<string> >::const_iterator l = lists_.begin();
            l != lists_.end();
            ++l)
         {
-          hfst_fprintf(outfile, "%10s:", l->first.c_str());
-          for (list<string>::const_iterator s = l->second.begin();
+          hfst_fprintf(outfile, "%10s: ", l->first.c_str());
+          for (std::set<string>::const_iterator s = l->second.begin();
                s != l->second.end();
                ++s)
             {
               hfst_fprintf(outfile, "%s ", s->c_str());
             }
+          hfst_fprintf(outfile, "\n");
         }
       PROMPT_AND_RETURN_THIS;
     }
@@ -3585,12 +3596,6 @@ namespace xfst {
         switch (operation)
           {
           case INTERSECT_NET:
-            /*{
-              HfstBasicTransducer basic(*t);
-              HfstBasicTransducer merge_tr = HfstBasicTransducer::merge(fsm, basic);
-              fprintf(stderr, "result of merge is:\n");
-              merge_tr.write_in_att_format(stderr);
-              }*/
             result->intersect(*t);
             break;
           case IGNORE_NET:
@@ -4183,6 +4188,7 @@ namespace xfst {
       PROMPT_AND_RETURN_THIS;
     }
 
+  // Returns an automaton that contains one or more "^[" "^]" expressions.
   static HfstTransducer * contains_regexps(hfst::xre::XreCompiler & xre_)
   {
     HfstTransducer * not_bracket_star = xre_.compile("[? - \"^[\" - \"^]\"]* ;");
@@ -4194,6 +4200,22 @@ namespace xfst {
     return well_formed;
   }
 
+  static HfstTransducer * contains_regexp_markers_on_one_side(hfst::xre::XreCompiler & xre_, bool input_side)
+  {
+    HfstTransducer * retval = NULL;
+    if (input_side)
+      {
+        retval = xre_.compile("[?:?|0:?|?:0]* [\"^[\":? | \"^]\":? | \"^[\":0 | \"^]\":0] [?:?|0:?|?:0]*");
+      }
+    else // output side
+      {
+        retval = xre_.compile("[?:?|0:?|?:0]* [?:\"^[\" | ?:\"^]\" | 0:\"^[\" | 0:\"^]\"] [?:?|0:?|?:0]*");
+      }
+    assert(retval != NULL);
+    return retval;
+  }
+
+  // @pre \a t must be an automaton
   static bool is_well_formed_for_compile_replace(const HfstTransducer * t, hfst::xre::XreCompiler & xre_)
   {
     HfstTransducer * well_formed = contains_regexps(xre_);
@@ -4212,23 +4234,65 @@ namespace xfst {
     return value;
   }
 
-  static std::string to_literal_regexp(const hfst::StringPairVector & path)
+  static std::string to_literal_regexp(const hfst::StringPairVector & path, bool input_side)
   {
     std::string pathstr("[ ");
     for (hfst::StringPairVector::const_iterator it = path.begin(); it != path.end(); it++)
       {
-        pathstr.append("\"").append(it->first).append("\" ");
+        std::string symbol = (input_side) ? it->first : it->second ;
+        pathstr.append("\"").append(symbol).append("\" ");
       }
     pathstr.append("]");
     return pathstr;
   }
 
-  static std::string to_regexp(const hfst::StringPairVector & path)
+  static HfstTransducer * to_literal_transducer(const hfst::StringPairVector & path, hfst::xre::XreCompiler & xre_)
   {
     std::string pathstr("[ ");
     for (hfst::StringPairVector::const_iterator it = path.begin(); it != path.end(); it++)
       {
-        pathstr.append(it->first).append(" ");
+        std::string isymbol = it->first;
+        if (isymbol == hfst::internal_epsilon)
+          {
+            pathstr.append("0");
+          }
+        else
+          {
+            pathstr.append("\"").append(isymbol).append("\"");
+          }
+        
+        pathstr.append(":");
+
+        std::string osymbol = it->second;
+        if (osymbol == hfst::internal_epsilon)
+          {
+            pathstr.append("0 ");
+          }
+        else
+          {
+            pathstr.append("\"").append(osymbol).append("\" ");
+          }
+      }
+    pathstr.append("];");
+    // debug
+    //std::cerr << "to_literal_transducer: compiling expression: " << pathstr << std::endl;
+    char * p = strdup(pathstr.c_str());
+    HfstTransducer * retval = xre_.compile(p);
+    free(p);
+    return retval;
+  }
+
+  static std::string to_regexp(const hfst::StringPairVector & path, bool input_side)
+  {
+    std::string pathstr("[ ");
+    for (hfst::StringPairVector::const_iterator it = path.begin(); it != path.end(); it++)
+      {
+        std::string symbol = (input_side) ? it->first : it->second ;
+        // ignore "^[" and "^]"
+        if (symbol != "^]" && symbol != "^[")
+          {
+            pathstr.append(symbol).append(" ");
+          }
       }
     pathstr.append("]");
     return pathstr;
@@ -4243,18 +4307,35 @@ namespace xfst {
       using hfst::implementations::HfstReplacementsMap;
 
       GET_TOP(tmp);
-      if (is_well_formed_for_compile_replace(tmp, xre_))
+      HfstTransducer tmp_cp(*tmp);
+
+      if (level == UPPER_LEVEL)
+        {
+          tmp_cp.input_project();
+        }
+      else // LOWER_LEVEL
+        {
+          tmp_cp.output_project();
+        }
+
+      if (is_well_formed_for_compile_replace(&tmp_cp, xre_))
         {
-          fprintf(stderr, "Network is well-formed.\n");
+          if (verbose_)
+            fprintf(stderr, "Network is well-formed.\n");
         }
       else
         {
-          fprintf(stderr, "Network is not well-formed.\n");
+          if (verbose_)
+            fprintf(stderr, "Network is not well-formed.\n");
+          xfst_lesser_fail();
+          prompt();
+          return *this;
         }
+
       HfstBasicTransducer fsm(*tmp);
       try 
         {
-          HfstReplacementsMap replacement_map = fsm.find_replacements();
+          HfstReplacementsMap replacement_map = fsm.find_replacements((level == UPPER_LEVEL)); // input_side
           
             for (HfstReplacementsMap::const_iterator it = replacement_map.begin();
                  it != replacement_map.end(); it++)
@@ -4268,22 +4349,54 @@ namespace xfst {
                    std::string CPR(""); // Cross-Product Regexp
                    if (level == LOWER_LEVEL)
                      {
-                       CPR = to_literal_regexp(rit->second) + std::string(" .x. ") + to_regexp(rit->second);
-                       CPR = std::string("\"^[\":0") + std::string(" [") + CPR + std::string("] ") + std::string("\"^]\":0 ;");
+                       CPR = to_literal_regexp(rit->second, false /*output side*/) + std::string(" .x. ") + to_regexp(rit->second, false /*output side*/);
+                       //CPR = std::string("\"^[\":0") + std::string(" [") + CPR + std::string("] ") + std::string("\"^]\":0 ;");
+                       CPR = std::string("[") + CPR + std::string("] ;");
                      }
-                   else
+                   else // UPPER_LEVEL
                      {
-                       CPR = to_regexp(rit->second) + std::string(" .x. ") + to_literal_regexp(rit->second);
-                       CPR = std::string("0:\"^[\"") + std::string(" [") + CPR + std::string("] ") + std::string("0:\"^]\" ;");
+                       CPR = to_regexp(rit->second, true /*input side*/) + std::string(" .x. ") + to_literal_regexp(rit->second, true /*input side*/);
+                       //CPR = std::string("0:\"^[\"") + std::string(" [") + CPR + std::string("] ") + std::string("0:\"^]\" ;");
+                       CPR = std::string("[") + CPR + std::string("] ;");
                      }
                    char * cpr = strdup(CPR.c_str());
-                   fprintf(stderr, "compiling replacement '%s'...\n", cpr);
+                   // debug
+                   //fprintf(stderr, "compiling replacement '%s'...\n", cpr);
                    HfstTransducer * replacement = xre_.compile(cpr);
-                   assert(replacement != NULL); // todo
+                   if (replacement == NULL)
+                     {
+                       fprintf(stderr, "Could not compile regular expression in compile-replace: %s.\n", cpr);
+                       xfst_lesser_fail();
+                       prompt();
+                       return *this;
+                     }
                    replacement->minimize();
+
+                   // debug
+                   //std::cerr << "replacement is:" << std::endl << *replacement << std::endl;
+
+                   // compose with opposite level
+                   if (level == UPPER_LEVEL)
+                     {
+                       HfstTransducer * original_path = to_literal_transducer(rit->second, xre_);
+                       original_path->minimize();
+                       replacement->compose(*original_path);
+                       delete original_path;
+                       replacement->minimize();
+                     }
+                   else // LOWER_LEVEL
+                     {
+                       HfstTransducer * original_path = to_literal_transducer(rit->second, xre_);
+                       original_path->minimize();
+                       original_path->compose(*replacement);
+                       original_path->minimize();
+                       delete replacement;
+                       replacement = original_path; 
+                     }
+
                    HfstBasicTransducer repl(*replacement);
                    // DEBUG
-                   std::cerr << "inserting transducer:" << std::endl << *replacement << std::endl << "between states " << start_state << " and " << end_state << "." << std::endl ;
+                   //std::cerr << "inserting transducer:" << std::endl << *replacement << std::endl << "between states " << start_state << " and " << end_state << "." << std::endl ;
                    delete replacement;
                    fsm.insert_transducer(start_state, end_state, repl);
                   }
@@ -4295,9 +4408,16 @@ namespace xfst {
         }
       HfstTransducer * result = new HfstTransducer(fsm, format_);
 
-      std::cerr << "result from compile-replace is:" << std::endl << *result << std::endl;
+      // debug
+      //std::cerr << "result from compile-replace before filtering is:" << std::endl << *result << std::endl;
+
+      // filter out regexps (todo: possible that there are regexps on opposite side)
+      HfstTransducer * cr = contains_regexp_markers_on_one_side(xre_, (level == UPPER_LEVEL) /*input side*/);
+      cr->minimize();
+
+      // debug
+      //std::cerr << "filter is:" << std::endl << *cr << std::endl;
 
-      HfstTransducer * cr = contains_regexps(xre_);
       result->subtract(*cr).minimize();
       delete cr;
       stack_.pop();
diff --git a/tools/src/parsers/XfstCompiler.h b/tools/src/parsers/XfstCompiler.h
index 7495231..e73e2b8 100644
--- a/tools/src/parsers/XfstCompiler.h
+++ b/tools/src/parsers/XfstCompiler.h
@@ -29,7 +29,6 @@
 #endif
 
 #include <string>
-#include <list>
 #include <map>
 #include <stack>
 
@@ -670,7 +669,7 @@ class XfstCompiler
   std::map<std::string,std::string> aliases_;
   std::map<std::string,std::string> variables_;
   std::map<std::string,std::string> properties_;
-  std::map<std::string,std::list<string> > lists_;
+  std::map<std::string,std::set<string> > lists_;
   hfst::HfstTransducer* last_defined_;
   hfst::ImplementationType format_;
   bool verbose_;
diff --git a/tools/src/parsers/hfst-xfst.cc b/tools/src/parsers/hfst-xfst.cc
index cef05da..3b1af24 100644
--- a/tools/src/parsers/hfst-xfst.cc
+++ b/tools/src/parsers/hfst-xfst.cc
@@ -187,6 +187,7 @@ int parse_file(const char* filename, hfst::xfst::XfstCompiler &comp)
       error(EXIT_FAILURE, 0, "error when reading file %s\n", filename);
       return EXIT_FAILURE;
     }
+
   if (0 != comp.parse_line(line))
     {
       error(EXIT_FAILURE, 0, "error when parsing file %s\n", filename);
diff --git a/tools/src/parsers/test/Makefile.am b/tools/src/parsers/test/Makefile.am
index 46d332d..3fbbab3 100644
--- a/tools/src/parsers/test/Makefile.am
+++ b/tools/src/parsers/test/Makefile.am
@@ -21,6 +21,7 @@ EXTRA_DIST=test.sh \
 	reverse_net.xfst reverse_net.att \
 	upper_side_net.xfst upper_side_net.att \
 	lower_side_net.xfst lower_side_net.att \
+	one_transition_regex.xfst one_transition_regex.att \
 	substitute_defined.xfst substitute_defined.att \
 	substitute_symbol_1.xfst substitute_symbol_1.att \
 	substitute_symbol_2.xfst substitute_symbol_2.att \
@@ -42,6 +43,8 @@ EXTRA_DIST=test.sh \
 	substitute_defined_4.xfst substitute_defined_4.att \
 	substitute_defined_5.xfst substitute_defined_5.att \
 	substitute_defined_6.xfst substitute_defined_6.att \
+	merge.xfst merge.att \
+	merge_weighted.xfst merge_weighted.att \
 	replace_identity.xfst replace_identity.att \
 	quoted_literals.xfst quoted_literals.att \
 	define.xfst define.att \
@@ -162,5 +165,8 @@ EXTRA_DIST=test.sh \
 	weighted_parallel_rules_9.xfst weighted_parallel_rules_9.output \
 	weighted_parallel_rules_10.xfst weighted_parallel_rules_10.output \
 	weighted_parallel_rules_11.xfst weighted_parallel_rules_11.output \
-	xerox_composition.xfst xerox_composition.output
+	xerox_composition.xfst xerox_composition.output \
+	compile_replace_1.xfst compile_replace_1.output \
+	compile_replace_2.xfst compile_replace_2.output \
+	compile_replace_3.xfst compile_replace_3.output
 check_DATA=test.sh
diff --git a/tools/src/parsers/test/compile_replace_1.output b/tools/src/parsers/test/compile_replace_1.output
new file mode 100644
index 0000000..a3cabe7
--- /dev/null
+++ b/tools/src/parsers/test/compile_replace_1.output
@@ -0,0 +1,8 @@
+match
+match
+match
+match
+???
+???
+???
+???
diff --git a/tools/src/parsers/test/compile_replace_1.xfst b/tools/src/parsers/test/compile_replace_1.xfst
new file mode 100644
index 0000000..0d88fbd
--- /dev/null
+++ b/tools/src/parsers/test/compile_replace_1.xfst
@@ -0,0 +1,12 @@
+regex "^[":m a "+":t "^]":c 0:h ;
+compile-replace upper
+apply up a
+apply up aa
+apply up aaa
+apply up aaaaaaaaa
+apply up b
+apply up ab
+apply up ba
+apply up abba
+
+
diff --git a/tools/src/parsers/test/compile_replace_2.output b/tools/src/parsers/test/compile_replace_2.output
new file mode 100644
index 0000000..a3cabe7
--- /dev/null
+++ b/tools/src/parsers/test/compile_replace_2.output
@@ -0,0 +1,8 @@
+match
+match
+match
+match
+???
+???
+???
+???
diff --git a/tools/src/parsers/test/compile_replace_2.xfst b/tools/src/parsers/test/compile_replace_2.xfst
new file mode 100644
index 0000000..ee916d5
--- /dev/null
+++ b/tools/src/parsers/test/compile_replace_2.xfst
@@ -0,0 +1,10 @@
+regex m:"^[" a t:"+" c:"^]" h:0 ;
+compile-replace lower
+apply down a
+apply down aa
+apply down aaa
+apply down aaaaaaaaa
+apply down b
+apply down ab
+apply down ba
+apply down abba
diff --git a/tools/src/parsers/test/compile_replace_3.output b/tools/src/parsers/test/compile_replace_3.output
new file mode 100644
index 0000000..10dc3a2
--- /dev/null
+++ b/tools/src/parsers/test/compile_replace_3.output
@@ -0,0 +1,17 @@
+^[a+^]
+^[a+^]
+^[a+^]
+^[a+^]
+???
+???
+???
+???
+--
+^[a+^]
+^[a+^]
+^[a+^]
+^[a+^]
+???
+???
+???
+???
diff --git a/tools/src/parsers/test/compile_replace_3.xfst b/tools/src/parsers/test/compile_replace_3.xfst
new file mode 100644
index 0000000..0eb6ea5
--- /dev/null
+++ b/tools/src/parsers/test/compile_replace_3.xfst
@@ -0,0 +1,21 @@
+regex "^[" a "+" "^]" ;
+compile-replace upper
+apply up a
+apply up aa
+apply up aaa
+apply up aaaaaaaaa
+apply up b
+apply up ab
+apply up ba
+apply up abba
+echo --
+regex "^[" a "+" "^]" ;
+compile-replace lower
+apply down a
+apply down aa
+apply down aaa
+apply down aaaaaaaaa
+apply down b
+apply down ab
+apply down ba
+apply down abba
diff --git a/tools/src/parsers/test/merge.att b/tools/src/parsers/test/merge.att
new file mode 100644
index 0000000..53b30c2
--- /dev/null
+++ b/tools/src/parsers/test/merge.att
@@ -0,0 +1,39 @@
+0	1	k	k
+1	2	a	a
+2	3	t	t
+3	4	a	a
+4	5	b	b
+5	6	i	i
+6
+--
+0	1	k	k
+1	2	a	a
+2	3	t	t
+3	4	a	a
+4	5	b	b
+5	6	i	i
+6
+--
+0	1	k	k
+1	2	a	a
+2	3	t	t
+3	4	a	a
+4	5	b	b
+5	6	i	i
+6
+--
+0	1	k	k
+1	2	a	a
+2	3	t	t
+3	4	a	a
+4	5	b	b
+5	6	i	i
+6
+--
+0	1	k	k
+1	2	a	a
+2	3	t	t
+3	4	a	a
+4	5	b	b
+5	6	i	i
+6
diff --git a/tools/src/parsers/test/merge.xfst b/tools/src/parsers/test/merge.xfst
new file mode 100644
index 0000000..089e6f8
--- /dev/null
+++ b/tools/src/parsers/test/merge.xfst
@@ -0,0 +1,17 @@
+list V a e i o u ;
+regex [a+ i] .m>. {kVtVbV} ;
+write att
+echo --
+list C k t b ;
+regex [k t b] .m>. {CaCaCi} ;
+write att
+echo --
+regex [k t b] .m>. [ [a+ i] .m>. {CVCVCV} ] ;
+write att
+echo --
+regex [a+ i] .m>. [ [k t b] .m>. {CVCVCV} ] ;
+write att
+echo --
+regex [ {CVCVCV} .<m. [k t b] ] .<m. [a+ i] ;
+write att
+quit
diff --git a/tools/src/parsers/test/merge_weighted.att b/tools/src/parsers/test/merge_weighted.att
new file mode 100644
index 0000000..3761a51
--- /dev/null
+++ b/tools/src/parsers/test/merge_weighted.att
@@ -0,0 +1,39 @@
+0	1	k	k	7.000000
+1	2	a	a	0.000000
+2	3	t	t	0.000000
+3	4	a	a	0.000000
+4	5	b	b	0.000000
+5	6	i	i	0.000000
+6	0.000000
+--
+0	1	k	k	11.000000
+1	2	a	a	0.000000
+2	3	t	t	0.000000
+3	4	a	a	0.000000
+4	5	b	b	0.000000
+5	6	i	i	0.000000
+6	0.000000
+--
+0	1	k	k	3.000000
+1	2	a	a	0.000000
+2	3	t	t	0.000000
+3	4	a	a	0.000000
+4	5	b	b	0.000000
+5	6	i	i	0.000000
+6	0.000000
+--
+0	1	k	k	2.000000
+1	2	a	a	0.000000
+2	3	t	t	0.000000
+3	4	a	a	0.000000
+4	5	b	b	0.000000
+5	6	i	i	0.000000
+6	0.000000
+--
+0	1	k	k	4.000000
+1	2	a	a	0.000000
+2	3	t	t	0.000000
+3	4	a	a	0.000000
+4	5	b	b	0.000000
+5	6	i	i	0.000000
+6	0.000000
diff --git a/tools/src/parsers/test/merge_weighted.xfst b/tools/src/parsers/test/merge_weighted.xfst
new file mode 100644
index 0000000..4f2779b
--- /dev/null
+++ b/tools/src/parsers/test/merge_weighted.xfst
@@ -0,0 +1,18 @@
+set print-weight ON
+list V a e i o u ;
+regex [[a::1]+ i::5] .m>. {kVtVbV} ;
+write att
+echo --
+list C k t b ;
+regex [k::1 t::3 b::7] .m>. {CaCaCi} ;
+write att
+echo --
+regex [k t::2 b] .m>. [ [a+ i::1] .m>. {CVCVCV} ] ;
+write att
+echo --
+regex [[a::0.5]+ i] .m>. [ [k::0.3 t b::0.7] .m>. {CVCVCV} ] ;
+write att
+echo --
+regex [ {CVCVCV} .<m. [k t b]::1 ] .<m. [a+ i]::3 ;
+write att
+quit
diff --git a/tools/src/parsers/test/one_transition_regex.att b/tools/src/parsers/test/one_transition_regex.att
new file mode 100644
index 0000000..1016f8a
--- /dev/null
+++ b/tools/src/parsers/test/one_transition_regex.att
@@ -0,0 +1,10 @@
+0	1	x	y
+1
+--
+0	1	x	@_UNKNOWN_SYMBOL_@
+0	1	x	x
+1
+--
+0	1	@_UNKNOWN_SYMBOL_@	y
+0	1	y	y
+1
diff --git a/tools/src/parsers/test/one_transition_regex.xfst b/tools/src/parsers/test/one_transition_regex.xfst
new file mode 100644
index 0000000..309bd31
--- /dev/null
+++ b/tools/src/parsers/test/one_transition_regex.xfst
@@ -0,0 +1,11 @@
+define foo x ;
+define bar y ;
+regex foo:bar ;
+write att
+echo --
+regex foo:? ;
+write att
+echo --
+regex ?:bar ;
+write att
+quit
diff --git a/tools/src/parsers/test/test.sh b/tools/src/parsers/test/test.sh
index cc38eff..9f82ab7 100755
--- a/tools/src/parsers/test/test.sh
+++ b/tools/src/parsers/test/test.sh
@@ -99,7 +99,7 @@ do
         substitute_defined_1 substitute_defined_2 substitute_defined_3 \
         substitute_defined_4 substitute_defined_5 substitute_defined_6 \
         at_re_1 at_re_2 at_re_3 at_txt at_stxt at_txt_and_stxt at_pl \
-        quoted_literals replace_identity
+        quoted_literals replace_identity one_transition_regex merge
         # substitute_symbol_6 fails on sfst
         # angle_brackets omitted, since xfst and foma handle them differently
     do
@@ -122,6 +122,31 @@ do
 	fi
     done
 
+    ## The same as above but only for openfst format
+    if [ "$format" = "openfst-tropical" ]; then
+        for testfile in merge_weighted
+        do
+	    rm -f result result1 result2
+	    if ! (ls $testfile.xfst 2> /dev/null); then
+	        echo "skipping missing test for "$testfile"..."
+	        continue
+	    fi
+	    if ! (cat $testfile.xfst | ../hfst-xfst --pipe-mode -q -f $format > result 2> /dev/null); then
+	        echo "ERROR: in compiling "$testfile".xfst"
+	        exit 1;
+	    fi
+	    if ! (cat result | ${TXT2FST} > tmp1; cat $testfile.att | ${TXT2FST} > tmp2; ); then
+	        echo "ERROR: in compiling "$testfile".att"
+	        exit 1;
+	    fi
+	    if ! (${COMPARE} tmp1 tmp2); then
+	        echo "ERROR: "$testfile" test failed"
+	        exit 1;
+	    fi
+        done
+    fi
+
+
     ## Test that testfile_fail fails.
     #for testfile in define_fail
     #do
@@ -142,7 +167,8 @@ do
     ## Test that the result of testfile.xfst (written to standard output)
     ## is the same as testfile.output
     for testfile in print_stack print_labels print_label_tally \
-	shortest_string set_variable info print_net eliminate_flag empty_context xerox_composition
+	shortest_string set_variable info print_net eliminate_flag empty_context xerox_composition \
+        compile_replace_1 compile_replace_2 compile_replace_3 
     do
 	if ! (ls $testfile.xfst 2> /dev/null); then
 	    echo "skipping missing test for "$testfile"..."

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/hfst.git



More information about the debian-science-commits mailing list