[python-pynlpl] 01/02: New upstream version 1.2.5

Maarten van Gompel proycon-guest at moszumanska.debian.org
Wed Dec 6 21:18:31 UTC 2017


This is an automated email from the git hooks/post-receive script.

proycon-guest pushed a commit to branch master
in repository python-pynlpl.

commit dc6199653df028334bc59b6867f7450f4dd633ad
Author: proycon <proycon at anaproy.nl>
Date:   Wed Dec 6 22:16:34 2017 +0100

    New upstream version 1.2.5
---
 PKG-INFO                                           |   24 +-
 PyNLPl.egg-info/PKG-INFO                           |   24 +-
 PyNLPl.egg-info/SOURCES.txt                        |   31 +-
 PyNLPl.egg-info/requires.txt                       |    4 +-
 README.rst                                         |   21 +-
 pynlpl/__init__.py                                 |    2 +-
 pynlpl/algorithms.py                               |   11 +
 pynlpl/build/pynlpl/algorithms.py                  |   54 -
 pynlpl/docs/conf.py                                |  199 --
 pynlpl/evaluation.py                               |   43 +-
 pynlpl/formats/folia.py                            |  754 ++++++--
 pynlpl/formats/fql.py                              |   18 +-
 pynlpl/tests/FoLiA/foliatools/__init__.py          |    0
 pynlpl/tests/FoLiA/foliatools/alpino2folia.py      |  175 ++
 pynlpl/tests/FoLiA/foliatools/cgn2folia.py         |   85 +
 pynlpl/tests/FoLiA/foliatools/dcoi2folia.py        |   21 +
 .../tests/FoLiA/foliatools/folia2annotatedtxt.py   |  252 +++
 pynlpl/tests/FoLiA/foliatools/folia2columns.py     |  298 +++
 pynlpl/tests/FoLiA/foliatools/folia2dcoi.py        |   21 +
 pynlpl/tests/FoLiA/foliatools/folia2html.py        |   23 +
 pynlpl/tests/FoLiA/foliatools/folia2rst.py         |  161 ++
 pynlpl/tests/FoLiA/foliatools/folia2txt.py         |  186 ++
 pynlpl/tests/FoLiA/foliatools/foliacat.py          |  115 ++
 pynlpl/tests/FoLiA/foliatools/foliacorrect.py      |  175 ++
 pynlpl/tests/FoLiA/foliatools/foliacount.py        |  174 ++
 pynlpl/tests/FoLiA/foliatools/foliafreqlist.py     |  162 ++
 pynlpl/tests/FoLiA/foliatools/foliaid.py           |  145 ++
 pynlpl/tests/FoLiA/foliatools/foliamerge.py        |  178 ++
 pynlpl/tests/FoLiA/foliatools/foliaquery.py        |  172 ++
 pynlpl/tests/FoLiA/foliatools/foliaquery1.py       |  180 ++
 .../tests/FoLiA/foliatools/foliasetdefinition.py   |   93 +
 pynlpl/tests/FoLiA/foliatools/foliaspec.py         |  563 ++++++
 pynlpl/tests/FoLiA/foliatools/foliaspec2json.py    |   34 +
 pynlpl/tests/FoLiA/foliatools/foliatextcontent.py  |  359 ++++
 pynlpl/tests/FoLiA/foliatools/foliatree.py         |  179 ++
 pynlpl/tests/FoLiA/foliatools/foliavalidator.py    |  150 ++
 pynlpl/tests/FoLiA/foliatools/rst2folia.py         |  538 ++++++
 pynlpl/tests/FoLiA/foliatools/xslt.py              |  124 ++
 pynlpl/tests/FoLiA/schemas/generaterng.py          |   47 +
 pynlpl/tests/FoLiA/setup.py                        |   63 +
 pynlpl/tests/evaluation.py                         |   15 +-
 pynlpl/tests/folia.py                              | 1979 +++++++++++++++++---
 pynlpl/tests/fql.py                                |   34 +-
 requirements.txt                                   |    3 +
 setup.cfg                                          |    1 -
 setup.py                                           |   20 +-
 46 files changed, 7162 insertions(+), 748 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index 3a329b7..57d2c7b 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,11 +1,12 @@
 Metadata-Version: 1.1
 Name: PyNLPl
-Version: 1.1.2
+Version: 1.2.5
 Summary: PyNLPl, pronounced as 'pineapple', is a Python library for Natural Language Processing. It contains various modules useful for common, and less common, NLP tasks. PyNLPl contains modules for basic tasks, clients for interfacting with server, and modules for parsing several file formats common in NLP, most notably FoLiA.
 Home-page: https://github.com/proycon/pynlpl
 Author: Maarten van Gompel
 Author-email: proycon at anaproy.nl
 License: GPL
+Description-Content-Type: UNKNOWN
 Description: PyNLPl - Python Natural Language Processing Library
         =====================================================
         
@@ -43,7 +44,7 @@ Description: PyNLPl - Python Natural Language Processing Library
           documents in `FoLiA <http://proycon.github.io/folia>`_ format (Format for Linguistic Annotation).
         - ``pynlpl.formats.fql`` - Extensive library for the FoLiA Query Language (FQL),
           built on top of ``pynlpl.formats.folia``. FQL is currently documented `here
-          <https://github.com/proycon/foliadocserve>`__. 
+          <https://github.com/proycon/foliadocserve>`__.
         - ``pynlpl.formats.cql`` - Parser for the Corpus Query Language (CQL), as also used by
           Corpus Workbench and Sketch Engine. Contains a convertor to FQL.
         - ``pynlpl.formats.giza`` - Module for reading GIZA++ word alignment data
@@ -58,8 +59,25 @@ Description: PyNLPl - Python Natural Language Processing Library
           beam-search, hill climbing, A star, various variants of each)
         - ``pynlpl.statistics`` - Frequency lists, Levenshtein, common statistics and
           information theory functions
-        - ``pynlpl.textprocessors`` - Simple tokeniser, n-gram extraction 
+        - ``pynlpl.textprocessors`` - Simple tokeniser, n-gram extraction
         
+        Installation
+        --------------------
+        
+        Download and install the latest stable version directly from the Python Package
+        Index with ``pip install pynlpl`` (or ``pip3`` for Python 3 on most
+        systems). For global installations prepend ``sudo``.
+        
+        Alternatively, clone this repository and run ``python setup.py install`` (or
+        ``python3 setup.py install`` for Python 3 on most systems). Prepend ``sudo`` for
+        global installations.
+        
+        This software may also be found in certain Linux distributions, such as
+        the latest versions of Debian/Ubuntu, as ``python-pynlpl`` and ``python3-pynlpl``.
+        PyNLPL is also included in our `LaMachine <http://proycon.github.io/LaMachine>`_ distribution.
+        
+        Documentation
+        --------------------
         
         API Documentation can be found `here <http://pynlpl.readthedocs.io/en/latest/>`__.
         
diff --git a/PyNLPl.egg-info/PKG-INFO b/PyNLPl.egg-info/PKG-INFO
index 3a329b7..57d2c7b 100644
--- a/PyNLPl.egg-info/PKG-INFO
+++ b/PyNLPl.egg-info/PKG-INFO
@@ -1,11 +1,12 @@
 Metadata-Version: 1.1
 Name: PyNLPl
-Version: 1.1.2
+Version: 1.2.5
 Summary: PyNLPl, pronounced as 'pineapple', is a Python library for Natural Language Processing. It contains various modules useful for common, and less common, NLP tasks. PyNLPl contains modules for basic tasks, clients for interfacting with server, and modules for parsing several file formats common in NLP, most notably FoLiA.
 Home-page: https://github.com/proycon/pynlpl
 Author: Maarten van Gompel
 Author-email: proycon at anaproy.nl
 License: GPL
+Description-Content-Type: UNKNOWN
 Description: PyNLPl - Python Natural Language Processing Library
         =====================================================
         
@@ -43,7 +44,7 @@ Description: PyNLPl - Python Natural Language Processing Library
           documents in `FoLiA <http://proycon.github.io/folia>`_ format (Format for Linguistic Annotation).
         - ``pynlpl.formats.fql`` - Extensive library for the FoLiA Query Language (FQL),
           built on top of ``pynlpl.formats.folia``. FQL is currently documented `here
-          <https://github.com/proycon/foliadocserve>`__. 
+          <https://github.com/proycon/foliadocserve>`__.
         - ``pynlpl.formats.cql`` - Parser for the Corpus Query Language (CQL), as also used by
           Corpus Workbench and Sketch Engine. Contains a convertor to FQL.
         - ``pynlpl.formats.giza`` - Module for reading GIZA++ word alignment data
@@ -58,8 +59,25 @@ Description: PyNLPl - Python Natural Language Processing Library
           beam-search, hill climbing, A star, various variants of each)
         - ``pynlpl.statistics`` - Frequency lists, Levenshtein, common statistics and
           information theory functions
-        - ``pynlpl.textprocessors`` - Simple tokeniser, n-gram extraction 
+        - ``pynlpl.textprocessors`` - Simple tokeniser, n-gram extraction
         
+        Installation
+        --------------------
+        
+        Download and install the latest stable version directly from the Python Package
+        Index with ``pip install pynlpl`` (or ``pip3`` for Python 3 on most
+        systems). For global installations prepend ``sudo``.
+        
+        Alternatively, clone this repository and run ``python setup.py install`` (or
+        ``python3 setup.py install`` for Python 3 on most systems). Prepend ``sudo`` for
+        global installations.
+        
+        This software may also be found in certain Linux distributions, such as
+        the latest versions of Debian/Ubuntu, as ``python-pynlpl`` and ``python3-pynlpl``.
+        PyNLPL is also included in our `LaMachine <http://proycon.github.io/LaMachine>`_ distribution.
+        
+        Documentation
+        --------------------
         
         API Documentation can be found `here <http://pynlpl.readthedocs.io/en/latest/>`__.
         
diff --git a/PyNLPl.egg-info/SOURCES.txt b/PyNLPl.egg-info/SOURCES.txt
index e9d9144..1575b22 100644
--- a/PyNLPl.egg-info/SOURCES.txt
+++ b/PyNLPl.egg-info/SOURCES.txt
@@ -1,6 +1,7 @@
 LICENSE
 MANIFEST.in
 README.rst
+requirements.txt
 setup.cfg
 setup.py
 PyNLPl.egg-info/PKG-INFO
@@ -21,12 +22,10 @@ pynlpl/search.py
 pynlpl/statistics.py
 pynlpl/tagger.py
 pynlpl/textprocessors.py
-pynlpl/build/pynlpl/algorithms.py
 pynlpl/clients/__init__.py
 pynlpl/clients/cornetto.py
 pynlpl/clients/freeling.py
 pynlpl/clients/frogclient.py
-pynlpl/docs/conf.py
 pynlpl/formats/__init__.py
 pynlpl/formats/cgn.py
 pynlpl/formats/cql.py
@@ -60,6 +59,34 @@ pynlpl/tests/search.py
 pynlpl/tests/statistics.py
 pynlpl/tests/test.sh
 pynlpl/tests/textprocessors.py
+pynlpl/tests/FoLiA/setup.py
+pynlpl/tests/FoLiA/foliatools/__init__.py
+pynlpl/tests/FoLiA/foliatools/alpino2folia.py
+pynlpl/tests/FoLiA/foliatools/cgn2folia.py
+pynlpl/tests/FoLiA/foliatools/dcoi2folia.py
+pynlpl/tests/FoLiA/foliatools/folia2annotatedtxt.py
+pynlpl/tests/FoLiA/foliatools/folia2columns.py
+pynlpl/tests/FoLiA/foliatools/folia2dcoi.py
+pynlpl/tests/FoLiA/foliatools/folia2html.py
+pynlpl/tests/FoLiA/foliatools/folia2rst.py
+pynlpl/tests/FoLiA/foliatools/folia2txt.py
+pynlpl/tests/FoLiA/foliatools/foliacat.py
+pynlpl/tests/FoLiA/foliatools/foliacorrect.py
+pynlpl/tests/FoLiA/foliatools/foliacount.py
+pynlpl/tests/FoLiA/foliatools/foliafreqlist.py
+pynlpl/tests/FoLiA/foliatools/foliaid.py
+pynlpl/tests/FoLiA/foliatools/foliamerge.py
+pynlpl/tests/FoLiA/foliatools/foliaquery.py
+pynlpl/tests/FoLiA/foliatools/foliaquery1.py
+pynlpl/tests/FoLiA/foliatools/foliasetdefinition.py
+pynlpl/tests/FoLiA/foliatools/foliaspec.py
+pynlpl/tests/FoLiA/foliatools/foliaspec2json.py
+pynlpl/tests/FoLiA/foliatools/foliatextcontent.py
+pynlpl/tests/FoLiA/foliatools/foliatree.py
+pynlpl/tests/FoLiA/foliatools/foliavalidator.py
+pynlpl/tests/FoLiA/foliatools/rst2folia.py
+pynlpl/tests/FoLiA/foliatools/xslt.py
+pynlpl/tests/FoLiA/schemas/generaterng.py
 pynlpl/tests/evaluation_timbl/test
 pynlpl/tests/evaluation_timbl/test.IB1.O.gr.k1.out
 pynlpl/tests/evaluation_timbl/timbltest.sh
diff --git a/PyNLPl.egg-info/requires.txt b/PyNLPl.egg-info/requires.txt
index 5e36e8a..f469dc7 100644
--- a/PyNLPl.egg-info/requires.txt
+++ b/PyNLPl.egg-info/requires.txt
@@ -1,3 +1,3 @@
-lxml >= 2.2
-httplib2 >= 0.6
+lxml>=2.2
+httplib2>=0.6
 rdflib
diff --git a/README.rst b/README.rst
index 126734f..6a03e92 100644
--- a/README.rst
+++ b/README.rst
@@ -35,7 +35,7 @@ The following modules are available:
   documents in `FoLiA <http://proycon.github.io/folia>`_ format (Format for Linguistic Annotation).
 - ``pynlpl.formats.fql`` - Extensive library for the FoLiA Query Language (FQL),
   built on top of ``pynlpl.formats.folia``. FQL is currently documented `here
-  <https://github.com/proycon/foliadocserve>`__. 
+  <https://github.com/proycon/foliadocserve>`__.
 - ``pynlpl.formats.cql`` - Parser for the Corpus Query Language (CQL), as also used by
   Corpus Workbench and Sketch Engine. Contains a convertor to FQL.
 - ``pynlpl.formats.giza`` - Module for reading GIZA++ word alignment data
@@ -50,8 +50,25 @@ The following modules are available:
   beam-search, hill climbing, A star, various variants of each)
 - ``pynlpl.statistics`` - Frequency lists, Levenshtein, common statistics and
   information theory functions
-- ``pynlpl.textprocessors`` - Simple tokeniser, n-gram extraction 
+- ``pynlpl.textprocessors`` - Simple tokeniser, n-gram extraction
 
+Installation
+--------------------
+
+Download and install the latest stable version directly from the Python Package
+Index with ``pip install pynlpl`` (or ``pip3`` for Python 3 on most
+systems). For global installations prepend ``sudo``.
+
+Alternatively, clone this repository and run ``python setup.py install`` (or
+``python3 setup.py install`` for Python 3 on most systems). Prepend ``sudo`` for
+global installations.
+
+This software may also be found in certain Linux distributions, such as
+the latest versions of Debian/Ubuntu, as ``python-pynlpl`` and ``python3-pynlpl``.
+PyNLPL is also included in our `LaMachine <http://proycon.github.io/LaMachine>`_ distribution.
+
+Documentation
+--------------------
 
 API Documentation can be found `here <http://pynlpl.readthedocs.io/en/latest/>`__.
 
diff --git a/pynlpl/__init__.py b/pynlpl/__init__.py
index 0087ef6..ac607b1 100644
--- a/pynlpl/__init__.py
+++ b/pynlpl/__init__.py
@@ -2,4 +2,4 @@
 
 The library is divided into several packages and modules. It is designed for Python 2.6 and upwards. Including Python 3."""
 
-VERSION = "1.1.2"
+VERSION = "1.2.5"
diff --git a/pynlpl/algorithms.py b/pynlpl/algorithms.py
index 5c9fa54..4476875 100644
--- a/pynlpl/algorithms.py
+++ b/pynlpl/algorithms.py
@@ -42,6 +42,17 @@ def consecutivegaps(n, leftmargin = 0, rightmargin = 0):
             length -= 1
         begin += 1
 
+def possiblesplits(n, minsplits=2, maxsplits=0):
+    """Returns lists of (index,length) tuples, representing all possible splits of a sequence of length n."""
+    if not maxsplits: maxsplits = n
+    for nrsplits in range(minsplits,maxsplits + 1):
+        for split in sum_to_n(n,nrsplits):
+            split_with_indices = []
+            begin = 0
+            for length in split:
+                split_with_indices.append( (begin, length) )
+                begin += length
+            yield split_with_indices
 
 def bytesize(n):
     """Return the required size in bytes to encode the specified integer"""
diff --git a/pynlpl/build/pynlpl/algorithms.py b/pynlpl/build/pynlpl/algorithms.py
deleted file mode 100644
index 5c9fa54..0000000
--- a/pynlpl/build/pynlpl/algorithms.py
+++ /dev/null
@@ -1,54 +0,0 @@
-
-###############################################################9
-# PyNLPl - Algorithms
-#   by Maarten van Gompel
-#   Centre for Language Studies
-#   Radboud University Nijmegen
-#   http://www.github.com/proycon/pynlpl
-#   proycon AT anaproy DOT nl
-#
-#       Licensed under GPLv3
-#
-###############################################################
-
-from __future__ import print_function
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-
-def sum_to_n(n, size, limit=None): #from http://stackoverflow.com/questions/2065553/python-get-all-numbers-that-add-up-to-a-number
-    """Produce all lists of `size` positive integers in decreasing order
-    that add up to `n`."""
-    if size == 1:
-        yield [n]
-        return
-    if limit is None:
-        limit = n
-    start = (n + size - 1) // size
-    stop = min(limit, n - size + 1) + 1
-    for i in range(start, stop):
-        for tail in sum_to_n(n - i, size - 1, i):
-            yield [i] + tail
-
-
-def consecutivegaps(n, leftmargin = 0, rightmargin = 0):
-    """Compute all possible single consecutive gaps in any sequence of the specified length. Returns
-    (beginindex, length) tuples. Runs in  O(n(n+1) / 2) time. Argument is the length of the sequence rather than the sequence itself"""
-    begin = leftmargin
-    while begin < n:
-        length = (n - rightmargin) - begin
-        while length > 0:
-            yield (begin, length)
-            length -= 1
-        begin += 1
-
-
-def bytesize(n):
-    """Return the required size in bytes to encode the specified integer"""
-    for i in range(1, 1000):
-        if n < 2**(8*i):
-            return i
-
-
-
-
diff --git a/pynlpl/docs/conf.py b/pynlpl/docs/conf.py
deleted file mode 100644
index f052138..0000000
--- a/pynlpl/docs/conf.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# PyNLPl documentation build configuration file, created by
-# sphinx-quickstart on Tue Jul  6 22:07:20 2010.
-#
-# This file is execfile()d with the current directory set to its containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-import sys, os
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.append(os.path.abspath('.'))
-
-sys.path.append(os.path.abspath('../../'))
-from pynlpl import VERSION
-
-# -- General configuration -----------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be extensions
-# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon','sphinx.ext.autosummary']
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix of source filenames.
-source_suffix = '.rst'
-
-# The encoding of source files.
-#source_encoding = 'utf-8'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'PyNLPl'
-copyright = u'2016, Maarten van Gompel'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = VERSION
-# The full version, including alpha/beta/rc tags.
-release = VERSION
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#today = ''
-# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
-
-# List of documents that shouldn't be included in the build.
-#unused_docs = []
-
-# List of directories, relative to source directory, that shouldn't be searched
-# for source files.
-exclude_trees = ['_build']
-
-# The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
-
-
-# -- Options for HTML output ---------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  Major themes that come with
-# Sphinx are currently 'default' and 'sphinxdoc'.
-html_theme = 'default'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
-
-# The name for this set of Sphinx documents.  If None, it defaults to
-# "<project> v<release> documentation".
-#html_title = None
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#html_logo = None
-
-# The name of an image file (within the static path) to use as favicon of the
-# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-# html_static_path = ['_static']
-
-# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
-# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#html_additional_pages = {}
-
-# If false, no module index is generated.
-#html_use_modindex = True
-
-# If false, no index is generated.
-#html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
-
-# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = ''
-
-# Output file base name for HTML help builder.
-# htmlhelp_basename = 'pynlpl'
-
-
-# -- Options for LaTeX output --------------------------------------------------
-
-# The paper size ('letter' or 'a4').
-latex_paper_size = 'a4'
-
-# The font size ('10pt', '11pt' or '12pt').
-#latex_font_size = '10pt'
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
-latex_documents = [
-  ('index', 'pynlpl.tex', u'PyNLPl Documentation',
-   u'Maarten van Gompel', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#latex_use_parts = False
-
-# Additional stuff for the LaTeX preamble.
-#latex_preamble = ''
-
-# Documents to append as an appendix to all manuals.
-#latex_appendices = []
-
-# If false, no module index is generated.
-#latex_use_modindex = True
-
-autosummary_generate = True
diff --git a/pynlpl/evaluation.py b/pynlpl/evaluation.py
index 537f38c..e19470a 100644
--- a/pynlpl/evaluation.py
+++ b/pynlpl/evaluation.py
@@ -38,11 +38,11 @@ import subprocess
 import itertools
 import time
 import random
+import math
 import copy
 import datetime
 import os.path
 
-
 def auc(x, y, reorder=False): #from sklearn, http://scikit-learn.org, licensed under BSD License
     """Compute Area Under the Curve (AUC) using the trapezoidal rule
 
@@ -107,6 +107,19 @@ def auc(x, y, reorder=False): #from sklearn, http://scikit-learn.org, licensed u
     return area
 
 
+def mae(absolute_error_values):
+    if np is None:
+        return sum(absolute_error_values) / len(absolute_error_values)
+    else:
+        return np.mean(absolute_error_values)
+
+def rmse(squared_error_values):
+    if np is None:
+        return math.sqrt(sum(squared_error_values)/len(squared_error_values))
+    else:
+        return math.sqrt(np.mean(squared_error_values))
+
+
 class ProcessFailed(Exception):
     pass
 
@@ -381,6 +394,34 @@ class ClassEvaluation(object):
         return str(self)
 
 
+class OrdinalEvaluation(ClassEvaluation):
+
+    def __init__(self,  goals = [], observations = [], missing = {}, encoding ='utf-8'):
+        ClassEvaluation.__init__(self,goals,observations,missing,encoding='utf-8')
+
+    def compute(self):
+        assert not False in [type(cls) == int for cls in self.classes]
+        ClassEvaluation.compute(self)
+        self.error = defaultdict(list)
+        self.squared_error = defaultdict(list)
+        for goal, observation in self:
+            self.error[observation].append(abs(goal-observation))
+            self.squared_error[observation].append(abs(goal-observation)**2)
+
+    def mae(self, cls=None):
+        if not self.computed: self.compute()
+        if cls:
+            return mae(self.error[cls])
+        else:
+            return mae(sum([self.error[x] for x in set(self.goals)], []))
+             
+    def rmse(self, cls=None):
+        if not self.computed: self.compute()
+        if cls:
+            return rmse(self.squared_error[cls])
+        else:
+            return rmse(sum([self.squared_error[x] for x in set(self.goals)], []))
+
 class AbstractExperiment(object):
 
     def __init__(self, inputdata = None, **parameters):
diff --git a/pynlpl/formats/folia.py b/pynlpl/formats/folia.py
index 5dcba30..ea5dc7a 100644
--- a/pynlpl/formats/folia.py
+++ b/pynlpl/formats/folia.py
@@ -26,6 +26,7 @@ import sys
 
 from copy import copy, deepcopy
 from datetime import datetime
+from collections import OrderedDict
 import inspect
 import itertools
 import glob
@@ -70,9 +71,9 @@ LXE=True #use lxml instead of built-in ElementTree (default)
 
 #foliaspec:version:FOLIAVERSION
 #The FoLiA version
-FOLIAVERSION = "1.4.0"
+FOLIAVERSION = "1.5.1"
 
-LIBVERSION = FOLIAVERSION + '.84' #== FoLiA version + library revision
+LIBVERSION = FOLIAVERSION + '.88' #== FoLiA version + library revision
 
 #0.9.1.31 is the first version with Python 3 support
 
@@ -98,6 +99,7 @@ DOCSTRING_GENERIC_ATTRIBS = """    id (str): An ID for the element. IDs must be
     speaker (str): Speech annotation attribute: a name or ID of the speaker. This is a generic FoLiA attribute.
     begintime (str): Speech annotation attribute: the time (in ``hh:mm:ss.mmm`` format, relative to the media file in ``src``) when the audio that this element describes starts. This is a generic FoLiA attribute.
     endtime (str): Speech annotation attribute: the time (in ``hh:mm:ss.mmm`` format, relative to the media file in ``src``) when the audio that this element describes starts. This is a generic FoLiA attribute.
+    textclass (str): Refers to the textclass from which this annotation is derived (defaults to "current")>. This is a generic FoLiA attribute.
     contents (list): Alternative for ``*args``, exists for purely syntactic reasons.
 """
 
@@ -119,7 +121,7 @@ class AnnotatorType:
 #foliaspec:attributes
 #Defines all common FoLiA attributes (as part of the Attrib enumeration)
 class Attrib:
-    ID, CLASS, ANNOTATOR, CONFIDENCE, N, DATETIME, BEGINTIME, ENDTIME, SRC, SPEAKER = range(10)
+    ID, CLASS, ANNOTATOR, CONFIDENCE, N, DATETIME, BEGINTIME, ENDTIME, SRC, SPEAKER, TEXTCLASS, METADATA = range(12)
 
 #foliaspec:annotationtype
 #Defines all annotation types (as part of the AnnotationType enumeration)
@@ -149,6 +151,10 @@ class NoSuchPhon(Exception):
     """Exception raised when the requested type of phonetic content does not exist for the selected element"""
     pass
 
+class InconsistentText(Exception):
+    """Exception raised when the the text of a structural element is inconsistent with text on deeper levels"""
+    pass
+
 class DuplicateAnnotationError(Exception):
     pass
 
@@ -166,6 +172,12 @@ class UnresolvableTextContent(Exception):
 class MalformedXMLError(Exception):
     pass
 
+class ParseError(Exception):
+    def __init__(self, msg, cause=None):
+        self.cause = cause
+        Exception.__init__(self, msg)
+
+
 class ModeError(Exception):
     pass
 
@@ -182,10 +194,10 @@ class CorrectionHandling:
     EITHER,CURRENT, ORIGINAL = range(3)
 
 
-def checkversion(version):
+def checkversion(version, REFVERSION=FOLIAVERSION):
     """Checks FoLiA version, returns 1 if the document is newer than the library, -1 if it is older, 0 if it is equal"""
     try:
-        for refversion, docversion in zip([int(x) for x in FOLIAVERSION.split('.')], [int(x) for x in version.split('.')]):
+        for refversion, docversion in zip([int(x) for x in REFVERSION.split('.')], [int(x) for x in version.split('.')]):
             if docversion > refversion:
                 return 1 #doc is newer than library
             elif docversion < refversion:
@@ -261,7 +273,9 @@ def parsecommonarguments(object, doc, annotationtype, required, allowed, **kwarg
 
         if object.set:
             if doc and (not (annotationtype in doc.annotationdefaults) or not (object.set in doc.annotationdefaults[annotationtype])):
-                if doc.autodeclare:
+                if object.set in doc.alias_set:
+                    object.set = doc.alias_set[object.set]
+                elif doc.autodeclare:
                     doc.annotations.append( (annotationtype, object.set ) )
                     doc.annotationdefaults[annotationtype] = {object.set: {} }
                 else:
@@ -421,6 +435,26 @@ def parsecommonarguments(object, doc, annotationtype, required, allowed, **kwarg
             object.setphon(kwargs['phon'])
         del kwargs['phon']
 
+    if 'textclass' in kwargs:
+        if not Attrib.TEXTCLASS in supported:
+            raise ValueError("Textclass is not supported for " + object.__class__.__name__)
+        object.textclass = kwargs['textclass']
+        del kwargs['textclass']
+    else:
+        if Attrib.TEXTCLASS in supported:
+            object.textclass = "current"
+
+    if 'metadata' in kwargs:
+        if not Attrib.METADATA in supported:
+            raise ValueError("Metadata is not supported for " + object.__class__.__name__)
+        object.metadata = kwargs['metadata']
+        if doc:
+            try:
+                doc.submetadata[kwargs['metadata']]
+            except KeyError:
+                raise KeyError("No such metadata defined: " + kwargs['metadata'])
+        del kwargs['metadata']
+
     if object.XLINK:
         if 'href' in kwargs:
             object.href =kwargs['href']
@@ -472,7 +506,9 @@ def parsecommonarguments(object, doc, annotationtype, required, allowed, **kwarg
 
     return kwargs
 
-
+def norm_spaces(s):
+    """Normalize spaces, splits on whitespace (\n\r\t\s) and rejoins (faster than a s/\s+// regexp)"""
+    return ' '.join(s.split())
 
 def parse_datetime(s): #source: http://stackoverflow.com/questions/2211362/how-to-parse-xsddatetime-format
     """Returns (datetime, tz offset in minutes) or (None, None)."""
@@ -666,7 +702,7 @@ class AbstractElement(object):
     def __getattr__(self, attr):
         """Internal method"""
         #overriding getattr so we can get defaults here rather than needing a copy on each element, saves memory
-        if attr in ('set','cls','confidence','annotator','annotatortype','datetime','n','href','src','speaker','begintime','endtime','xlinktype','xlinktitle','xlinklabel','xlinkrole','xlinkshow','label'):
+        if attr in ('set','cls','confidence','annotator','annotatortype','datetime','n','href','src','speaker','begintime','endtime','xlinktype','xlinktitle','xlinklabel','xlinkrole','xlinkshow','label', 'textclass', 'metadata'):
             return None
         else:
             return super(AbstractElement, self).__getattribute__(attr)
@@ -734,11 +770,107 @@ class AbstractElement(object):
         """Alias for :meth:`text` with ``strict=True``"""
         return self.text(cls,strict=True)
 
+    def findcorrectionhandling(self, cls):
+        """Find the proper correctionhandling given a textclass by looking in the underlying corrections where it is reused"""
+        if cls == "current":
+            return CorrectionHandling.CURRENT
+        elif cls == "original":
+            return CorrectionHandling.ORIGINAL #backward compatibility
+        else:
+            correctionhandling = None
+            #but any other class may be anything
+            #Do we have corrections at all? otherwise no need to bother
+            for correction in self.select(Correction):
+                #yes, in which branch is the text class found?
+                found = False
+                hastext = False
+                if correction.hasnew():
+                    found = True
+                    doublecorrection = correction.new().count(Correction) > 0
+                    if doublecorrection: return None #skipping text validation, correction is too complex (nested) to handle for now
+                    for t in  correction.new().select(TextContent):
+                        hastext = True
+                        if t.cls == cls:
+                            if correctionhandling is not None and correctionhandling != CorrectionHandling.CURRENT:
+                                return None #inconsistent
+                            else:
+                                correctionhandling = CorrectionHandling.CURRENT
+                            break
+                elif correction.hascurrent():
+                    found = True
+                    doublecorrection = correction.current().count(Correction) > 0
+                    if doublecorrection: return None #skipping text validation, correction is too complex (nested) to handle for now
+                    for t in  correction.current().select(TextContent):
+                        hastext = True
+                        if t.cls == cls:
+                            if correctionhandling is not None and correctionhandling != CorrectionHandling.CURRENT:
+                                return None #inconsistent
+                            else:
+                                correctionhandling = CorrectionHandling.CURRENT
+                            break
+                if correction.hasoriginal():
+                    found = True
+                    doublecorrection = correction.original().count(Correction) > 0
+                    if doublecorrection: return None #skipping text validation, correction is too complex (nested) to handle for now
+                    for t in  correction.original().select(TextContent):
+                        hastext = True
+                        if t.cls == cls:
+                            if correctionhandling is not None and correctionhandling != CorrectionHandling.ORIGINAL:
+                                return None #inconsistent
+                            else:
+                                correctionhandling = CorrectionHandling.ORIGINAL
+                            break
+            if correctionhandling is None:
+                #well, we couldn't find our textclass in any correction, just fall back to current and let text validation fail if needed
+                return CorrectionHandling.CURRENT
+
+
+    def textvalidation(self, warnonly=None):
+        """Run text validation on this element. Checks whether any text redundancy is consistent and whether offsets are valid.
+
+        Parameters:
+            warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5)
+
+        Returns:
+            bool
+        """
+
+        if warnonly is None and self.doc and self.doc.version:
+            warnonly = (checkversion(self.doc.version, '1.5.0') < 0) #warn only for documents older than FoLiA v1.5
+        valid = True
+        for cls in self.doc.textclasses:
+            if self.hastext(cls, strict=True) and not isinstance(self, (Linebreak, Whitespace)):
+                if self.doc and self.doc.debug: print("[PyNLPl FoLiA DEBUG] Text validation on " + repr(self),file=stderr)
+                correctionhandling = self.findcorrectionhandling(cls)
+                if correctionhandling is None:
+                    #skipping text validation, correction is too complex (nested) to handle for now; just assume valid (benefit of the doubt)
+                    if self.doc and self.doc.debug: print("[PyNLPl FoLiA DEBUG] SKIPPING Text validation on " + repr(self) + ", too complex to handle (nested corrections or inconsistent use)",file=stderr)
+                    return True #just assume it's valid then
+
+                strictnormtext = self.text(cls,retaintokenisation=False,strict=True, normalize_spaces=True)
+                deepnormtext = self.text(cls,retaintokenisation=False,strict=False, normalize_spaces=True)
+                if strictnormtext != deepnormtext:
+                    valid = False
+                    msg = "Text for " + self.__class__.__name__ + ", ID " + str(self.id) + ", class " + cls  + ", is inconsistent: expected (after normalization): '" + deepnormtext + "', got (after normalization): '" + strictnormtext + "'"
+                    if warnonly:
+                        print("TEXT VALIDATION ERROR: " + msg,file=sys.stderr)
+                    else:
+                        raise InconsistentText(msg)
+
+                #validate offsets
+                tc = self.textcontent(cls)
+                if tc.offset is not None:
+                    #we can't validate the reference of this element yet since it may point to higher level elements still being created!! we store it in a buffer that will
+                    #be processed by pendingvalidation() after parsing and prior to serialisation
+                    if self.doc and self.doc.debug: print("[PyNLPl FoLiA DEBUG] Queing element for later offset validation: " + repr(self),file=stderr)
+                    self.doc.offsetvalidationbuffer.append( (self, cls) )
+        return valid
+
     def toktext(self,cls='current'):
         """Alias for :meth:`text` with ``retaintokenisation=True``"""
         return self.text(cls,retaintokenisation=True)
 
-    def text(self, cls='current', retaintokenisation=False, previousdelimiter="",strict=False, correctionhandling=CorrectionHandling.CURRENT):
+    def text(self, cls='current', retaintokenisation=False, previousdelimiter="",strict=False, correctionhandling=CorrectionHandling.CURRENT, normalize_spaces=False):
         """Get the text associated with this element (of the specified class)
 
         The text will be constructed from child-elements whereever possible, as they are more specific.
@@ -751,6 +883,7 @@ class AbstractElement(object):
             previousdelimiter (str): Can be set to a delimiter that was last outputed, useful when chaining calls to :meth:`text`. Defaults to an empty string.
             strict (bool):  Set this iif you are strictly interested in the text explicitly associated with the element, without recursing into children. Defaults to ``False``.
             correctionhandling: Specifies what text to retrieve when corrections are encountered. The default is ``CorrectionHandling.CURRENT``, which will retrieve the corrected/current text. You can set this to ``CorrectionHandling.ORIGINAL`` if you want the text prior to correction, and ``CorrectionHandling.EITHER`` if you don't care.
+            normalize_spaces (bool): Return the text with multiple spaces, linebreaks, tabs normalized to single spaces
 
         Example::
 
@@ -764,17 +897,20 @@ class AbstractElement(object):
         """
 
         if strict:
-            return self.textcontent(cls, correctionhandling).text()
+            return self.textcontent(cls, correctionhandling).text(normalize_spaces=normalize_spaces)
 
         if self.TEXTCONTAINER:
             s = ""
             for e in self:
                 if isstring(e):
                     s += e
-                else:
+                elif e.PRINTABLE:
                     if s: s += e.TEXTDELIMITER #for AbstractMarkup, will usually be ""
                     s += e.text()
-            return s
+            if normalize_spaces:
+                return norm_spaces(s)
+            else:
+                return s
         elif not self.PRINTABLE: #only printable elements can hold text
             raise NoSuchText
         else:
@@ -782,7 +918,8 @@ class AbstractElement(object):
             delimiter = ""
             s = ""
             for e in self:
-                if e.PRINTABLE and not isinstance(e, TextContent) and not isinstance(e, String):
+                #was: e.PRINTABLE and not isinstance(e, TextContent) and not isinstance(e, String):
+                if isinstance(e, (AbstractStructureElement, Correction, AbstractSpanAnnotation)):   #AbstractSpanAnnotation is needed when requesting text() on nested span annotations
                     try:
                         s += e.text(cls,retaintokenisation, delimiter,False,correctionhandling)
 
@@ -796,9 +933,12 @@ class AbstractElement(object):
                 s = self.textcontent(cls, correctionhandling).text()
 
             if s and previousdelimiter:
-                return previousdelimiter + s
-            elif s:
-                return s
+                s = previousdelimiter + s
+            if s:
+                if normalize_spaces:
+                    return norm_spaces(s)
+                else:
+                    return s
             else:
                 #No text found at all :`(
                 raise NoSuchText
@@ -1239,6 +1379,24 @@ class AbstractElement(object):
             if isinstance(e,AbstractElement): e.setdocument(doc)
 
     @classmethod
+    def accepts(Parentclass, Class, raiseexceptions=True, parentinstance=None):
+        if Class in Parentclass.ACCEPTED_DATA:
+            return True
+        else:
+            #Class is not in accepted data, but perhaps any of its ancestors is?
+            for c in Class.__mro__: #iterate over all base/super methods (automatically recurses)
+                if c is not Class and c in Parentclass.ACCEPTED_DATA:
+                    return True
+            if raiseexceptions:
+                extra = ""
+                if parentinstance and parentinstance.id:
+                    extra = ' (id=' + parentinstance.id + ')'
+                raise ValueError("Unable to add object of type " + Class.__name__ + " to " + Parentclass.__name__ + " " + extra + ". Type not allowed as child.")
+            else:
+                return False
+
+
+    @classmethod
     def addable(Class, parent, set=None, raiseexceptions=True):
         """Tests whether a new element of this class can be added to the parent.
 
@@ -1258,29 +1416,8 @@ class AbstractElement(object):
          """
 
 
-        if not Class in parent.ACCEPTED_DATA:
-            #Class is not in accepted data, but perhaps any of its ancestors is?
-            found = False
-            c = Class
-            try:
-                while c.__base__:
-                    if c.__base__ in parent.ACCEPTED_DATA:
-                        found = True
-                        break
-                    c = c.__base__
-            except Exception:
-                pass
-            if not found:
-                if raiseexceptions:
-                    if parent.id:
-                        extra = ' (id=' + parent.id + ')'
-                    else:
-                        extra = ''
-                    raise ValueError("Unable to add object of type " + Class.__name__ + " to " + parent.__class__.__name__ + " " + extra + ". Type not allowed as child.")
-                else:
-                    return False
-
-
+        if not parent.__class__.accepts(Class, raiseexceptions, parent):
+            return False
 
         if Class.OCCURRENCES > 0:
             #check if the parent doesn't have too many already
@@ -1757,7 +1894,10 @@ class AbstractElement(object):
                     if self.set:
                         if not self.ANNOTATIONTYPE in self.doc.annotationdefaults or len(self.doc.annotationdefaults[self.ANNOTATIONTYPE]) != 1 or list(self.doc.annotationdefaults[self.ANNOTATIONTYPE].keys())[0] != self.set:
                             if self.set != None:
-                                attribs['{' + NSFOLIA + '}set'] = self.set
+                                if self.ANNOTATIONTYPE in self.doc.set_alias and self.set in self.doc.set_alias[self.ANNOTATIONTYPE]:
+                                    attribs['{' + NSFOLIA + '}set'] = self.doc.set_alias[self.ANNOTATIONTYPE][self.set] #use alias instead
+                                else:
+                                    attribs['{' + NSFOLIA + '}set'] = self.set
                 except AttributeError:
                     pass
 
@@ -1815,6 +1955,13 @@ class AbstractElement(object):
             if self.endtime:
                 attribs['{' + NSFOLIA + '}endtime'] = "%02d:%02d:%02d.%03d" % self.endtime
 
+        if '{' + NSFOLIA + '}textclass' not in attribs: #do not override if caller already set it
+            if self.textclass and self.textclass != "current":
+                attribs['{' + NSFOLIA + '}textclass'] = self.textclass
+
+        if '{' + NSFOLIA + '}metadata' not in attribs: #do not override if caller already set it
+            if self.metadata:
+                attribs['{' + NSFOLIA + '}metadata'] = self.metadata
 
         if self.XLINK:
             if self.href:
@@ -2065,6 +2212,23 @@ class AbstractElement(object):
                     l += e.items(l)
         return l
 
+    def getmetadata(self, key=None):
+        """Get the metadata that applies to this element, automatically inherited from parent elements"""
+        if self.metadata:
+            d =  self.doc.submetadata[self.metadata]
+        elif self.parent:
+            d =  self.parent.getmetadata()
+        elif self.doc:
+            d =  self.doc.metadata
+        else:
+            return None
+        if key:
+            return d[key]
+        else:
+            return d
+
+
+
     def getindex(self, child, recursive=True, ignore=True):
         """Get the index at which an element occurs, recursive by default!
 
@@ -2243,28 +2407,28 @@ class AbstractElement(object):
 
         attribs = [ ]
         if cls.REQUIRED_ATTRIBS and Attrib.ID in cls.REQUIRED_ATTRIBS:
-            attribs.append( E.attribute(name='id', ns="http://www.w3.org/XML/1998/namespace") )
+            attribs.append( E.attribute(E.data(type='ID',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='id', ns="http://www.w3.org/XML/1998/namespace") )
         elif Attrib.ID in cls.OPTIONAL_ATTRIBS:
-            attribs.append( E.optional( E.attribute(name='id', ns="http://www.w3.org/XML/1998/namespace") ) )
+            attribs.append( E.optional( E.attribute(E.data(type='ID',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='id', ns="http://www.w3.org/XML/1998/namespace") ) )
         if Attrib.CLASS in cls.REQUIRED_ATTRIBS:
             #Set is a tough one, we can't require it as it may be defined in the declaration: we make it optional and need schematron to resolve this later
-            attribs.append( E.attribute(name='class') )
-            attribs.append( E.optional( E.attribute( name='set' ) ) )
+            attribs.append( E.attribute(E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='class') )
+            attribs.append( E.optional( E.attribute( E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='set' ) ) )
         elif Attrib.CLASS in cls.OPTIONAL_ATTRIBS:
-            attribs.append( E.optional( E.attribute(name='class') ) )
-            attribs.append( E.optional( E.attribute( name='set' ) ) )
+            attribs.append( E.optional( E.attribute(E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='class') ) )
+            attribs.append( E.optional( E.attribute(E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='set' ) ) )
         if Attrib.ANNOTATOR in cls.REQUIRED_ATTRIBS or Attrib.ANNOTATOR in cls.OPTIONAL_ATTRIBS:
             #Similarly tough
-            attribs.append( E.optional( E.attribute(name='annotator') ) )
+            attribs.append( E.optional( E.attribute(E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='annotator') ) )
             attribs.append( E.optional( E.attribute(name='annotatortype') ) )
         if Attrib.CONFIDENCE in cls.REQUIRED_ATTRIBS:
             attribs.append(  E.attribute(E.data(type='double',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='confidence') )
         elif Attrib.CONFIDENCE in cls.OPTIONAL_ATTRIBS:
             attribs.append(  E.optional( E.attribute(E.data(type='double',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='confidence') ) )
         if Attrib.N in cls.REQUIRED_ATTRIBS:
-            attribs.append( E.attribute( name='n') )
+            attribs.append( E.attribute( E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='n') )
         elif Attrib.N in cls.OPTIONAL_ATTRIBS:
-            attribs.append( E.optional( E.attribute( name='n') ) )
+            attribs.append( E.optional( E.attribute( E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='n') ) )
         if Attrib.DATETIME in cls.REQUIRED_ATTRIBS:
             attribs.append( E.attribute(E.data(type='dateTime',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='datetime') )
         elif Attrib.DATETIME in cls.OPTIONAL_ATTRIBS:
@@ -2278,13 +2442,21 @@ class AbstractElement(object):
         elif Attrib.ENDTIME in cls.OPTIONAL_ATTRIBS:
             attribs.append( E.optional( E.attribute(name='endtime') ) )
         if Attrib.SRC in cls.REQUIRED_ATTRIBS:
-            attribs.append(E.attribute(name='src') )
+            attribs.append(E.attribute(E.data(type='anyURI',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='src') )
         elif Attrib.SRC in cls.OPTIONAL_ATTRIBS:
-            attribs.append( E.optional( E.attribute(name='src') ) )
+            attribs.append( E.optional( E.attribute(E.data(type='anyURI',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='src') ) )
         if Attrib.SPEAKER in cls.REQUIRED_ATTRIBS:
-            attribs.append(E.attribute(name='speaker') )
+            attribs.append(E.attribute(E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'), name='speaker') )
         elif Attrib.SPEAKER in cls.OPTIONAL_ATTRIBS:
-            attribs.append( E.optional( E.attribute(name='speaker') ) )
+            attribs.append( E.optional( E.attribute(E.data(type='string',datatypeLibrary='http://www.w3.org/2001/XMLSchema-datatypes'),name='speaker') ) )
+        if Attrib.TEXTCLASS in cls.REQUIRED_ATTRIBS:
+            attribs.append(E.attribute(name='textclass') )
+        elif Attrib.TEXTCLASS in cls.OPTIONAL_ATTRIBS:
+            attribs.append( E.optional( E.attribute(name='textclass') ) )
+        if Attrib.METADATA in cls.REQUIRED_ATTRIBS:
+            attribs.append(E.attribute(name='metadata') )
+        elif Attrib.METADATA in cls.OPTIONAL_ATTRIBS:
+            attribs.append( E.optional( E.attribute(name='metadata') ) )
         if cls.XLINK:
             attribs += [ #loose interpretation of specs, not checking whether xlink combinations are valid
                     E.optional(E.attribute(name='href',ns="http://www.w3.org/1999/xlink"),E.attribute(name='type',ns="http://www.w3.org/1999/xlink") ),
@@ -2404,10 +2576,19 @@ class AbstractElement(object):
 
         for subnode in node: #pylint: disable=too-many-nested-blocks
             #don't trip over comments
-            if not isinstance(subnode, ElementTree._Comment): #pylint: disable=protected-access
+            if isinstance(subnode, ElementTree._Comment): #pylint: disable=protected-access
+                if (Class.TEXTCONTAINER or Class.PHONCONTAINER) and subnode.tail:
+                    args.append(subnode.tail)
+            else:
                 if subnode.tag.startswith('{' + NSFOLIA + '}'):
                     if doc.debug >= 1: print("[PyNLPl FoLiA DEBUG] Processing subnode " + subnode.tag[nslen:],file=stderr)
-                    e = doc.parsexml(subnode, Class)
+                    try:
+                        e = doc.parsexml(subnode, Class)
+                    except ParseError as e:
+                        raise #just re-raise deepest parseError
+                    except Exception as e:
+                        #Python 3 will preserve full original traceback, Python 2 does not, original cause is explicitly passed to ParseError anyway:
+                        raise ParseError("FoLiA exception in handling of <" + subnode.tag[len(NSFOLIA)+2:] + "> @ line " + str(subnode.sourceline) + ": [" + e.__class__.__name__ + "] " + str(e), cause=e)
                     if e is not None:
                         args.append(e)
                     if (Class.TEXTCONTAINER or Class.PHONCONTAINER) and subnode.tail:
@@ -2990,6 +3171,11 @@ class AbstractStructureElement(AbstractElement, AllowTokenAnnotation, AllowGener
         return e
 
 
+    def postappend(self):
+        super(AbstractStructureElement,self).postappend()
+        if self.doc and self.doc.textvalidation:
+            self.doc.textvalidationerrors += int(not self.textvalidation())
+
     def words(self, index = None):
         """Returns a generator of Word elements found (recursively) under this element.
 
@@ -3238,22 +3424,23 @@ class TextContent(AbstractElement):
         else:
             self.offset = None
 
+
+        #If no class is specified, it defaults to 'current'. (FoLiA uncharacteristically predefines two classes for t: current and original)
+        if 'cls' not in kwargs and 'class' not in kwargs:
+            kwargs['cls'] = 'current'
+
         if 'ref' in kwargs: #reference to offset
             if isinstance(kwargs['ref'], AbstractElement):
-                self.ref = kwargs['ref']
+                if kwargs['ref'].id is None:
+                    raise ValueError("Reference for text content must have an ID or can't act as reference!")
+                self.ref = kwargs['ref'].id
             else:
-                try:
-                    self.ref = doc.index[kwargs['ref']]
-                except:
-                    raise UnresolvableTextContent("Unable to resolve textcontent reference: " + kwargs['ref'] + " (class=" + self.cls+")")
+                #a string (ID) is passed, we can't resolve it yet cause it may not exist at construction time, use getreference() to resolve when needed
+                self.ref = kwargs['ref']
             del kwargs['ref']
         else:
-            self.ref = None #will be set upon parent.append()
-
+            self.ref = None #no explicit reference; if the reference is implicit, getreference() will still work
 
-        #If no class is specified, it defaults to 'current'. (FoLiA uncharacteristically predefines two classes for t: current and original)
-        if 'cls' not in kwargs and 'class' not in kwargs:
-            kwargs['cls'] = 'current'
 
         super(TextContent,self).__init__(doc, *args, **kwargs)
 
@@ -3265,9 +3452,9 @@ class TextContent(AbstractElement):
         #    raise ValueError("There are illegal unicode control characters present in TextContent: " + repr(self.data[0]))
 
 
-    def text(self):
+    def text(self, normalize_spaces=False):
         """Obtain the text (unicode instance)"""
-        return super(TextContent,self).text() #AbstractElement will handle it now, merely overridden to get rid of parameters that dont make sense in this context
+        return super(TextContent,self).text(normalize_spaces=normalize_spaces) #AbstractElement will handle it now, merely overridden to get rid of parameters that dont make sense in this context
 
     def settext(self, text):
         self.data = [text]
@@ -3277,24 +3464,25 @@ class TextContent(AbstractElement):
         #    raise ValueError("There are illegal unicode control characters present in TextContent: " + repr(self.data[0]))
 
 
-    def validateref(self):
-        """Validates the Text Content's references. Raises UnresolvableTextContent when invalid"""
+    def getreference(self, validate=True):
+        """Returns and validates the Text Content's reference. Raises UnresolvableTextContent when invalid"""
 
-        if self.offset is None: return True #nothing to test
+        if self.offset is None: return None #nothing to test
         if self.ref:
-            ref = self.ref
+            ref = self.doc[self.ref]
         else:
             ref = self.finddefaultreference()
 
         if not ref:
             raise UnresolvableTextContent("Default reference for textcontent not found!")
-        elif ref.hastext(self.cls):
-            raise UnresolvableTextContent("Reference has no such text (class=" + self.cls+")")
-        elif self.text() != ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])]:
-            raise UnresolvableTextContent("Referenced found but does not match!")
+        elif not ref.hastext(self.cls):
+            raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ") has no such text (class=" + self.cls+")")
+        elif validate and self.text() != ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])]:
+            raise UnresolvableTextContent("Reference (ID " + str(ref.id) + ", class=" + self.cls+") found but no text match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'")
         else:
             #finally, we made it!
-            return True
+            return ref
+
 
     def deepvalidation(self):
         return True
@@ -3332,7 +3520,7 @@ class TextContent(AbstractElement):
                 #no parent, breaking
                 return False
 
-            if isinstance(e,AbstractStructureElement) or isinstance(e,AbstractSubtokenAnnotation):
+            if isinstance(e, (AbstractStructureElement, AbstractSubtokenAnnotation, String)):
                 depth += 1
                 if depth == 2:
                     return e
@@ -3376,7 +3564,7 @@ class TextContent(AbstractElement):
         if not self.offset is None:
             attribs['{' + NSFOLIA + '}offset'] = str(self.offset)
         if self.parent and self.ref:
-            attribs['{' + NSFOLIA + '}ref'] = self.ref.id
+            attribs['{' + NSFOLIA + '}ref'] = self.ref
 
         #if self.cls != 'current' and not (self.cls == 'original' and any( isinstance(x, Original) for x in self.ancestors() )  ):
         #    attribs['{' + NSFOLIA + '}class'] = self.cls
@@ -3398,7 +3586,7 @@ class TextContent(AbstractElement):
         if not self.offset is None:
             attribs['offset'] = self.offset
         if self.parent and self.ref:
-            attribs['ref'] = self.ref.id
+            attribs['ref'] = self.ref
         return super(TextContent,self).json(attribs, recurse,ignorelist)
 
 
@@ -3449,22 +3637,23 @@ class PhonContent(AbstractElement):
         else:
             self.offset = None
 
+
+
+        #If no class is specified, it defaults to 'current'. (FoLiA uncharacteristically predefines two classes for phon: current and original)
+        if 'cls' not in kwargs and 'class' not in kwargs:
+            kwargs['cls'] = 'current'
+
         if 'ref' in kwargs: #reference to offset
             if isinstance(kwargs['ref'], AbstractElement):
-                self.ref = kwargs['ref']
+                if kwargs['ref'].id is None:
+                    raise ValueError("Reference for phonetic content must have an ID or can't act as reference!")
+                self.ref = kwargs['ref'].id
             else:
-                try:
-                    self.ref = doc.index[kwargs['ref']]
-                except:
-                    raise UnresolvableTextContent("Unable to resolve phonetic content reference: " + kwargs['ref'] + " (class=" + self.cls+")")
+                #a string (ID) is passed, we can't resolve it yet cause it may not exist at construction time, use getreference() to resolve when needed
+                self.ref = kwargs['ref']
             del kwargs['ref']
         else:
-            self.ref = None #will be set upon parent.append()
-
-
-        #If no class is specified, it defaults to 'current'. (FoLiA uncharacteristically predefines two classes for t: current and original)
-        if 'cls' not in kwargs and 'class' not in kwargs:
-            kwargs['cls'] = 'current'
+            self.ref = None #no explicit reference; if the reference is implicit, getreference() will still work
 
         super(PhonContent,self).__init__(doc, *args, **kwargs)
 
@@ -3488,24 +3677,24 @@ class PhonContent(AbstractElement):
         #    raise ValueError("There are illegal unicode control characters present in TextContent: " + repr(self.data[0]))
 
 
-    def validateref(self):
-        """Validates the Phonetic Content's references. Raises UnresolvableTextContent when invalid"""
+    def getreference(self, validate=True):
+        """Return and validate the Phonetic Content's reference. Raises UnresolvableTextContent when invalid"""
 
-        if self.offset is None: return True #nothing to test
+        if self.offset is None: return None #nothing to test
         if self.ref:
-            ref = self.ref
+            ref = self.doc[self.ref]
         else:
             ref = self.finddefaultreference()
 
         if not ref:
             raise UnresolvableTextContent("Default reference for phonetic content not found!")
-        elif ref.hasphon(self.cls):
+        elif not ref.hasphon(self.cls):
             raise UnresolvableTextContent("Reference has no such phonetic content (class=" + self.cls+")")
-        elif self.phon() != ref.textcontent(self.cls).phon()[self.offset:self.offset+len(self.data[0])]:
-            raise UnresolvableTextContent("Referenced found but does not match!")
+        elif validate and self.phon() != ref.textcontent(self.cls).phon()[self.offset:self.offset+len(self.data[0])]:
+            raise UnresolvableTextContent("Reference (class=" + self.cls+") found but no phonetic match at specified offset ("+str(self.offset)+")! Expected '" + self.text() + "', got '" + ref.textcontent(self.cls).text()[self.offset:self.offset+len(self.data[0])] +"'")
         else:
             #finally, we made it!
-            return True
+            return ref
 
     def deepvalidation(self):
         return True
@@ -3528,11 +3717,14 @@ class PhonContent(AbstractElement):
     #append is implemented, the default suffices
 
     def postappend(self):
-        """(Method for internal usage, see ``AbstractElement.postappend()``)"""
-        if isinstance(self.parent, Original):
-            if self.cls == 'current': self.cls = 'original' #pylint: disable=attribute-defined-outside-init
-
-        super(PhonContent, self).postappend()
+        super(PhonContent,self).postappend()
+        found = set()
+        for c in self.parent:
+            if isinstance(c,PhonContent):
+                if c.cls in found:
+                    raise DuplicateAnnotationError("Can not add multiple text content elements with the same class (" + c.cls + ") to the same structural element!")
+                else:
+                    found.add(c.cls)
 
 
     def finddefaultreference(self):
@@ -3594,7 +3786,7 @@ class PhonContent(AbstractElement):
         if not self.offset is None:
             attribs['{' + NSFOLIA + '}offset'] = str(self.offset)
         if self.parent and self.ref:
-            attribs['{' + NSFOLIA + '}ref'] = self.ref.id
+            attribs['{' + NSFOLIA + '}ref'] = self.ref
 
         #if self.cls != 'current' and not (self.cls == 'original' and any( isinstance(x, Original) for x in self.ancestors() )  ):
         #    attribs['{' + NSFOLIA + '}class'] = self.cls
@@ -3615,7 +3807,7 @@ class PhonContent(AbstractElement):
         if not self.offset is None:
             attribs['offset'] = self.offset
         if self.parent and self.ref:
-            attribs['ref'] = self.ref.id
+            attribs['ref'] = self.ref
         return super(PhonContent,self).json(attribs, recurse, ignorelist)
 
 
@@ -3735,8 +3927,11 @@ class Linebreak(AbstractStructureElement, AbstractTextMarkup): #this element has
         super(Linebreak, self).__init__(doc, *args, **kwargs)
 
 
-    def text(self, cls='current', retaintokenisation=False, previousdelimiter="", strict=False, correctionhandling=None):
-        return previousdelimiter.strip(' ') + "\n"
+    def text(self, cls='current', retaintokenisation=False, previousdelimiter="", strict=False, correctionhandling=None, normalize_spaces=False):
+        if normalize_spaces:
+            return " "
+        else:
+            return previousdelimiter.strip(' ') + "\n"
 
     @classmethod
     def parsexml(Class, node, doc):#pylint: disable=bad-classmethod-argument
@@ -3747,7 +3942,12 @@ class Linebreak(AbstractStructureElement, AbstractTextMarkup): #this element has
             kwargs['pagenr'] = node.attrib['pagenr']
         if 'newpage' in node.attrib and node.attrib['newpage'] == 'yes':
             kwargs['newpage'] = True
-        return Linebreak(doc, **kwargs)
+        br = Linebreak(doc, **kwargs)
+        if '{http://www.w3.org/1999/xlink}href' in node.attrib:
+            br.href = node.attrib['{http://www.w3.org/1999/xlink}href']
+        if '{http://www.w3.org/1999/xlink}type' in node.attrib:
+            br.xlinktype = node.attrib['{http://www.w3.org/1999/xlink}type']
+        return br
 
     def xml(self, attribs = None,elements = None, skipchildren = False):
         if attribs is None: attribs = {}
@@ -3771,8 +3971,11 @@ class Linebreak(AbstractStructureElement, AbstractTextMarkup): #this element has
 class Whitespace(AbstractStructureElement):
     """Whitespace element, signals a vertical whitespace"""
 
-    def text(self, cls='current', retaintokenisation=False, previousdelimiter="", strict=False,correctionhandling=None):
-        return previousdelimiter.strip(' ') + "\n\n"
+    def text(self, cls='current', retaintokenisation=False, previousdelimiter="", strict=False,correctionhandling=None, normalize_spaces=False):
+        if normalize_spaces:
+            return " "
+        else:
+            return previousdelimiter.strip(' ') + "\n\n"
 
 
 class Word(AbstractStructureElement, AllowCorrections):
@@ -4088,8 +4291,8 @@ class AbstractSpanAnnotation(AbstractElement, AllowGenerateID, AllowCorrections)
             if isinstance(child, Word) or isinstance(child, Morpheme) or isinstance(child, Phoneme):
                 #Include REFERENCES to word items instead of word items themselves
                 attribs['{' + NSFOLIA + '}id'] = child.id
-                if child.PRINTABLE and child.hastext():
-                    attribs['{' + NSFOLIA + '}t'] = child.text()
+                if child.PRINTABLE and child.hastext(self.textclass):
+                    attribs['{' + NSFOLIA + '}t'] = child.text(self.textclass)
                 e.append( E.wref(**attribs) )
             elif not (isinstance(child, Feature) and child.SUBSET): #Don't add pre-defined features, they are already added as attributes
                 e.append( child.xml() )
@@ -4775,17 +4978,25 @@ class Correction(AbstractElement, AllowGenerateID):
                     return e.hastext(cls,strict, correctionhandling)
         return False
 
-    def text(self, cls = 'current', retaintokenisation=False, previousdelimiter="",strict=False, correctionhandling=CorrectionHandling.CURRENT):
+    def text(self, cls = 'current', retaintokenisation=False, previousdelimiter="",strict=False, correctionhandling=CorrectionHandling.CURRENT, normalize_spaces=False):
         """See :meth:`AbstractElement.text`"""
         if cls == 'original': correctionhandling = CorrectionHandling.ORIGINAL #backward compatibility
         if correctionhandling in (CorrectionHandling.CURRENT, CorrectionHandling.EITHER):
             for e in self:
                 if isinstance(e, New) or isinstance(e, Current):
-                    return previousdelimiter + e.text(cls, retaintokenisation,"", strict, correctionhandling)
+                    s = previousdelimiter + e.text(cls, retaintokenisation,"", strict, correctionhandling)
+                    if normalize_spaces:
+                        return norm_spaces(s)
+                    else:
+                        return s
         if correctionhandling in (CorrectionHandling.ORIGINAL, CorrectionHandling.EITHER):
             for e in self:
                 if isinstance(e, Original):
-                    return previousdelimiter + e.text(cls, retaintokenisation,"", strict, correctionhandling)
+                    s =  previousdelimiter + e.text(cls, retaintokenisation,"", strict, correctionhandling)
+                    if normalize_spaces:
+                        return norm_spaces(s)
+                    else:
+                        return s
         raise NoSuchText
 
     def hasphon(self, cls='current',strict=True, correctionhandling=CorrectionHandling.CURRENT):
@@ -5044,9 +5255,9 @@ class External(AbstractElement):
         #special handling for external
         source = node.attrib['src']
         if 'include' in node.attrib:
-            kwargs['include'] = node.attrib['include']
+            include = node.attrib['include']
         else:
-            kwargs['include'] = False
+            include = False
         if doc.debug >= 1: print("[PyNLPl FoLiA DEBUG] Found external",file=stderr)
         return External(doc, source=source, include=include)
 
@@ -5260,6 +5471,9 @@ class Morpheme(AbstractStructureElement):
                         if self in e2.wrefs():
                             yield e2
 
+    def textvalidation(self, warnonly=None): #warnonly will change at some point in the future to be stricter
+        return True
+
 
 class Phoneme(AbstractStructureElement):
     """Phone element, represents one phone in phonetic analysis, subtoken annotation element to be used in :class:`PhonologyLayer`"""
@@ -5600,8 +5814,14 @@ class Sentence(AbstractStructureElement):
     def gettextdelimiter(self, retaintokenisation=False):
         #no text delimiter of itself, recurse into children to inherit delimiter
         for child in reversed(self):
-            if isinstance(child, Linebreak) or isinstance(child, Whitespace):
+            if isinstance(child, (Linebreak, Whitespace)):
                 return "" #if a sentence ends in a linebreak, we don't want any delimiter
+            elif isinstance(child, Word) and not child.space:
+                return "" #if a sentence ends in a word with space=no, then we don't delimit either
+            elif isinstance(child, AbstractStructureElement):
+                #recurse? if the child is hidden in another element (part for instance?)
+                return child.gettextdelimiter(retaintokenisation) #if a sentence ends in a word with space=no, then we don't delimit either
+            #TODO: what about corrections?
             else:
                 break
         return self.TEXTDELIMITER
@@ -5949,6 +6169,9 @@ this, first a trivial example of searching for one word::
         d = { 'matchannotation':self.matchannotation, 'matchannotationset':self.matchannotationset, 'casesensitive':self.casesensitive }
         yield Pattern(*newsequence, **d )
 
+class ExternalMetaData(object):
+    def __init__(self, url):
+        self.url = url
 
 
 class NativeMetaData(object):
@@ -6027,6 +6250,7 @@ class Document(object):
             setdefinition (dict):  A dictionary of set definitions, the key corresponds to the set name, the value is a SetDefinition instance
             loadsetdefinitions (bool):  download and load set definitions (default: False)
             deepvalidation (bool): Do deep validation of the document (default: False), implies ``loadsetdefinitions``
+            textvalidation (bool): Do validation of text consistency (default: False)``
             preparsexmlcallback (function):  Callback for a function taking one argument (``node``, an lxml node). Will be called whenever an XML element is parsed into FoLiA. The function should return an instance inherited from folia.AbstractElement, or None to abort parsing this element (and all its children)
             parsexmlcallback (function):  Callback for a function taking one argument (``element``, a FoLiA element). Will be called whenever an XML element is parsed into FoLiA. The function should return an instance inherited from folia.AbstractElement, or None to abort adding this element (and all its children)
             debug (bool): Boolean to enable/disable debug
@@ -6051,10 +6275,13 @@ class Document(object):
         self.declareprocessed = False # Will be set to True when declarations have been processed
 
         self.metadata = NativeMetaData() #will point to XML Element holding native metadata
-        self.metadatatype = MetaDataType.NATIVE
-        self.metadatafile = None #reference to external metadata file
+        self.metadatatype = "native"
 
+        self.submetadata = OrderedDict()
+        self.submetadatatype = {}
 
+        self.alias_set = {} #alias to set map (via annotationtype => first)
+        self.set_alias = {} #set to alias map (via annotationtype => first)
 
         self.textclasses = set() #will contain the text classes found
 
@@ -6116,6 +6343,14 @@ class Document(object):
         if self.deepvalidation:
             self.loadsetdefinitions = True
 
+
+        if 'textvalidation' in kwargs:
+            self.textvalidation = bool(kwargs['textvalidation'])
+        else:
+            self.textvalidation = False
+        self.textvalidationerrors = 0 #will count the number of text validation errors
+        self.offsetvalidationbuffer = [] #will hold (AbstractStructureElement, textclass pairs) that need to be validated still (if textvalidation == True), validation will be done when all parsing is complete and/or prior to serialisation
+
         if 'allowadhocsets' in kwargs:
             self.allowadhocsets = bool(kwargs['allowadhocsets'])
         else:
@@ -6215,6 +6450,22 @@ class Document(object):
             yield self.parsexml(result)
 
 
+    def alias(self, annotationtype, set, fallback=False):
+        """Return the alias for a set (if applicable, returns the unaltered set otherwise iff fallback is enabled)"""
+        if inspect.isclass(annotationtype): annotationtype = annotationtype.ANNOTATIONTYPE
+        if annotationtype in self.set_alias and set in self.set_alias[annotationtype]:
+            return self.set_alias[annotationtype][set]
+        elif fallback:
+            return set
+        else:
+            raise KeyError("No alias for set " + set)
+
+
+    def unalias(self, annotationtype, alias):
+        """Return the set for an alias (if applicable, raises an exception otherwise)"""
+        if inspect.isclass(annotationtype): annotationtype = annotationtype.ANNOTATIONTYPE
+        return self.alias_set[annotationtype][alias]
+
     def findwords(self, *args, **kwargs):
         for x in findwords(self,self.words,*args,**kwargs):
             yield x
@@ -6414,20 +6665,25 @@ class Document(object):
         See also:
             :meth:`Document.xmlstring`
         """
+
+        self.pendingvalidation()
+
         E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={'xml' : "http://www.w3.org/XML/1998/namespace", 'xlink':"http://www.w3.org/1999/xlink"})
         attribs = {}
         attribs['{http://www.w3.org/XML/1998/namespace}id'] = self.id
 
-        if self.version:
-            attribs['version'] = self.version
-        else:
-            attribs['version'] = FOLIAVERSION
+        #if self.version:
+        #    attribs['version'] = self.version
+        #else:
+        attribs['version'] = FOLIAVERSION
 
         attribs['generator'] = 'pynlpl.formats.folia-v' + LIBVERSION
 
         metadataattribs = {}
         metadataattribs['{' + NSFOLIA + '}type'] = self.metadatatype
-        if self.metadatafile: metadataattribs['{' + NSFOLIA + '}src'] = self.metadatafile
+
+        if isinstance(self.metadata, ExternalMetaData):
+            metadataattribs['{' + NSFOLIA + '}src'] = self.metadata.url
 
         e = E.FoLiA(
             E.metadata(
@@ -6450,6 +6706,8 @@ class Document(object):
             import json
             jsondoc = json.dumps(doc.json())
         """
+        self.pendingvalidation()
+
         jsondoc = {'id': self.id, 'children': [], 'declarations': self.jsondeclarations() }
         if self.version:
             jsondoc['version'] = self.version
@@ -6462,27 +6720,38 @@ class Document(object):
         return jsondoc
 
     def xmlmetadata(self):
-        """Internal method to serialize XML declarations"""
+        """Internal method to serialize metadata to XML"""
         E = ElementMaker(namespace="http://ilk.uvt.nl/folia",nsmap={None: "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"})
-        if self.metadatatype == MetaDataType.NATIVE:
-            e = []
-            if not self.metadatafile:
+        elements = []
+        if self.metadatatype == "native":
+            if isinstance(self.metadata, NativeMetaData):
                 for key, value in self.metadata.items():
-                    e.append(E.meta(value,id=key) )
-            return e
+                    elements.append(E.meta(value,id=key) )
         else:
-            if self.metadatafile:
-                return [] #external
-            elif self.metadata is not None:
+            if isinstance(self.metadata, ForeignData):
                 #in-document
-                e = []
                 m = self.metadata
                 while m is not None:
-                    e.append(m.xml())
+                    elements.append(m.xml())
                     m = m.next
-                return e
-            else:
-                return []
+        for metadata_id, submetadata in self.submetadata.items():
+            subelements = []
+            attribs = {
+                "{http://www.w3.org/XML/1998/namespace}id": metadata_id,
+                "type": self.submetadatatype[metadata_id] }
+            if isinstance(submetadata, NativeMetaData):
+                for key, value in submetadata.items():
+                    subelements.append(E.meta(value,id=key) )
+            elif isinstance(submetadata, ExternalMetaData):
+                attribs['src'] = submetadata.url
+            elif isinstance(submetadata, ForeignData):
+                #in-document
+                m = submetadata
+                while m is not None:
+                    subelements.append(m.xml())
+                    m = m.next
+            elements.append( E.submetadata(*subelements, **attribs))
+        return elements
 
 
 
@@ -6639,6 +6908,7 @@ class Document(object):
             annotator (str): Sets a default annotator
             annotatortype: Should be either ``AnnotatorType.MANUAL`` or ``AnnotatorType.AUTO``, indicating whether the annotation was performed manually or by an automated process.
             datetime (datetime.datetime): Sets the default datetime
+            alias (str): Defines alias that may be used in set attribute of elements instead of the full set name
 
         Example::
 
@@ -6649,21 +6919,36 @@ class Document(object):
 
         if inspect.isclass(annotationtype):
             annotationtype = annotationtype.ANNOTATIONTYPE
+        if annotationtype in self.alias_set and set in self.alias_set[annotationtype]:
+            raise ValueError("Set " + set + " conflicts with alias, may not be equal!")
         if not (annotationtype, set) in self.annotations:
             self.annotations.append( (annotationtype,set) )
             if set and self.loadsetdefinitions and not set in self.setdefinitions:
                 if set[:7] == "http://" or set[:8] == "https://" or set[:6] == "ftp://":
-                    self.setdefinitions[set] = loadsetdefinition(set) #will raise exception on error
+                    self.setdefinitions[set] = SetDefinition(set,verbose=self.verbose) #will raise exception on error
         if not annotationtype in self.annotationdefaults:
             self.annotationdefaults[annotationtype] = {}
         self.annotationdefaults[annotationtype][set] = kwargs
+        if 'alias' in kwargs:
+            if annotationtype in self.set_alias and set in self.set_alias[annotationtype] and self.set_alias[annotationtype][set] != kwargs['alias']:
+                raise ValueError("Redeclaring set " + set + " with another alias ('"+kwargs['alias']+"') is not allowed!")
+            if annotationtype in self.alias_set and kwargs['alias'] in self.alias_set[annotationtype] and self.alias_set[annotationtype][kwargs['alias']] != set:
+                raise ValueError("Redeclaring alias " + kwargs['alias'] + " with another set ('"+set+"') is not allowed!")
+            if annotationtype in self.set_alias and kwargs['alias'] in self.set_alias[annotationtype]:
+                raise ValueError("Alias " + kwargs['alias'] + " conflicts with set name, may not be equal!")
+            if annotationtype not in self.alias_set:
+                self.alias_set[annotationtype] = {}
+            if annotationtype not in self.set_alias:
+                self.set_alias[annotationtype] = {}
+            self.alias_set[annotationtype][kwargs['alias']] = set
+            self.set_alias[annotationtype][set] = kwargs['alias']
 
     def declared(self, annotationtype, set):
         """Checks if the annotation type is present (i.e. declared) in the document.
 
         Arguments:
             annotationtype: The type of annotation, this is conveyed by passing the corresponding annototion class (such as :class:`PosAnnotation` for example), or a member of :class:`AnnotationType`, such as ``AnnotationType.POS``.
-            set (str): the set, should formally be a URL pointing to the set definition
+            set (str): the set, should formally be a URL pointing to the set definition (aliases are also supported)
 
         Example::
 
@@ -6674,7 +6959,7 @@ class Document(object):
             bool
         """
         if inspect.isclass(annotationtype): annotationtype = annotationtype.ANNOTATIONTYPE
-        return ( (annotationtype,set) in self.annotations)
+        return ( (annotationtype,set) in self.annotations) or (set in self.alias_set and self.alias_set[set] and (annotationtype, self.alias_set[set]) in self.annotations )
 
 
     def defaultset(self, annotationtype):
@@ -6772,11 +7057,11 @@ class Document(object):
            Argument: Set the document's title in metadata
         """
         if not (value is None):
-            if (self.metadatatype == MetaDataType.NATIVE):
+            if (self.metadatatype == "native"):
                 self.metadata['title'] = value
             else:
                 self._title = value
-        if (self.metadatatype == MetaDataType.NATIVE):
+        if (self.metadatatype == "native"):
             if 'title' in self.metadata:
                 return self.metadata['title']
             else:
@@ -6791,11 +7076,11 @@ class Document(object):
            Argument: Set the document's date in metadata
         """
         if not (value is None):
-            if (self.metadatatype == MetaDataType.NATIVE):
+            if (self.metadatatype == "native"):
                 self.metadata['date'] = value
             else:
                 self._date = value
-        if (self.metadatatype == MetaDataType.NATIVE):
+        if (self.metadatatype == "native"):
             if 'date' in self.metadata:
                 return self.metadata['date']
             else:
@@ -6808,11 +7093,11 @@ class Document(object):
            Argument: Set the document's publisher in metadata
         """
         if not (value is None):
-            if (self.metadatatype == MetaDataType.NATIVE):
+            if (self.metadatatype == "native"):
                 self.metadata['publisher'] = value
             else:
                 self._publisher = value
-        if (self.metadatatype == MetaDataType.NATIVE):
+        if (self.metadatatype == "native"):
             if 'publisher' in self.metadata:
                 return self.metadata['publisher']
             else:
@@ -6825,11 +7110,11 @@ class Document(object):
            Argument: Set the document's license in metadata
         """
         if not (value is None):
-            if (self.metadatatype == MetaDataType.NATIVE):
+            if (self.metadatatype == "native"):
                 self.metadata['license'] = value
             else:
                 self._license = value
-        if (self.metadatatype == MetaDataType.NATIVE):
+        if (self.metadatatype == "native"):
             if 'license' in self.metadata:
                 return self.metadata['license']
             else:
@@ -6842,11 +7127,11 @@ class Document(object):
            Argument: Set the document's language (ISO-639-3) in metadata
         """
         if not (value is None):
-            if (self.metadatatype == MetaDataType.NATIVE):
+            if (self.metadatatype == "native"):
                 self.metadata['language'] = value
             else:
                 self._language = value
-        if (self.metadatatype == MetaDataType.NATIVE):
+        if self.metadatatype == "native":
             if 'language' in self.metadata:
                 return self.metadata['language']
             else:
@@ -6864,14 +7149,11 @@ class Document(object):
             self.metadatatype = "native"
 
         if 'src' in node.attrib:
-            self.metadatafile =  node.attrib['src']
-        else:
-            self.metadatafile = None
-
-        if self.metadatatype == "native":
+            self.metadata = ExternalMetaData(node.attrib['src'])
+        elif self.metadatatype == "native":
             self.metadata = NativeMetaData()
         else:
-            self.metadata = None #will be set below
+            self.metadata = None #may be set below to ForeignData
 
         for subnode in node:
             if subnode.tag == '{' + NSFOLIA + '}annotations':
@@ -6893,14 +7175,51 @@ class Document(object):
                     e.next = ForeignData(self, node=subnode)
                 else:
                     self.metadata = ForeignData(self, node=subnode)
+            elif subnode.tag == '{' + NSFOLIA + '}submetadata':
+                self.parsesubmetadata(subnode)
             elif subnode.tag == '{http://www.mpi.nl/IMDI/Schema/IMDI}METATRANSCRIPT': #backward-compatibility for old IMDI without foreign-key
                 E = ElementMaker(namespace=NSFOLIA,nsmap={None: NSFOLIA, 'xml' : "http://www.w3.org/XML/1998/namespace"})
                 self.metadatatype = "imdi"
-                self.metadata = makeelement(E, '{'+NSFOLIA+'}foreign-data')
-                self.metadata.append(subnode)
+                self.metadata = ForeignData(self, node=subnode)
 
+    def parsesubmetadata(self, node):
+        if '{http://www.w3.org/XML/1998/namespace}id' not in node.attrib:
+            raise MetaDataError("Encountered a submetadata element without xml:id!")
+        else:
+            id = node.attrib['{http://www.w3.org/XML/1998/namespace}id']
 
 
+        if 'type' in node.attrib:
+            self.submetadatatype[id] = node.attrib['type']
+        else:
+            self.submetadatatype[id] = "native"
+
+        if 'src' in node.attrib:
+            self.submetadata[id] = ExternalMetaData(node.attrib['src'])
+        elif self.submetadatatype[id] == "native":
+            self.submetadata[id] = NativeMetaData()
+        else:
+            self.submetadata[id] = None
+
+        for subnode in node:
+            if subnode.tag == '{' + NSFOLIA + '}meta':
+                if self.submetadatatype[id] == "native":
+                    if subnode.text:
+                        self.submetadata[id][subnode.attrib['id']] = subnode.text
+                else:
+                    raise MetaDataError("Encountered a meta element but metadata type is not native!")
+            elif subnode.tag == '{' + NSFOLIA + '}foreign-data':
+                if self.submetadatatype[id] == "native":
+                    raise MetaDataError("Encountered a foreign-data element but metadata type is native!")
+                elif self.submetadata[id] is not None:
+                    #multiple foreign-data elements, chain:
+                    e = self.submetadata[id]
+                    while e.next is not None:
+                        e = e.next
+                    e.next = ForeignData(self, node=subnode)
+                else:
+                    self.submetadata[id] = ForeignData(self, node=subnode)
+
     def parsexml(self, node, ParentClass = None):
         """Internal method.
 
@@ -6993,6 +7312,39 @@ class Document(object):
         else:
             raise Exception("Unknown FoLiA XML tag: " + node.tag)
 
+        self.pendingvalidation() #perform  any pending offset validations (if applicable)
+
+
+    def pendingvalidation(self, warnonly=None):
+        """Perform any pending validations
+
+        Parameters:
+            warnonly (bool): Warn only (True) or raise exceptions (False). If set to None then this value will be determined based on the document's FoLiA version (Warn only before FoLiA v1.5)
+
+        Returns:
+            bool
+        """
+        if self.debug: print("[PyNLPl FoLiA DEBUG] Processing pending validations (if any)",file=stderr)
+
+        if warnonly is None and self and self.version:
+            warnonly = (checkversion(self.version, '1.5.0') < 0) #warn only for documents older than FoLiA v1.5
+        if self.textvalidation:
+            while self.offsetvalidationbuffer:
+                structureelement, textclass = self.offsetvalidationbuffer.pop()
+
+                if self.debug: print("[PyNLPl FoLiA DEBUG] Performing offset validation on " + repr(structureelement) + " textclass " + textclass,file=stderr)
+
+                #validate offsets
+                tc = structureelement.textcontent(textclass)
+                if tc.offset is not None:
+                    try:
+                        tc.getreference(validate=True)
+                    except UnresolvableTextContent:
+                        msg = "Text for " + structureelement.__class__.__name__ + ", ID " + str(structureelement.id) + ", textclass " + textclass  + ", has incorrect offset " + str(tc.offset) + " or invalid reference"
+                        print("TEXT VALIDATION ERROR: " + msg,file=sys.stderr)
+                        if not warnonly:
+                            raise
+
 
     def select(self, Class, set=None, recursive=True,  ignore=True):
         """See :meth:`AbstractElement.select`"""
@@ -7271,6 +7623,20 @@ def relaxng(filename=None):
                     E.zeroOrMore(
                         E.ref(name="foreign-data"),
                     ),
+                    E.zeroOrMore(
+                        E.element( #submetadata
+                            E.attribute(name='id',ns="http://www.w3.org/XML/1998/namespace"),
+                            E.optional(E.attribute(name='type')),
+                            E.optional(E.attribute(name='src')),
+                            E.zeroOrMore(
+                                E.element(E.attribute(name='id'), E.text(), name='meta'),
+                            ),
+                            E.zeroOrMore(
+                                E.ref(name="foreign-data"),
+                            ),
+                            name="submetadata"
+                        )
+                    ),
                     #E.optional(
                     #    E.ref(name='METATRANSCRIPT')
                     #),
@@ -7532,7 +7898,7 @@ def isncname(name):
     #not entirely according to specs http://www.w3.org/TR/REC-xml/#NT-Name , but simplified:
     for i, c in enumerate(name):
         if i == 0:
-            if not c.isalpha():
+            if not c.isalpha() and c != '_':
                 raise ValueError('Invalid XML NCName identifier: ' + name + ' (at position ' + str(i+1)+')')
         else:
             if not c.isalnum() and not (c in ['-','_','.']):
@@ -7564,8 +7930,13 @@ def validate(filename,schema=None,deep=False):
     if not schema:
         schema = ElementTree.RelaxNG(relaxng())
 
+    try:
+        schema.assertValid(doc) #will raise exceptions
+    except Exception as e:
+        for error in schema.error_log:
+            print("Error on line " + str(error.line) + ": " + error.message, file=sys.stderr)
+        raise e
 
-    schema.assertValid(doc) #will raise exceptions
 
     if deep:
         doc = Document(tree=doc, deepvalidation=True)
@@ -7573,7 +7944,7 @@ def validate(filename,schema=None,deep=False):
 #================================= FOLIA SPECIFICATION ==========================================================
 
 #foliaspec:header
-#This file was last updated according to the FoLiA specification for version 1.4.0 on 2016-12-09 14:31:07, using foliaspec.py
+#This file was last updated according to the FoLiA specification for version 1.5.1 on 2017-11-21 13:18:02, using foliaspec.py
 #Code blocks after a foliaspec comment (until the next newline) are automatically generated. **DO NOT EDIT THOSE** and **DO NOT REMOVE ANY FOLIASPEC COMMENTS** !!!
 
 #foliaspec:structurescope:STRUCTURESCOPE
@@ -7787,7 +8158,7 @@ AbstractElement.XMLTAG = None
 #Sets all element properties for all elements
 #------ AbstractAnnotationLayer -------
 AbstractAnnotationLayer.ACCEPTED_DATA = (Comment, Correction, Description, ForeignData,)
-AbstractAnnotationLayer.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N,)
+AbstractAnnotationLayer.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N, Attrib.TEXTCLASS, Attrib.METADATA,)
 AbstractAnnotationLayer.PRINTABLE = False
 AbstractAnnotationLayer.SETONLY = True
 AbstractAnnotationLayer.SPEAKABLE = False
@@ -7800,7 +8171,7 @@ AbstractCorrectionChild.TEXTDELIMITER = None
 #------ AbstractExtendedTokenAnnotation -------
 #------ AbstractSpanAnnotation -------
 AbstractSpanAnnotation.ACCEPTED_DATA = (AlignReference, Alignment, Comment, Description, ForeignData, Metric,)
-AbstractSpanAnnotation.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+AbstractSpanAnnotation.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.TEXTCLASS, Attrib.METADATA,)
 AbstractSpanAnnotation.PRINTABLE = True
 AbstractSpanAnnotation.SPEAKABLE = True
 #------ AbstractSpanRole -------
@@ -7809,14 +8180,14 @@ AbstractSpanRole.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.N, Attr
 #------ AbstractStructureElement -------
 AbstractStructureElement.ACCEPTED_DATA = (AbstractAnnotationLayer, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Metric, Part,)
 AbstractStructureElement.AUTO_GENERATE_ID = True
-AbstractStructureElement.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+AbstractStructureElement.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 AbstractStructureElement.PRINTABLE = True
 AbstractStructureElement.REQUIRED_ATTRIBS = None
 AbstractStructureElement.SPEAKABLE = True
 AbstractStructureElement.TEXTDELIMITER = "\n\n"
 #------ AbstractTextMarkup -------
-AbstractTextMarkup.ACCEPTED_DATA = (AbstractTextMarkup, Comment, Description,)
-AbstractTextMarkup.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+AbstractTextMarkup.ACCEPTED_DATA = (AbstractTextMarkup, Comment, Description, Linebreak,)
+AbstractTextMarkup.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 AbstractTextMarkup.PRIMARYELEMENT = False
 AbstractTextMarkup.PRINTABLE = True
 AbstractTextMarkup.TEXTCONTAINER = True
@@ -7825,7 +8196,7 @@ AbstractTextMarkup.XLINK = True
 #------ AbstractTokenAnnotation -------
 AbstractTokenAnnotation.ACCEPTED_DATA = (Comment, Description, Feature, ForeignData, Metric,)
 AbstractTokenAnnotation.OCCURRENCES_PER_SET = 1
-AbstractTokenAnnotation.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+AbstractTokenAnnotation.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.TEXTCLASS, Attrib.METADATA,)
 AbstractTokenAnnotation.REQUIRED_ATTRIBS = (Attrib.CLASS,)
 #------ ActorFeature -------
 ActorFeature.SUBSET = "actor"
@@ -7836,7 +8207,7 @@ AlignReference.XMLTAG = "aref"
 Alignment.ACCEPTED_DATA = (AlignReference, Comment, Description, Feature, ForeignData, Metric,)
 Alignment.ANNOTATIONTYPE = AnnotationType.ALIGNMENT
 Alignment.LABEL = "Alignment"
-Alignment.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+Alignment.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 Alignment.PRINTABLE = False
 Alignment.REQUIRED_ATTRIBS = None
 Alignment.SPEAKABLE = False
@@ -7846,7 +8217,7 @@ Alignment.XMLTAG = "alignment"
 Alternative.ACCEPTED_DATA = (AbstractTokenAnnotation, Comment, Correction, Description, ForeignData, MorphologyLayer, PhonologyLayer,)
 Alternative.AUTH = False
 Alternative.LABEL = "Alternative"
-Alternative.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+Alternative.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 Alternative.PRINTABLE = False
 Alternative.REQUIRED_ATTRIBS = None
 Alternative.SPEAKABLE = False
@@ -7855,7 +8226,7 @@ Alternative.XMLTAG = "alt"
 AlternativeLayers.ACCEPTED_DATA = (AbstractAnnotationLayer, Comment, Description, ForeignData,)
 AlternativeLayers.AUTH = False
 AlternativeLayers.LABEL = "Alternative Layers"
-AlternativeLayers.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+AlternativeLayers.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 AlternativeLayers.PRINTABLE = False
 AlternativeLayers.REQUIRED_ATTRIBS = None
 AlternativeLayers.SPEAKABLE = False
@@ -7885,13 +8256,14 @@ ChunkingLayer.PRIMARYELEMENT = False
 ChunkingLayer.XMLTAG = "chunking"
 #------ Comment -------
 Comment.LABEL = "Comment"
-Comment.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N,)
+Comment.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N, Attrib.METADATA,)
+Comment.PRINTABLE = False
 Comment.XMLTAG = "comment"
 #------ ComplexAlignment -------
 ComplexAlignment.ACCEPTED_DATA = (Alignment, Comment, Description, Feature, ForeignData, Metric,)
 ComplexAlignment.ANNOTATIONTYPE = AnnotationType.COMPLEXALIGNMENT
 ComplexAlignment.LABEL = "Complex Alignment"
-ComplexAlignment.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+ComplexAlignment.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 ComplexAlignment.PRINTABLE = False
 ComplexAlignment.REQUIRED_ATTRIBS = None
 ComplexAlignment.SPEAKABLE = False
@@ -7926,7 +8298,7 @@ CoreferenceLink.XMLTAG = "coreferencelink"
 Correction.ACCEPTED_DATA = (Comment, Current, Description, ErrorDetection, Feature, ForeignData, Metric, New, Original, Suggestion,)
 Correction.ANNOTATIONTYPE = AnnotationType.CORRECTION
 Correction.LABEL = "Correction"
-Correction.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+Correction.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 Correction.PRINTABLE = True
 Correction.SPEAKABLE = True
 Correction.TEXTDELIMITER = None
@@ -7936,7 +8308,7 @@ Current.OCCURRENCES = 1
 Current.OPTIONAL_ATTRIBS = None
 Current.XMLTAG = "current"
 #------ Definition -------
-Definition.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, Figure, ForeignData, List, Metric, Paragraph, Part, PhonContent, Reference, Sentence, String, Table, TextContent, Utterance, Word,)
+Definition.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, Figure, ForeignData, Linebreak, List, Metric, Paragraph, Part, PhonContent, Reference, Sentence, String, Table, TextContent, Utterance, Whitespace, Word,)
 Definition.ANNOTATIONTYPE = AnnotationType.DEFINITION
 Definition.LABEL = "Definition"
 Definition.XMLTAG = "def"
@@ -7958,7 +8330,7 @@ DependencyDependent.XMLTAG = "dep"
 #------ Description -------
 Description.LABEL = "Description"
 Description.OCCURRENCES = 1
-Description.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N,)
+Description.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N, Attrib.METADATA,)
 Description.XMLTAG = "desc"
 #------ Division -------
 Division.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Division, Entry, Event, Example, Feature, Figure, ForeignData, Gap, Head, Linebreak, List, Metric, Note, Paragraph, Part, PhonContent, Quote, Reference, Sentence, Table, TextContent, Utterance, Whitespace,)
@@ -7995,7 +8367,7 @@ ErrorDetection.LABEL = "Error Detection"
 ErrorDetection.OCCURRENCES_PER_SET = 0
 ErrorDetection.XMLTAG = "errordetection"
 #------ Event -------
-Event.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, ActorFeature, Alignment, Alternative, AlternativeLayers, BegindatetimeFeature, Comment, Correction, Description, Division, EnddatetimeFeature, Event, Example, Feature, Figure, ForeignData, Head, Linebreak, List, Metric, Paragraph, Part, PhonContent, Reference, Sentence, String, Table, TextContent, Utterance, Whitespace, Word,)
+Event.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, ActorFeature, Alignment, Alternative, AlternativeLayers, BegindatetimeFeature, Comment, Correction, Description, Division, EnddatetimeFeature, Entry, Event, Example, Feature, Figure, ForeignData, Gap, Head, Linebreak, List, Metric, Note, Paragraph, Part, PhonContent, Quote, Reference, Sentence, String, Table, TextContent, Utterance, Whitespace, Word,)
 Event.ANNOTATIONTYPE = AnnotationType.EVENT
 Event.LABEL = "Event"
 Event.XMLTAG = "event"
@@ -8032,7 +8404,7 @@ FunctionFeature.XMLTAG = None
 Gap.ACCEPTED_DATA = (Comment, Content, Description, Feature, ForeignData, Metric, Part,)
 Gap.ANNOTATIONTYPE = AnnotationType.GAP
 Gap.LABEL = "Gap"
-Gap.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME,)
+Gap.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.METADATA,)
 Gap.XMLTAG = "gap"
 #------ Head -------
 Head.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Event, Feature, ForeignData, Gap, Linebreak, Metric, Part, PhonContent, Reference, Sentence, String, TextContent, Whitespace, Word,)
@@ -8048,7 +8420,7 @@ Headspan.LABEL = "Head"
 Headspan.OCCURRENCES = 1
 Headspan.XMLTAG = "hd"
 #------ Label -------
-Label.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Metric, Part, PhonContent, Reference, String, TextContent, Word,)
+Label.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Linebreak, Metric, Part, PhonContent, Reference, String, TextContent, Whitespace, Word,)
 Label.LABEL = "Label"
 Label.XMLTAG = "label"
 #------ LangAnnotation -------
@@ -8066,6 +8438,7 @@ LevelFeature.XMLTAG = None
 Linebreak.ANNOTATIONTYPE = AnnotationType.LINEBREAK
 Linebreak.LABEL = "Linebreak"
 Linebreak.TEXTDELIMITER = ""
+Linebreak.XLINK = True
 Linebreak.XMLTAG = "br"
 #------ List -------
 List.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Caption, Comment, Correction, Description, Event, Feature, ForeignData, ListItem, Metric, Note, Part, PhonContent, Reference, String, TextContent,)
@@ -8074,7 +8447,7 @@ List.LABEL = "List"
 List.TEXTDELIMITER = "\n\n"
 List.XMLTAG = "list"
 #------ ListItem -------
-ListItem.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Event, Feature, ForeignData, Gap, Label, Linebreak, List, Metric, Note, Part, PhonContent, Reference, Sentence, String, TextContent, Whitespace,)
+ListItem.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Event, Feature, ForeignData, Gap, Label, Linebreak, List, Metric, Note, Paragraph, Part, PhonContent, Reference, Sentence, String, TextContent, Whitespace, Word,)
 ListItem.LABEL = "List Item"
 ListItem.TEXTDELIMITER = "\n"
 ListItem.XMLTAG = "item"
@@ -8082,7 +8455,7 @@ ListItem.XMLTAG = "item"
 Metric.ACCEPTED_DATA = (Comment, Description, Feature, ForeignData, ValueFeature,)
 Metric.ANNOTATIONTYPE = AnnotationType.METRIC
 Metric.LABEL = "Metric"
-Metric.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER,)
+Metric.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.METADATA,)
 Metric.XMLTAG = "metric"
 #------ ModalityFeature -------
 ModalityFeature.SUBSET = "modality"
@@ -8129,7 +8502,7 @@ Paragraph.LABEL = "Paragraph"
 Paragraph.TEXTDELIMITER = "\n\n"
 Paragraph.XMLTAG = "p"
 #------ Part -------
-Part.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, AbstractStructureElement, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Metric, Part,)
+Part.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, AbstractStructureElement, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Metric, Part, PhonContent, TextContent,)
 Part.ANNOTATIONTYPE = AnnotationType.PART
 Part.LABEL = "Part"
 Part.TEXTDELIMITER = None
@@ -8139,7 +8512,7 @@ PhonContent.ACCEPTED_DATA = (Comment, Description,)
 PhonContent.ANNOTATIONTYPE = AnnotationType.PHON
 PhonContent.LABEL = "Phonetic Content"
 PhonContent.OCCURRENCES = 0
-PhonContent.OPTIONAL_ATTRIBS = (Attrib.CLASS, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME,)
+PhonContent.OPTIONAL_ATTRIBS = (Attrib.CLASS, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.METADATA,)
 PhonContent.PHONCONTAINER = True
 PhonContent.PRINTABLE = False
 PhonContent.SPEAKABLE = True
@@ -8169,11 +8542,11 @@ Predicate.ANNOTATIONTYPE = AnnotationType.PREDICATE
 Predicate.LABEL = "Predicate"
 Predicate.XMLTAG = "predicate"
 #------ Quote -------
-Quote.ACCEPTED_DATA = (AbstractAnnotationLayer, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Division, Feature, ForeignData, Gap, Metric, Paragraph, Part, Quote, Sentence, String, TextContent, Utterance, Word,)
+Quote.ACCEPTED_DATA = (AbstractAnnotationLayer, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Division, Feature, ForeignData, Gap, Metric, Paragraph, Part, Quote, Reference, Sentence, String, TextContent, Utterance, Word,)
 Quote.LABEL = "Quote"
 Quote.XMLTAG = "quote"
 #------ Reference -------
-Reference.ACCEPTED_DATA = (AbstractAnnotationLayer, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Metric, Paragraph, Part, PhonContent, Quote, Sentence, String, TextContent, Utterance, Word,)
+Reference.ACCEPTED_DATA = (AbstractAnnotationLayer, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Linebreak, Metric, Paragraph, Part, PhonContent, Quote, Sentence, String, TextContent, Utterance, Whitespace, Word,)
 Reference.LABEL = "Reference"
 Reference.TEXTDELIMITER = None
 Reference.XMLTAG = "ref"
@@ -8246,7 +8619,7 @@ String.ACCEPTED_DATA = (AbstractExtendedTokenAnnotation, Alignment, Comment, Cor
 String.ANNOTATIONTYPE = AnnotationType.STRING
 String.LABEL = "String"
 String.OCCURRENCES = 0
-String.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME,)
+String.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.N, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.METADATA,)
 String.PRINTABLE = True
 String.XMLTAG = "str"
 #------ StyleFeature -------
@@ -8287,12 +8660,12 @@ Target.LABEL = "Target"
 Target.OCCURRENCES = 1
 Target.XMLTAG = "target"
 #------ Term -------
-Term.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Event, Feature, Figure, ForeignData, Gap, List, Metric, Paragraph, Part, PhonContent, Reference, Sentence, String, Table, TextContent, Utterance, Word,)
+Term.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Event, Feature, Figure, ForeignData, Gap, Linebreak, List, Metric, Paragraph, Part, PhonContent, Reference, Sentence, String, Table, TextContent, Utterance, Whitespace, Word,)
 Term.ANNOTATIONTYPE = AnnotationType.TERM
 Term.LABEL = "Term"
 Term.XMLTAG = "term"
 #------ Text -------
-Text.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Division, Entry, Event, Example, External, Feature, Figure, ForeignData, Gap, List, Metric, Note, Paragraph, Part, PhonContent, Quote, Reference, Sentence, String, Table, TextContent, Word,)
+Text.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractExtendedTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Division, Entry, Event, Example, External, Feature, Figure, ForeignData, Gap, Linebreak, List, Metric, Note, Paragraph, Part, PhonContent, Quote, Reference, Sentence, String, Table, TextContent, Whitespace, Word,)
 Text.LABEL = "Text Body"
 Text.TEXTDELIMITER = "\n\n\n"
 Text.XMLTAG = "text"
@@ -8301,7 +8674,7 @@ TextContent.ACCEPTED_DATA = (AbstractTextMarkup, Comment, Description, Linebreak
 TextContent.ANNOTATIONTYPE = AnnotationType.TEXT
 TextContent.LABEL = "Text"
 TextContent.OCCURRENCES = 0
-TextContent.OPTIONAL_ATTRIBS = (Attrib.CLASS, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME,)
+TextContent.OPTIONAL_ATTRIBS = (Attrib.CLASS, Attrib.ANNOTATOR, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.METADATA,)
 TextContent.PRINTABLE = True
 TextContent.SPEAKABLE = False
 TextContent.TEXTCONTAINER = True
@@ -8358,6 +8731,7 @@ Whitespace.XMLTAG = "whitespace"
 Word.ACCEPTED_DATA = (AbstractAnnotationLayer, AbstractTokenAnnotation, Alignment, Alternative, AlternativeLayers, Comment, Correction, Description, Feature, ForeignData, Metric, Part, PhonContent, Reference, String, TextContent,)
 Word.ANNOTATIONTYPE = AnnotationType.TOKEN
 Word.LABEL = "Word/Token"
+Word.OPTIONAL_ATTRIBS = (Attrib.ID, Attrib.CLASS, Attrib.ANNOTATOR, Attrib.N, Attrib.CONFIDENCE, Attrib.DATETIME, Attrib.SRC, Attrib.BEGINTIME, Attrib.ENDTIME, Attrib.SPEAKER, Attrib.TEXTCLASS, Attrib.METADATA,)
 Word.TEXTDELIMITER = " "
 Word.XMLTAG = "w"
 #------ WordReference -------
diff --git a/pynlpl/formats/fql.py b/pynlpl/formats/fql.py
index eed347d..709bae1 100644
--- a/pynlpl/formats/fql.py
+++ b/pynlpl/formats/fql.py
@@ -35,8 +35,8 @@ MASK_LITERAL = 1
 MASK_EXPRESSION = 2
 MAXEXPANSION = 99
 
-FOLIAVERSION = '1.4.0'
-FQLVERSION = '0.4.0'
+FOLIAVERSION = '1.5.0'
+FQLVERSION = '0.4.1'
 
 class SyntaxError(Exception):
     pass
@@ -1424,6 +1424,11 @@ class Action(object): #Action expression
 
         if action.action in ("EDIT","ADD", "APPEND","PREPEND") and q.kw(i,("RESPAN","SPAN")):
             action.span, i = Span.parse(q,i+1)
+        if action.action == "DELETE" and q.kw(i,("RESTORE")):
+            action.restore = q[i+1].upper()
+            i += 2
+        else:
+            action.restore = None
 
         done = False
         while not done:
@@ -1602,6 +1607,15 @@ class Action(object): #Action expression
                             elif action.action == "DELETE":
                                 if debug: print("[FQL EVALUATION DEBUG] Action - Applying DELETE to focus ", repr(focus),file=sys.stderr)
                                 p = focus.parent
+                                if action.restore == "ORIGINAL":
+                                    index = p.getindex(focus, False, False)
+                                    if not isinstance(focus, folia.Correction):
+                                        raise QueryError("RESTORE ORIGINAL can only be performed when the focus is a correction")
+                                    #restore original
+                                    for original in reversed(focus.original()):
+                                        if debug: print("[FQL EVALUATION DEBUG] Action - Restoring original: ", repr(original),file=sys.stderr)
+                                        original.parent = p
+                                        p.insert(index, original)
                                 p.remove(focus)
                                 #we set the parent back on the element we return, so return types like ancestor-focus work
                                 focus.parent = p
diff --git a/pynlpl/tests/FoLiA/foliatools/__init__.py b/pynlpl/tests/FoLiA/foliatools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pynlpl/tests/FoLiA/foliatools/alpino2folia.py b/pynlpl/tests/FoLiA/foliatools/alpino2folia.py
new file mode 100755
index 0000000..b481436
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/alpino2folia.py
@@ -0,0 +1,175 @@
+#! /usr/bin/env python3
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import lxml
+import getopt
+import sys
+import os
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git", file=sys.stderr)
+    sys.exit(2)
+
+def usage():
+    print("alpino2folia",file=sys.stderr)
+    print("  by Maarten van Gompel (proycon)",file=sys.stderr)
+    print("  Centre for Language and Speech Technology, Radboud University Nijmegen",file=sys.stderr)
+    print("  2012-2016 - Licensed under GPLv3",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("This conversion script reads an Alpino XML document and converts",file=sys.stderr)
+    print("it to FoLiA. If multiple input files are specified, and/or the output FoLiA document already exists, then the",file=sys.stderr)
+    print("converter will append it.",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("Usage: alpino2folia [options] alpino-input [alpino-input 2..] folia-output"   ,file=sys.stderr)
+
+def extract_syntax(alpinonode, folianode, foliasentence, alpinoroot):
+    for node in alpinonode:
+        if 'word' in node.attrib:
+            folianode.append(folia.SyntacticUnit, foliasentence[int(node.attrib['begin'])], cls=node.attrib['pos'],id=foliasentence.id+'.alpinonode.'+node.attrib['id'] )
+        elif 'cat' in node.attrib:
+            su = folianode.append(folia.SyntacticUnit, cls=node.attrib['cat'],id=foliasentence.id+'.alpinonode.'+node.attrib['id'] )
+            extract_syntax(node, su, foliasentence,alpinoroot)
+        else:
+            print("SYNTAX: Don't know what to do with node...", repr(node.attrib) ,file=sys.stderr)
+
+def extract_dependencies(alpinonode, deplayer, foliasentence):
+    deps = []
+    head = None
+    for node in alpinonode:
+        #print("DEP:", node,file=sys.stderr)
+        if not 'word' in node.attrib:
+            extract_dependencies(node, deplayer, foliasentence )
+        if 'rel' in node.attrib:
+            if node.attrib['rel'] == 'hd':
+                head = folia.DependencyHead(deplayer.doc, foliasentence[int(node.attrib['begin'])])
+            else:
+                deps.append( (node.attrib['rel'], folia.DependencyDependent(deplayer.doc, foliasentence[int(node.attrib['begin'])]) )  )
+
+    if head:
+        for cls, dep in deps:
+            deplayer.append( folia.Dependency, head, dep, cls=cls)
+
+
+def makefoliadoc(outputfile):
+    baseid = os.path.basename(outputfile).replace('.folia.xml','').replace('.xml','')
+    foliadoc = folia.Document(id=baseid)
+    foliadoc.append(folia.Text(foliadoc, id=baseid+'.text'))
+
+    if not foliadoc.declared(folia.AnnotationType.TOKEN, 'alpino-tokens'):
+        foliadoc.declare(folia.AnnotationType.TOKEN, 'alpino-tokens')
+    if not foliadoc.declared(folia.LemmaAnnotation, 'alpino-lemmas'):
+        foliadoc.declare(folia.LemmaAnnotation, 'alpino-lemmas')
+    if not foliadoc.declared(folia.SenseAnnotation, 'alpino-sense'):
+        foliadoc.declare(folia.SenseAnnotation, 'alpino-sense')
+    if not foliadoc.declared(folia.PosAnnotation, 'alpino-pos'):
+        foliadoc.declare(folia.PosAnnotation, 'alpino-pos')
+    if not foliadoc.declared(folia.AnnotationType.DEPENDENCY, 'alpino-dependency'):
+        foliadoc.declare(folia.AnnotationType.DEPENDENCY, 'alpino-dependency')
+    if not foliadoc.declared(folia.AnnotationType.SYNTAX, 'alpino-syntax'):
+        foliadoc.declare(folia.AnnotationType.SYNTAX, 'alpino-syntax')
+    if not foliadoc.declared(folia.AnnotationType.MORPHOLOGICAL, 'alpino-morphology'):
+        foliadoc.declare(folia.AnnotationType.MORPHOLOGICAL, 'alpino-morphology')
+
+    return foliadoc
+
+
+def alpino2folia(alpinofile, foliadoc):
+    tree = lxml.etree.parse(alpinofile)
+    alpinoroot = tree.getroot()
+    if alpinoroot.tag != 'alpino_ds':
+        raise Exception("source file is not an alpino file")
+    sentencenode = alpinoroot.xpath('//sentence')[0]
+
+
+    foliatextbody = foliadoc[-1]
+    foliasentence = foliatextbody.append(folia.Sentence)
+
+
+    #first pass, extract words
+    alpinowords = sentencenode.text.split(' ')
+    for alpinoword in alpinowords:
+        foliasentence.append(folia.Word,alpinoword.strip())
+
+
+    #loop over lexical nodes
+    for node in alpinoroot.xpath('//node'):
+        if 'word' in node.attrib and 'pos' in node.attrib:
+            index = int(node.attrib['begin'])
+            if alpinowords[index].strip() != node.attrib['word'].strip():
+                raise Exception("Inconsistency in Alpino XML! Node at begin refers to word index " + str(index) + ", which has value \"" + alpinowords[index] + "\" and does not correspond with node at word \"" + node.attrib['word'] +  "\"")
+            foliaword = foliasentence[index]
+
+            if 'lemma' in node.attrib:
+                foliaword.append(folia.LemmaAnnotation, cls=node.attrib['lemma'])
+            if 'sense' in node.attrib:
+                foliaword.append(folia.SenseAnnotation, cls=node.attrib['sense'])
+            if 'root' in node.attrib:
+                layer = foliaword.append(folia.MorphologyLayer)
+                layer.append(folia.Morpheme, folia.TextContent(foliadoc, node.attrib['root']), cls='root')
+
+            if 'postag' in node.attrib and 'pt' in node.attrib:
+                foliapos = foliaword.append(folia.PosAnnotation, cls=node.attrib['postag'], head=node.attrib['pt'])
+            elif 'frame' in node.attrib:
+                foliaword.append(folia.PosAnnotation, cls=node.attrib['frame'], head=node.attrib['pos'])
+            else:
+                foliaword.append(folia.PosAnnotation, cls=node.attrib['pos'])
+
+            #gather pos features
+            for key, value in node.attrib.items():
+                if key in ('wh','per','num','gen','case','def','infl','sc','buiging','refl','tense','comparative','positie','pvagr','pvtijd','graad','pdtype','wvorm','ntype','vwtype','getal','status','naamval','persoon','genus'):
+                    foliapos.append(folia.Feature, subset=key, cls=value)
+                elif not key in ('sense','pos','rel','postag','pt','frame','root','lemma','id','begin','end','word','index'):
+                    print("WARNING: Ignored attribute " + key + "=\"" + value + "\" on node ",file=sys.stderr)
+
+    foliasyntaxlayer = foliasentence.append(folia.SyntaxLayer)
+    foliasyntaxtop = foliasyntaxlayer.append(folia.SyntacticUnit, cls='top')
+
+    #extract syntax
+    extract_syntax(alpinoroot[0], foliasyntaxtop, foliasentence, alpinoroot)
+
+    foliadeplayer = foliasentence.append(folia.DependenciesLayer)
+
+    #extract dependencies:
+    extract_dependencies(alpinoroot[0], foliadeplayer, foliasentence)
+
+    return foliadoc
+
+
+def main():
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "-h", ["help"])
+    except getopt.GetoptError as err:
+        print(str(err),file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+    for o, a in opts:
+        if o == '-h' or o == '--help':
+            usage()
+            sys.exit(0)
+        else:
+            raise Exception("No such option: " + o)
+
+    if len(args) < 2:
+        usage()
+        sys.exit(2)
+    else:
+        alpinofiles = []
+        for i, arg in enumerate(args):
+            if i < len(args) - 1:
+                alpinofiles.append(arg)
+        foliafile = args[-1]
+
+    if os.path.exists(foliafile):
+        doc = folia.Document(file=foliafile)
+    else:
+        doc = makefoliadoc(foliafile)
+    for alpinofile in alpinofiles:
+        doc = alpino2folia(alpinofile, doc)
+    doc.save(foliafile)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/cgn2folia.py b/pynlpl/tests/FoLiA/foliatools/cgn2folia.py
new file mode 100755
index 0000000..3616344
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/cgn2folia.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+#---------------------------------------------------------------
+# CGN to FoLiA Converter
+#   by Maarten van Gompel
+#   Centre for Language Studies
+#   Radboud University Nijmegen
+#   proycon AT anaproy DOT nl
+#
+#   Licensed under GPLv3
+#
+# This script converts CGN to FoLiA format. (Note that paragraph information
+# is not available in CGN and therefore not stored in FoLiA format either.)
+#
+#----------------------------------------------------------------
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+import glob
+import gzip
+import os
+from pynlpl.formats import folia
+
+CGN_ENCODING = 'iso-8859-15' #not yet used!
+
+if len(sys.argv) != 3:
+    print("SYNTAX: ./cgn2folia.py cgnrootdir outputdir", file=sys.stderr)
+    sys.exit(1)
+
+cgndir = sys.argv[1]
+outdir = sys.argv[2]
+
+
+plkdir = os.path.join(cgndir,"data","annot","text","plk")
+for compdir in glob.glob(os.path.join(plkdir, "comp-*")):
+    collection_id = "CGN-" + os.path.basename(compdir)
+    print(collection_id)
+    try:
+        os.mkdir(os.path.join(outdir, collection_id))
+    except:
+        pass
+    files = list(glob.glob(os.path.join(compdir,"nl","*.gz"))) + list(glob.glob(os.path.join(compdir, "vl","*.gz")))
+    for path in files:
+        text_id = os.path.basename(path).split(".")[0]
+        print("\t" + text_id)
+        full_id = collection_id + "_" + text_id
+        au_id = None
+        sentence = None
+
+        doc = folia.Document(id=full_id)
+        doc.metadatatype = folia.MetaDataType.IMDI
+        doc.metadatafile = text_id + ".imdi"
+        textbody = doc.append(folia.Text(doc, id=full_id+"."+text_id))
+        doc.declare(folia.PosAnnotation, set="hdl:1839/00-SCHM-0000-0000-000B-9")
+        doc.declare(folia.LemmaAnnotation, set="hdl:1839/00-SCHM-0000-0000-000E-3")
+
+        fin = gzip.open(path,'r')
+        for line in fin:
+            line = unicode(line,CGN_ENCODING)
+            if line:
+                if line[0:3] == '<au':
+                    end = line[8:].find('"')
+                    if end != -1:
+                        end += 8
+                        au_id = line[8:end]
+                        sentence = textbody.append(folia.Sentence, id=full_id + ".s." + au_id)
+                elif line[0:3] == '<mu':
+                    au_id = None
+                    pass #ignore
+                elif au_id:
+                    try:
+                        wordtext,pos,lemma, extra = line.split("\t",3)
+                    except ValueError:
+                        print("\tWARNING: Line malformed: ", line, file=sys.stderr)
+                        continue
+                    word = sentence.append(folia.Word, wordtext)
+                    word.append(folia.PosAnnotation, cls=pos)
+                    word.append(folia.LemmaAnnotation, cls=lemma)
+        fin.close()
+        doc.save(os.path.join(outdir ,collection_id, full_id , '.folia.xml'))
+
+
+
diff --git a/pynlpl/tests/FoLiA/foliatools/dcoi2folia.py b/pynlpl/tests/FoLiA/foliatools/dcoi2folia.py
new file mode 100755
index 0000000..35302d7
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/dcoi2folia.py
@@ -0,0 +1,21 @@
+#! /usr/bin/env python
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+import foliatools.xslt as xslt
+
+def main():
+    # Thin CLI entry point: the real conversion is delegated to the shared
+    # XSLT driver (foliatools.xslt), parameterized with the D-Coi -> FoLiA
+    # stylesheet and the output extension 'folia.xml'.
+    usage = """dcoi2folia
+  by Maarten van Gompel (proycon)
+  Tilburg University / Radboud University Nijmegen
+  2012 - Licensed under GPLv3
+
+This conversion script converts one or more D-Coi XML documents to FoLiA.
+
+Usage: dcoi2folia [options] file-or-dir1 file-or-dir2 ..etc.."""
+
+    xslt.main('dcoi2folia.xsl','folia.xml', usage)
+
+if __name__ == "__main__":
+    main()
+
diff --git a/pynlpl/tests/FoLiA/foliatools/folia2annotatedtxt.py b/pynlpl/tests/FoLiA/foliatools/folia2annotatedtxt.py
new file mode 100755
index 0000000..a9ba4ce
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/folia2annotatedtxt.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo pip install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git", file=sys.stderr)
+    sys.exit(2)
+
+def usage():
+    """Print the folia2annotatedtxt usage/help text to stderr."""
+    # NOTE(review): the synopsis advertises "-C [columns]" but main() actually
+    # parses lowercase "-c"; also "-O" is documented twice (under Options and
+    # under directory parameters) with slightly different wording -- confirm
+    # which text is intended.
+    print("folia2annotatedtxt", file=sys.stderr)
+    print("  by Maarten van Gompel (proycon)", file=sys.stderr)
+    print("  Tilburg University / Radboud University Nijmegen", file=sys.stderr)
+    print("  2012 - Licensed under GPLv3", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("This conversion script reads a FoLiA XML document and produces a", file=sys.stderr)
+    print("simple inline output format in which tokens are space-separated and annotations are separated by a pipe symbol (|)", file=sys.stderr)
+    print("Note that only simple token annotations are supported and a lot", file=sys.stderr)
+    print("of FoLiA data can not be intuitively expressed in a simple columned format!", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("Usage: folia2annotatedtxt [options] -C [columns] file-or-dir1 file-or-dir2 ..etc..", file=sys.stderr)
+
+    print("Parameters:", file=sys.stderr)
+    print("  -c [columns]                 Comma separated list of desired column layout (mandatory), choose from:", file=sys.stderr)
+    print("                               id      - output word ID", file=sys.stderr)
+    print("                               text    - output the text of the word (the word itself)", file=sys.stderr)
+    print("                               pos     - output PoS annotation class", file=sys.stderr)
+    print("                               poshead - output PoS annotation head feature", file=sys.stderr)
+    print("                               lemma   - output lemma annotation class", file=sys.stderr)
+    print("                               sense   - output sense annotation class", file=sys.stderr)
+    print("                               phon    - output phonetic annotation class", file=sys.stderr)
+    print("                               senid   - output sentence ID", file=sys.stderr)
+    print("                               parid   - output paragraph ID", file=sys.stderr)
+    print("                               N     - word/token number (absolute)", file=sys.stderr)
+    print("                               n     - word/token number (relative to sentence)", file=sys.stderr)
+    print("Options:", file=sys.stderr)
+    print("  -o [filename]                Output to a single output file instead of stdout", file=sys.stderr)
+    print("  -O                           Output each file to similarly named file (.columns or .csv)", file=sys.stderr)
+    print("  -e [encoding]                Output encoding (default: utf-8)", file=sys.stderr)
+    print("  -S                           Output one sentence per line", file=sys.stderr)
+    print("Parameters for processing directories:", file=sys.stderr)
+    print("  -r                           Process recursively", file=sys.stderr)
+    print("  -E [extension]               Set extension (default: xml)", file=sys.stderr)
+    print("  -O                           Output each file to similarly named .txt file", file=sys.stderr)
+    print("  -P                           Like -O, but outputs to current working directory", file=sys.stderr)
+    print("  -q                           Ignore errors", file=sys.stderr)
+
+class settings:
+    # Namespace-style holder for module-wide CLI settings; class attributes
+    # are mutated directly by main() and the class is never instantiated.
+    # NOTE(review): `autooutput_cwd` is only created when -P is parsed, but
+    # process() reads settings.autooutput_cwd whenever autooutput is on, so
+    # plain -O raises AttributeError -- consider a False default here.
+    output_header = True  # unused in this tool; mirrors folia2columns
+    outputfile = None  # unused; main() keeps its own local `outputfile`
+    ignoreerrors = False  # -q: report per-file errors and continue
+    autooutput = False  # -O/-P: derive an output filename per input file
+    extension = 'xml'  # -E: input filename extension matched in directories
+    recurse = False  # -r: descend into subdirectories
+    encoding = 'utf-8'  # -e: output encoding
+    sentenceperline = False  # -S: blank line after each sentence
+    columnconf = []  # -c: ordered list of requested column names
+
+def main():
+    """Parse command-line options, then convert each given file/directory."""
+    # NOTE(review): the optstring lacks "e:" even though an '-e' handler
+    # exists below, so passing -e triggers GetoptError; likewise '-x',
+    # '--help' and '--csv' are accepted by getopt but have no handler and
+    # fall through to the `raise Exception` branch -- confirm intent.
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "o:OPhHSc:x:E:rq", ["help", "csv"])
+    except getopt.GetoptError as err:
+        print(str(err), file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+    outputfile = None
+
+    for o, a in opts:
+        if o == '-c':
+            # comma-separated column layout, e.g. "text,pos,lemma"
+            for a in a.split(','):
+                settings.columnconf.append(a)
+        elif o == '-h':
+            usage()
+            sys.exit(0)
+        elif o == '-H':
+            settings.output_header = False
+        elif o == '-e':
+            settings.encoding = a
+        elif o == '-o':
+            outputfile = a
+        elif o == '-O':
+            settings.autooutput = True
+        elif o == '-P':
+            settings.autooutput = True
+            settings.autooutput_cwd = True
+        elif o == '-E':
+            settings.extension = a
+        elif o == '-r':
+            settings.recurse = True
+        elif o == '-q':
+            settings.ignoreerrors = True
+        elif o == '-S':
+            settings.sentenceperline = True
+        else:
+            raise Exception("No such option: " + o)
+
+    if not settings.columnconf:
+        print("ERROR: No column configuration specified (use -c)", file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+
+    if args:
+        # With -o all inputs are concatenated into one file; otherwise stdout.
+        if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)
+        for x in args:
+            if os.path.isdir(x):
+                processdir(x,outputfile)
+            elif os.path.isfile(x):
+                process(x, outputfile)
+            else:
+                print("ERROR: File or directory not found: " + x, file=sys.stderr)
+                sys.exit(3)
+        if outputfile: outputfile.close()
+    else:
+        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
+
+
+
+def resize(s, i, spacing):
+    """Pad or truncate s to the fixed column width spacing[i].
+
+    Truncation keeps one trailing space as a visual column separator.
+    (Appears unused in this tool; mirrored from folia2columns.)
+    """
+    if len(s) >= spacing[i]:
+        s = s[0:spacing[i] - 1] + ' '
+    elif len(s) < spacing[i]:
+        s = s + (' ' * (spacing[i] - len(s)))
+    #print '[' + s + ']', len(s), spacing[i]
+    return s
+
+def processdir(d, outputfile = None):
+    """Process every file in d matching settings.extension; recurse with -r."""
+    print("Searching in  " + d, file=sys.stderr)
+    for f in glob.glob(os.path.join(d ,'*')):
+        if f[-len(settings.extension) - 1:] == '.' + settings.extension:
+            process(f, outputfile)
+        elif settings.recurse and os.path.isdir(f):
+            processdir(f, outputfile)
+
+def process(filename, outputfile=None):
+    """Convert one FoLiA document to pipe-annotated plain text.
+
+    Writes to outputfile if given (or an auto-derived file with -O/-P),
+    otherwise to stdout. Errors are re-raised unless -q was given.
+    """
+    try:
+        print("Processing " + filename, file=sys.stderr)
+        doc = folia.Document(file=filename)
+        prevsen = None
+        prevpar = None
+
+        if settings.autooutput:
+            ext = '.txt'
+            # NOTE(review): in the else-branch below `outfilename` is used
+            # before assignment (NameError) when the input filename does not
+            # end in settings.extension; and settings.autooutput_cwd only
+            # exists when -P was given (AttributeError with plain -O).
+            if filename[-len(settings.extension) - 1:].lower() == '.' +settings.extension:
+                outfilename = filename[:-len(settings.extension) - 1] + ext
+            else:
+                outfilename += ext
+            if settings.autooutput_cwd:
+                outfilename = os.path.basename(outfilename)
+
+            print(" Saving as " + outfilename, file=sys.stderr)
+            outputfile = io.open(outfilename,'w',encoding=settings.encoding)
+
+
+        wordnum = 0
+
+
+        for i, w in enumerate(doc.words()):
+            # Emit a blank line at sentence boundaries (-S) and always at
+            # paragraph boundaries; wordnum restarts so the relative 'n'
+            # column and the inter-word space logic reset too.
+            if settings.sentenceperline:
+                if w.sentence() != prevsen and i > 0:
+                    if outputfile:
+                        outputfile.write("\n")
+                    else:
+                        print()
+                wordnum = 0
+            if w.paragraph() != prevpar and i > 0:
+                if outputfile:
+                    outputfile.write("\n")
+                else:
+                    print()
+                wordnum = 0
+            prevpar = w.paragraph()
+            prevsen = w.sentence()
+            wordnum += 1
+            columns = []
+            # Gather one value per configured column; missing annotations
+            # become '-' (the bare excepts deliberately swallow lookup errors,
+            # but NOTE(review): they also hide unrelated failures).
+            for c in settings.columnconf:
+                if c == 'id':
+                    columns.append(w.id)
+                elif c == 'text':
+                    columns.append(w.text())
+                elif c == 'n':
+                    columns.append(str(wordnum))
+                elif c == 'N':
+                    columns.append(str(i+1))
+                elif c == 'pos':
+                    try:
+                        columns.append(w.annotation(folia.PosAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'poshead':
+                    try:
+                        columns.append(w.annotation(folia.PosAnnotation).feat('head'))
+                    except:
+                        columns.append('-')
+                elif c == 'lemma':
+                    try:
+                        columns.append(w.annotation(folia.LemmaAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'sense':
+                    try:
+                        columns.append(w.annotation(folia.SenseAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'phon':
+                    try:
+                        columns.append(w.annotation(folia.PhonAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'senid':
+                    columns.append(w.sentence().id)
+                elif c == 'parid':
+                    try:
+                        columns.append(w.paragraph().id)
+                    except:
+                        columns.append('-')
+                elif c:
+                    print("ERROR: Unsupported configuration: " + c, file=sys.stderr)
+                    sys.exit(1)
+
+
+            # Token rendered as value|value|... ; tokens space-separated.
+            word = "|".join(columns).strip()
+            if outputfile:
+                if wordnum > 1: outputfile.write(" ")
+                outputfile.write(word)
+            else:
+                if wordnum > 1: print(" ", end="")
+                if sys.version < '3':
+                    print(word.encode(settings.encoding),end="")
+                else:
+                    print(word,end="")
+
+        if settings.autooutput:
+            outputfile.close()
+        elif outputfile:
+            outputfile.flush()
+    except Exception as e:
+        if settings.ignoreerrors:
+            print("ERROR: An exception was raised whilst processing " + filename, e, file=sys.stderr)
+        else:
+            raise
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/folia2columns.py b/pynlpl/tests/FoLiA/foliatools/folia2columns.py
new file mode 100755
index 0000000..6216095
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/folia2columns.py
@@ -0,0 +1,298 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo pip install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git", file=sys.stderr)
+    sys.exit(2)
+
+def usage():
+    """Print the folia2columns usage/help text to stderr."""
+    # NOTE(review): the synopsis advertises "-C [columns]" but main() parses
+    # lowercase "-c"; "-O" is documented twice with different wording, and
+    # the second group mentions ".txt" although this tool writes .columns/.csv
+    # -- confirm which help text is intended.
+    print("folia2columns", file=sys.stderr)
+    print("  by Maarten van Gompel (proycon)", file=sys.stderr)
+    print("  Centre for Language and Speech Technology, Radboud University Nijmegen",file=sys.stderr)
+    print("  2016 - Licensed under GPLv3", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("This conversion script reads a FoLiA XML document and produces a", file=sys.stderr)
+    print("simple columned output format in which each token appears on one", file=sys.stderr)
+    print("line. Note that only simple token annotations are supported and a lot", file=sys.stderr)
+    print("of FoLiA data can not be intuitively expressed in a simple columned format!", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("Usage: folia2columns [options] -C [columns] file-or-dir1 file-or-dir2 ..etc..", file=sys.stderr)
+
+    print("Parameters:", file=sys.stderr)
+    print("  -c [columns]                 Comma separated list of desired column layout (mandatory), choose from:", file=sys.stderr)
+    print("                               id      - output word ID", file=sys.stderr)
+    print("                               text    - output the text of the word (the word itself)", file=sys.stderr)
+    print("                               pos     - output PoS annotation class", file=sys.stderr)
+    print("                               poshead - output PoS annotation head feature", file=sys.stderr)
+    print("                               lemma   - output lemma annotation class", file=sys.stderr)
+    print("                               sense   - output sense annotation class", file=sys.stderr)
+    print("                               phon    - output phonetic annotation class", file=sys.stderr)
+    print("                               senid   - output sentence ID", file=sys.stderr)
+    print("                               parid   - output paragraph ID", file=sys.stderr)
+    print("                               N     - word/token number (absolute)", file=sys.stderr)
+    print("                               n     - word/token number (relative to sentence)", file=sys.stderr)
+    print("Options:", file=sys.stderr)
+    print("  --csv                        Output in CSV format", file=sys.stderr)
+    print("  -o [filename]                Output to a single output file instead of stdout", file=sys.stderr)
+    print("  -O                           Output each file to similarly named file (.columns or .csv)", file=sys.stderr)
+    print("  -e [encoding]                Output encoding (default: utf-8)", file=sys.stderr)
+    print("  -H                           Suppress header output", file=sys.stderr)
+    print("  -S                           Suppress sentence spacing  (no whitespace between sentences)", file=sys.stderr)
+    print("  -x [sizeinchars]             Space columns for human readability (instead of plain tab-separated columns)", file=sys.stderr)
+    print("Parameters for processing directories:", file=sys.stderr)
+    print("  -r                           Process recursively", file=sys.stderr)
+    print("  -E [extension]               Set extension (default: xml)", file=sys.stderr)
+    print("  -O                           Output each file to similarly named .txt file", file=sys.stderr)
+    print("  -P                           Like -O, but outputs to current working directory", file=sys.stderr)
+    print("  -q                           Ignore errors", file=sys.stderr)
+
+class settings:
+    # Namespace-style holder for module-wide CLI settings; class attributes
+    # are mutated directly by main() and the class is never instantiated.
+    # NOTE(review): `autooutput_cwd` is only created when -P is parsed, but
+    # process() reads settings.autooutput_cwd whenever autooutput is on, so
+    # plain -O raises AttributeError -- consider a False default here.
+    output_header = True  # -H clears this: emit a header line first
+    csv = False  # --csv: comma-separated, double-quoted fields
+    outputfile = None  # unused; main() keeps its own local `outputfile`
+    sentencespacing = True  # -S clears this: blank line between sentences
+    ignoreerrors = False  # -q: report per-file errors and continue
+    nicespacing = 0  # -x: fixed column width for human-readable output
+    autooutput = False  # -O/-P: derive an output filename per input file
+    extension = 'xml'  # -E: input filename extension matched in directories
+    recurse = False  # -r: descend into subdirectories
+    encoding = 'utf-8'  # -e: output encoding
+    columnconf = []  # -c: ordered list of requested column names
+
+def main():
+    """Parse command-line options, then convert each given file/directory."""
+    # NOTE(review): the optstring lacks "e:" even though an '-e' handler
+    # exists below, so passing -e triggers GetoptError; '--help' is accepted
+    # by getopt but has no handler and falls through to `raise Exception`.
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "o:OPhHSc:x:E:rq", ["help", "csv"])
+    except getopt.GetoptError as err:
+        print(str(err), file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+    outputfile = None
+
+    for o, a in opts:
+        if o == '-c':
+            # comma-separated column layout, e.g. "text,pos,lemma"
+            for a in a.split(','):
+                settings.columnconf.append(a)
+        elif o == '-h':
+            usage()
+            sys.exit(0)
+        elif o == '-H':
+            settings.output_header = False
+        elif o == '-S':
+            settings.sentencespacing = False
+        elif o == '-e':
+            settings.encoding = a
+        elif o == '-o':
+            outputfile = a
+        elif o == '-O':
+            settings.autooutput = True
+        elif o == '-P':
+            settings.autooutput = True
+            settings.autooutput_cwd = True
+        elif o == '-x':
+            settings.nicespacing = int(a)
+        elif o == '-E':
+            settings.extension = a
+        elif o == '-r':
+            settings.recurse = True
+        elif o == '-q':
+            settings.ignoreerrors = True
+        elif o == '--csv':
+            settings.csv = True
+        else:
+            raise Exception("No such option: " + o)
+
+    if not settings.columnconf:
+        print("ERROR: No column configuration specified (use -c)", file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+
+    if args:
+        # With -o all inputs are concatenated into one file; otherwise stdout.
+        if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)
+        for x in args:
+            if os.path.isdir(x):
+                processdir(x,outputfile)
+            elif os.path.isfile(x):
+                process(x, outputfile)
+            else:
+                print("ERROR: File or directory not found: " + x, file=sys.stderr)
+                sys.exit(3)
+        if outputfile: outputfile.close()
+    else:
+        print ("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
+
+
+
+def resize(s, i, spacing):
+    """Pad or truncate s to the fixed column width spacing[i].
+
+    Truncation keeps one trailing space as a visual column separator;
+    used for the -x human-readable column layout.
+    """
+    if len(s) >= spacing[i]:
+        s = s[0:spacing[i] - 1] + ' '
+    elif len(s) < spacing[i]:
+        s = s + (' ' * (spacing[i] - len(s)))
+    #print '[' + s + ']', len(s), spacing[i]
+    return s
+
+def processdir(d, outputfile = None):
+    """Process every file in d matching settings.extension; recurse with -r."""
+    print("Searching in  " + d, file=sys.stderr)
+    for f in glob.glob(os.path.join(d, '*')):
+        if f[-len(settings.extension) - 1:] == '.' + settings.extension:
+            process(f, outputfile)
+        elif settings.recurse and os.path.isdir(f):
+            processdir(f, outputfile)
+
+def process(filename, outputfile=None):
+    """Convert one FoLiA document to columned (tab/CSV) output.
+
+    Writes to outputfile if given (or an auto-derived .columns/.csv file
+    with -O/-P), otherwise to stdout. Errors re-raise unless -q was given.
+    """
+    try:
+        print("Processing " + filename, file=sys.stderr)
+        doc = folia.Document(file=filename)
+        prevsen = None
+
+        if settings.autooutput:
+            if settings.csv:
+                ext = '.csv'
+            else:
+                ext = '.columns'
+            # NOTE(review): in the else-branch below `outfilename` is used
+            # before assignment (NameError) when the input filename does not
+            # end in settings.extension; and settings.autooutput_cwd only
+            # exists when -P was given (AttributeError with plain -O).
+            if filename[-len(settings.extension) - 1:].lower() == '.' +settings.extension:
+                outfilename = filename[:-len(settings.extension) - 1] + ext
+            else:
+                outfilename += ext
+            if settings.autooutput_cwd:
+                outfilename = os.path.basename(outfilename)
+
+            print(" Saving as " + outfilename, file=sys.stderr)
+            outputfile = io.open(outfilename,'w',encoding=settings.encoding)
+
+
+        if settings.nicespacing:
+            # Fixed widths per column for -x; narrow defaults for the short
+            # numeric/poshead columns, settings.nicespacing for the rest.
+            spacing = []
+            for c in settings.columnconf:
+                if c == 'n':
+                    spacing.append(3)
+                elif c == 'N':
+                    spacing.append(7)
+                elif c == 'poshead':
+                    spacing.append(5)
+                else:
+                    spacing.append(settings.nicespacing)
+
+        if settings.output_header:
+
+            if settings.csv:
+                columns = [ '"' + x.upper()  + '"' for x in settings.columnconf ]
+            else:
+                columns = [ x.upper()  for x in settings.columnconf ]
+
+            # NOTE(review): this overwrites the uppercased header built just
+            # above with resized *lowercase* column names -- the -x header
+            # loses its uppercasing; resizing `columns` was probably intended.
+            if settings.nicespacing and not settings.csv:
+                columns = [ resize(x, i, spacing) for i, x in enumerate(settings.columnconf) ]
+
+            if settings.csv:
+                line = ','.join(columns)
+            else:
+                line = '\t'.join(columns)
+
+            if outputfile:
+                outputfile.write(line)
+                outputfile.write('\n')
+            else:
+                if sys.version < '3':
+                    print(line.encode(settings.encoding))
+                else:
+                    print(line)
+
+        wordnum = 0
+
+
+
+        for i, w in enumerate(doc.words()):
+            # Blank line between sentences unless -S; wordnum restarts so the
+            # relative 'n' column resets per sentence.
+            if w.sentence() != prevsen and i > 0:
+                if settings.sentencespacing:
+                    if outputfile:
+                        outputfile.write('\n')
+                    else:
+                        print()
+                wordnum = 0
+            prevsen = w.sentence()
+            wordnum += 1
+            columns = []
+            # Gather one value per configured column; missing annotations
+            # become '-' (the bare excepts deliberately swallow lookup errors,
+            # but NOTE(review): they also hide unrelated failures).
+            for c in settings.columnconf:
+                if c == 'id':
+                    columns.append(w.id)
+                elif c == 'text':
+                    columns.append(w.text())
+                elif c == 'n':
+                    columns.append(str(wordnum))
+                elif c == 'N':
+                    columns.append(str(i+1))
+                elif c == 'pos':
+                    try:
+                        columns.append(w.annotation(folia.PosAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'poshead':
+                    try:
+                        columns.append(w.annotation(folia.PosAnnotation).feat('head'))
+                    except:
+                        columns.append('-')
+                elif c == 'lemma':
+                    try:
+                        columns.append(w.annotation(folia.LemmaAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'sense':
+                    try:
+                        columns.append(w.annotation(folia.SenseAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'phon':
+                    try:
+                        columns.append(w.annotation(folia.PhonAnnotation).cls)
+                    except:
+                        columns.append('-')
+                elif c == 'senid':
+                    columns.append(w.sentence().id)
+                elif c == 'parid':
+                    try:
+                        columns.append(w.paragraph().id)
+                    except:
+                        columns.append('-')
+                elif c:
+                    print("ERROR: Unsupported configuration: " + c, file=sys.stderr)
+                    sys.exit(1)
+
+            if settings.nicespacing and not settings.csv:
+                columns = [ resize(x,j, spacing) for j,x  in enumerate(columns) ]
+
+            if settings.csv:
+                line = ",".join([ '"' + x  + '"' for x in columns ])
+            else:
+                line = "\t".join(columns)
+
+            if outputfile:
+                outputfile.write(line)
+                outputfile.write('\n')
+            else:
+                if sys.version < '3':
+                    print(line.encode(settings.encoding))
+                else:
+                    print(line)
+
+        if settings.autooutput:
+            outputfile.close()
+        elif outputfile:
+            outputfile.flush()
+    except Exception as e:
+        if settings.ignoreerrors:
+            print("ERROR: An exception was raised whilst processing " + filename, e, file=sys.stderr)
+        else:
+            raise
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/folia2dcoi.py b/pynlpl/tests/FoLiA/foliatools/folia2dcoi.py
new file mode 100755
index 0000000..8286951
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/folia2dcoi.py
@@ -0,0 +1,21 @@
+#! /usr/bin/env python
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+import foliatools.xslt as xslt
+
+def main():
+    # Thin CLI entry point: the real conversion is delegated to the shared
+    # XSLT driver (foliatools.xslt), parameterized with the FoLiA -> D-Coi
+    # stylesheet and the output extension 'dcoi.xml'.
+    usage = """folia2dcoi
+  by Maarten van Gompel (proycon)
+  Tilburg University / Radboud University Nijmegen
+  2012 - Licensed under GPLv3
+
+This conversion script converts one or more FoLiA documents to D-Coi XML format, omitting
+any annotations that can not be represented in the D-Coi format.
+
+Usage: folia2dcoi [options] file-or-dir1 file-or-dir2 ..etc.."""
+
+    xslt.main('folia2dcoi.xsl','dcoi.xml', usage)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/folia2html.py b/pynlpl/tests/FoLiA/foliatools/folia2html.py
new file mode 100755
index 0000000..5a36e57
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/folia2html.py
@@ -0,0 +1,23 @@
+#! /usr/bin/env python
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import foliatools.xslt as xslt
+
+def main():
+    # Thin CLI entry point: the real conversion is delegated to the shared
+    # XSLT driver (foliatools.xslt), parameterized with the FoLiA -> HTML
+    # stylesheet and the output extension 'html'.
+    usage = """folia2html
+  by Maarten van Gompel (proycon)
+  Centre for Language and Speech Technology, Radboud University Nijmegen
+  2016 - Licensed under GPLv3
+
+
+This conversion script converts one or more FoLiA documents to a semi-interactive HTML document for
+viewing in a web-browser.
+
+Usage: folia2html [options] file-or-dir1 file-or-dir2 ..etc.."""
+
+    xslt.main('folia2html.xsl','html',usage)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/folia2rst.py b/pynlpl/tests/FoLiA/foliatools/folia2rst.py
new file mode 100755
index 0000000..b4950a0
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/folia2rst.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+#---------------------------------------------------------------
+# FoLiA to ReStructuredText Converter
+#   by Maarten van Gompel
+#   Centre for Language Studies
+#   Radboud University Nijmegen
+#   proycon AT anaproy DOT nl
+#
+#   Licensed under GPLv3
+#
+# This script converts RST to FoLiA format.
+#
+#----------------------------------------------------------------
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+import glob
+import gzip
+import os
+import io
+
+from pynlpl.formats import folia
+from pynlpl.common import u, isstring
+
+
+ADORNMENT = ['=','-','+','~','`',"'",'#']
+
+
+
+def element2rst(element, retaintokenisation=False, _previousdelimiter=""):
+    """Get the text associated with this element (of the specified class), will always be a unicode instance.
+    If no text is directly associated with the element, it will be obtained from the children. If that doesn't result
+    in any text either, a NoSuchText exception will be raised.
+
+    If retaintokenisation is True, the space attribute on words will be ignored, otherwise it will be adhered to and text will be detokenised as much as possible.
+
+    Recursive: _previousdelimiter is internal plumbing carrying the pending
+    text delimiter from the previous sibling; callers should not pass it.
+    """
+
+
+    prefix = suffix = ""
+    indent = ""
+
+
+    if element.TEXTCONTAINER:
+        if isinstance(element, folia.TextMarkupStyle):
+            #we guess how possible class names may be mapped to RST directly, set-agnostic
+            if element.href:
+                prefix = "`"
+                suffix = " <" + element.href + ">`_"
+            elif element.cls and (element.cls == 'strong' or element.cls[:4] == 'bold' or element.cls == 'b'):
+                prefix = suffix ="**"
+            elif element.cls and (element.cls[:2] == 'em' or element.cls[:6] == 'italic' or element.cls == 'i' or element.cls[:5] == 'slant'):
+                prefix = suffix ="*"
+            elif element.cls and (element.cls[:3] == 'lit' or element.cls[:4] == 'verb' or element.cls[:4] == 'code'):
+                prefix = suffix ="``"
+        s = prefix
+        for e in element:
+            if isstring(e):
+                s += e
+            else:
+                if s: s += e.TEXTDELIMITER #for AbstractMarkup, will usually be ""
+                s += element2rst(e)
+        return s + suffix
+    if not element.PRINTABLE: #only printable elements can hold text
+        raise folia.NoSuchText
+
+
+    # Structural elements: choose an RST prefix/suffix/indent per type.
+    if isinstance(element, folia.ListItem):
+        if element.n:
+            prefix = element.n + ") "
+        else:
+            prefix = "- "
+    elif isinstance(element, folia.Head):
+        # Heading adornment depth derives from the number of enclosing
+        # Divisions that themselves contain a Head.
+        level = 0
+        for div in element.ancestors(folia.Division):
+            if div.count(folia.Head,None,[folia.Division]):
+                level += 1
+        suffix = "\n" + ADORNMENT[level-1] * (len(element.text()) + 10) + "\n\n"
+    elif isinstance(element, folia.Figure) and element.src:
+        prefix = ".. figure::" + element.src + "\n\n"
+    elif isinstance(element, folia.Note):
+        #TODO
+        pass
+    elif isinstance(element, folia.Caption):
+        indent =  "    "
+    elif isinstance(element, folia.Quote) and not isinstance(element.parent, folia.Sentence) and not isinstance(element.parent, folia.Paragraph):
+        indent = "    " #block quote
+    elif isinstance(element, folia.Gap) and not isinstance(element.parent, folia.Sentence) and not isinstance(element.parent, folia.Paragraph):
+        prefix = "\n\n::\n\n" + element.content() + "\n\n" #literal block
+    elif isinstance(element, folia.List):
+        suffix = "\n\n"
+
+
+
+    if element.hastext():
+        if indent:
+            # NOTE(review): `s =` overwrites the accumulator each iteration,
+            # so only the *last* line of a multi-line text survives; `s +=`
+            # (as in the children branch below) was probably intended.
+            for i, ss in enumerate(element2rst(element.textcontent()).split("\n")):
+                if i == 0:
+                    s = indent + prefix + ss + "\n"
+                else:
+                    s = indent + ss + "\n"
+        else:
+            s = prefix + element2rst(element.textcontent())
+    else:
+        #Not found, descend into children
+        delimiter = ""
+        s = ""
+        for e in element:
+            if e.PRINTABLE and not isinstance(e, folia.TextContent):
+                try:
+                    if indent:
+                        for ss in element2rst(e,retaintokenisation, delimiter).split("\n"):
+                            if not s:
+                                s += indent + prefix + ss + "\n"
+                            else:
+                                s += indent + ss + "\n"
+                    else:
+                        if not s: s += prefix
+                        s += element2rst(e,retaintokenisation, delimiter)
+                    delimiter = e.gettextdelimiter(retaintokenisation)
+                    #delimiter will be buffered and only printed upon next iteration, this prevents the delimiter being output at the end of a sequence
+                except folia.NoSuchText:
+                    continue
+
+
+    if s and _previousdelimiter:
+        return _previousdelimiter + s + suffix
+    elif s:
+        return s + suffix
+    else:
+        #No text found at all :`(
+        raise folia.NoSuchText
+
+
+def main():
+    """CLI: folia2rst [inputfile [outputfile]]; reads stdin without args."""
+    # NOTE(review): when both an input and output file are given, argv has
+    # length 3, so the `len(sys.argv) == 2` branch never runs and inputfile
+    # stays None -- the tool then silently reads stdin and ignores argv[1];
+    # the second condition should probably be `>= 3` with inputfile also set.
+    inputfile = None
+    outputfile = None
+    if len(sys.argv) == 2:
+        inputfile = sys.argv[1]
+    if len(sys.argv) == 3:
+        outputfile = sys.argv[2]
+
+    if inputfile:
+        doc = folia.Document(file=inputfile)
+    else:
+        #stdin
+        data = sys.stdin.read()
+        doc = folia.Document(string=data)
+
+    # NOTE(review): f is never closed/flushed when writing to a file;
+    # consider a `with` block (interpreter exit usually flushes, but not
+    # guaranteed on error paths).
+    if outputfile:
+        f = io.open(outputfile,'w',encoding='utf-8')
+    else:
+        f = sys.stdout
+    for data in doc:
+        print(element2rst(data), file=f)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/folia2txt.py b/pynlpl/tests/FoLiA/foliatools/folia2txt.py
new file mode 100755
index 0000000..5e1774c
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/folia2txt.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo pip install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the folia2txt usage summary to standard error."""
    lines = (
        "folia2txt",
        "  by Maarten van Gompel (proycon)",
        "  Centre for Language and Speech Technology, Radboud University Nijmegen",
        "  2012-2016 - Licensed under GPLv3",
        "",
        "This conversion script reads a FoLiA XML document and outputs the",
        "document's text as plain text, *without* any annotations.",
        "Use folia2annotatedtxt if you want limited support for inline annotations.",
        "",
        "Usage: folia2txt [options] file-or-dir1 file-or-dir2 ..etc..",
        "",
        "Parameters for output:",
        "  -t                           Retain tokenisation, do not detokenise",
        "                               (By default output will be detokenised if",
        "                               such information is explicitly available in the",
        "                               FoLiA document)",
        "  -c                           Text class to output (defaults to: current)",
        "  -w                           One word per line",
        "  -s                           One sentence per line",
        "  -p                           One paragraph per line",
        "  -o [filename]                Output to a single file (instead of default stdout)",
        "  -e [encoding]                Output encoding (default: utf-8)",
        "Parameters for processing directories:",
        "  -r                           Process recursively",
        "  -E [extension]               Set extension (default: xml)",
        "  -O                           Output each file to similarly named .txt file",
        "  -P                           Like -O, but outputs to current working directory",
        "  -q                           Ignore errors",
    )
    for line in lines:
        print(line, file=sys.stderr)
+
def out(s, outputfile):
    """Write one line of output: to outputfile when given, otherwise to stdout.

    On Python 2 the string is encoded with the configured output encoding
    before printing; on Python 3 print() handles encoding itself.
    """
    py2 = sys.version < '3'
    if outputfile:
        if py2:
            outputfile.write(s + "\n")
        else:
            print(s, file=outputfile)
    else:
        if py2:
            print(s.encode(settings.encoding))
        else:
            print(s)
+
+
def process(filename, outputfile = None):
    """Convert one FoLiA file to plain text according to the global settings.

    Output goes to outputfile when given; with settings.autooutput a .txt
    file derived from the input filename is written instead. Exceptions are
    reported but swallowed when settings.ignoreerrors is set.
    """
    print("Converting " + filename,file=sys.stderr)
    try:
        doc = folia.Document(file=filename)

        if settings.autooutput:
            ext = '.' + settings.extension
            if filename[-len(ext):].lower() == ext:
                outfilename = filename[:-len(ext)] + '.txt'
            else:
                # Was `outfilename += '.txt'`, which raised NameError because
                # outfilename was never initialised on this branch.
                outfilename = filename + '.txt'
            if settings.autooutput_cwd:
                outfilename = os.path.basename(outfilename)

            print(" Saving as " + outfilename,file=sys.stderr)
            outputfile = io.open(outfilename,'w',encoding=settings.encoding)

        if settings.wordperline:
            for word in doc.words():
                out(word.text(settings.textclass, settings.retaintokenisation), outputfile)
        elif settings.sentenceperline:
            for sentence in doc.sentences():
                out(sentence.text(settings.textclass, settings.retaintokenisation) , outputfile)
        elif settings.paragraphperline:
            for paragraph in doc.paragraphs():
                out(paragraph.text(settings.textclass, settings.retaintokenisation) , outputfile)
        else:
            out(doc.text(settings.textclass, settings.retaintokenisation) , outputfile)

        if settings.autooutput:
            outputfile.close()
        elif outputfile:
            outputfile.flush()
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename + ":", e, file=sys.stderr)
        else:
            raise
+
+
+
+
def processdir(d, outputfile = None):
    """Convert every file with the configured extension under directory d,
    recursing into subdirectories when settings.recurse is enabled."""
    print("Searching in  " + d, file=sys.stderr)
    suffix = '.' + settings.extension
    for entry in glob.glob(os.path.join(d, '*')):
        if entry[-len(suffix):] == suffix:
            process(entry, outputfile)
        elif settings.recurse and os.path.isdir(entry):
            processdir(entry, outputfile)
+
+
class settings:
    """Global run-time configuration for folia2txt, set from command-line options in main()."""
    # output granularity (-w / -s / -p); all False means whole-document text
    wordperline = False
    sentenceperline = False
    paragraphperline = False
    # -t: keep tokenised text instead of detokenising
    retaintokenisation = False
    # -c: FoLiA text class to extract
    textclass = "current"
    # -e: output encoding
    encoding = 'utf-8'
    # -O / -P: derive a .txt output file per input (optionally in the cwd)
    autooutput = False
    autooutput_cwd = False
    # directory-processing options (-E / -r / -q)
    extension = 'xml'
    recurse = False
    ignoreerrors = False
+
+
def main():
    """Command-line entry point for folia2txt: parse options, then convert
    each given file or directory."""
    try:
        # 'e:' added: the -e (encoding) option is documented in usage() and
        # handled below, but was missing from the getopt specification, so
        # "folia2txt -e latin1 ..." aborted with "option -e not recognized".
        opts, args = getopt.getopt(sys.argv[1:], "o:OPE:he:tspwrqc:", ["help"])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-t':
            settings.retaintokenisation = True
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-o':
            outputfile = a
        elif o == '-O':
            settings.autooutput = True
        elif o == '-P':
            settings.autooutput = True
            settings.autooutput_cwd = True
        elif o == '-c':
            settings.textclass = a
        elif o == '-s':
            settings.sentenceperline = True
        elif o == '-p':
            settings.paragraphperline = True
        elif o == '-w':
            settings.wordperline = True
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)

    if args:
        for x in args:
            if os.path.isdir(x):
                processdir(x,outputfile)
            elif os.path.isfile(x):
                process(x, outputfile)
            else:
                print("ERROR: File or directory not found: " + x, file=sys.stderr)
                sys.exit(3)
    else:
        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)

    if outputfile: outputfile.close()
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliacat.py b/pynlpl/tests/FoLiA/foliatools/foliacat.py
new file mode 100755
index 0000000..ca3c003
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliacat.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# -*- coding: utf8 -*-
+
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import sys
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the foliacat usage summary to standard error."""
    lines = (
        "foliacat",
        "  by Maarten van Gompel (proycon)",
        "  Radboud University Nijmegen",
        "  2014 - Licensed under GPLv3",
        "",
        "Concatenates multiple FoLiA documents into one; provided that all IDs are unique.",
        "",
        "Usage: foliacat [options] file1 file2 file3 ... ",
        "",
        "Options:",
        "  -o [file]                    Output file",
        "  -i [id]                      ID for output file (mandatory)",
    )
    for line in lines:
        print(line, file=sys.stderr)
+
+
def concat(target, source):
    """Append a copy of every child of source to target (re-homed to
    target's document); return the number of elements copied."""
    copied = 0
    for element in source:
        target.append(element.copy(target.doc))
        copied += 1
    return copied
+
+
+
def foliacat(id, outputfile, *files):
    """Concatenate the given FoLiA files into one new document with the given ID.

    Annotation declarations from each input are copied into the output
    document, then all top-level content is merged into a single text body.
    Returns the resulting document; it is additionally saved to outputfile
    (when given) if anything was merged.
    """
    totalmerges = 0
    outputdoc = folia.Document(id=id)
    text = outputdoc.append(folia.Text(outputdoc,id=id + ".text"))
    for i, filename in enumerate(files):
        merges = 0
        print("Processing " + filename, file=sys.stderr)
        inputdoc = folia.Document(file=filename)
        print("(merging document)",file=sys.stderr)

        for annotationtype,set in inputdoc.annotations:
            if not outputdoc.declared(annotationtype,set):
                outputdoc.declare( annotationtype, set)

        for d in inputdoc.data:
            merges += concat(text, d)

        print("(merged " + str(merges) + " elements, with all elements contained therein)",file=sys.stderr)
        totalmerges += merges

    print("(TOTAL: merged " + str(totalmerges) + " elements, with all elements contained therein)",file=sys.stderr)
    # Save when anything at all was merged. The previous condition tested the
    # per-file counter `merges` (i.e. only the *last* input file), which
    # skipped saving when the final input happened to be empty and raised
    # NameError when no files were given at all.
    if outputfile and totalmerges > 0:
        outputdoc.save(outputfile)

    return outputdoc
+
def main():
    """Parse command-line options for foliacat and run the concatenation,
    printing the result to stdout when no output file was given."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], "o:i:h", ["help"])
    except getopt.GetoptError as err:
        print(str(err),file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None
    substitute = False  # reserved flag; never enabled anywhere in this tool
    docid = None

    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif o == '-o':
            outputfile = a
        elif o == '-i':
            docid = a
        else:
            raise Exception("No such option: " + o)

    if len(args) < 2:
        print("WARNING: only one file specified", file=sys.stderr)
    if not docid:
        print("ERROR: Please specify an ID for the result document with the -i option",file=sys.stderr)
        sys.exit(2)

    if substitute:
        outputfile = args[0]

    outputdoc = foliacat(docid, outputfile, *args)
    if not outputfile:
        xml = outputdoc.xmlstring()
        if sys.version < '3':
            # Python 2: encode unicode explicitly before printing
            print(xml.encode('utf-8') if isinstance(xml, unicode) else xml)
        else:
            print(xml)
+
+
+if __name__ == "__main__":
+    main()
+
+
+
diff --git a/pynlpl/tests/FoLiA/foliatools/foliacorrect.py b/pynlpl/tests/FoLiA/foliatools/foliacorrect.py
new file mode 100644
index 0000000..d81e2b0
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliacorrect.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import sys
+import os
+import glob
+import traceback
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git", file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the foliacorrect usage summary to standard error."""
    print("foliacorrect", file=sys.stderr)
    print("  by Maarten van Gompel (proycon)", file=sys.stderr)
    print("  Radboud University Nijmegen", file=sys.stderr)
    print("  2015 - Licensed under GPLv3", file=sys.stderr)
    print("", file=sys.stderr)
    print("FoLiA " + folia.FOLIAVERSION + ", library version " + folia.LIBVERSION, file=sys.stderr)
    print("", file=sys.stderr)
    print("Corrections are one of the most complex forms of annotation in FoLiA. It may occur that you want to strip explicit corrections from a document, and leave only either the corrected or original version. The document can then be parsed by simpler parsers that can not handle explicit corrections. Alternatively, you may want to accept the best suggestion for correction (with or without stripping the correction element). This tool provides the means to do all that.", file=sys.stderr)
    print("", file=sys.stderr)
    print("Usage: foliacorrect [options] file-or-dir1 file-or-dir2 ..etc..", file=sys.stderr)
    print("", file=sys.stderr)
    print("Parameters for processing directories:", file=sys.stderr)
    print("  -r                           Process recursively", file=sys.stderr)
    print("  -E [extension]               Set extension (default: xml)", file=sys.stderr)
    print("  -V                           Show version info", file=sys.stderr)
    print("  -q                           Ignore errors",file=sys.stderr)
    print("  --corrected                  Keep the corrected versions, removing all explicit corrections", file=sys.stderr)
    print("  --original                   Keep the original versions,  removing all explicit corrections", file=sys.stderr)
    # typo fixed: "hightest" -> "highest"
    print("  --acceptsuggestion           Automatically accept the suggestion with the highest confidence (can not be used with --original, can be used with --corrected)", file=sys.stderr)
    print("  --set                        Correction set to filter on", file=sys.stderr)
    print("  --class                      Correction class to filter on", file=sys.stderr)
    print("  --print                      Print all corrections, do not change the document", file=sys.stderr)
+
+
+
def replace(correction, correctionchild):
    """Replace a correction element, in its parent, by copies of the children
    of correctionchild (the chosen alternative), retargeting 'original' text
    content to the 'current' class."""
    parent = correction.parent
    position = parent.getindex(correction)
    copies = correctionchild.copychildren(correction.doc)
    parent.remove(correction)
    offset = 0
    for child in copies:
        if isinstance(child, folia.TextContent) and child.cls == 'original':
            child.cls = 'current'
        parent.insert(position + offset, child)
        offset += 1
+
+
+
def correct(filename,corrected, original, acceptsuggestion, setfilter,classfilter, output):
    """Process one FoLiA file, resolving explicit corrections in place.

    Depending on the flags, each Correction element (optionally filtered by
    setfilter/classfilter) is replaced by its original content, its corrected
    content, or its best suggestion. The document is saved back (or printed
    to stdout with -O) only when something changed.
    """
    changed = False
    try:
        doc = folia.Document(file=filename)
        for text in doc:
            # materialise the selection first: replace() mutates the tree while we iterate
            for correction in list(text.select(folia.Correction, setfilter)):
                if not classfilter or correction.cls == classfilter:
                    if original:
                        if correction.hasoriginal():
                            #restore original
                            print("Restoring original version for " + str(correction.id),file=sys.stderr)
                            replace(correction, correction.original())
                            changed = True
                        elif correction.hasoriginal(True): #check for empty original
                            #insertion, remove it
                            correction.parent.remove(correction)
                    elif corrected:
                        if correction.hasnew():
                            print("Keeping corrected version for " + str(correction.id),file=sys.stderr)
                            replace(correction, correction.new())
                            changed = True
                    elif correction.hassuggestions() and acceptsuggestion:
                        # NOTE(review): this branch is only reached when neither
                        # --original nor --corrected was given, so the inner
                        # `if corrected:` below looks unreachable — usage()
                        # claims --acceptsuggestion can be combined with
                        # --corrected; confirm the intended interplay.
                        bestsuggestion = None
                        # NOTE(review): changed is set True even when no best
                        # suggestion is ultimately found — presumably harmless,
                        # but it triggers a save; verify.
                        changed = True
                        for suggestion in correction.hassuggestions():
                            if not bestsuggestion or (suggestion.confidence and not bestsuggestion.confidence) or (suggestion.confidence and bestsuggestion.confidence and suggestion.confidence > bestsuggestion.confidence):
                                bestsuggestion = suggestion
                        if bestsuggestion:
                            if corrected:
                                replace(correction, bestsuggestion)
                            else:
                                raise NotImplementedError #TODO
                    if output:
                        print(correction.xmlstring())
        if changed:
            if settings.stdout:
                print(doc.xmlstring())
            else:
                doc.save()
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename + ":", e, file=sys.stderr)
        else:
            raise
+
+
+
+
def processdir(d,corrected, original, acceptsuggestion, setfilter,classfilter,output):
    """Apply correct() to every file with the configured extension under
    directory d, recursing when settings.recurse is enabled."""
    print("Searching in  " + d,file=sys.stderr)
    suffix = '.' + settings.extension
    for entry in glob.glob(os.path.join(d, '*')):
        if entry[-len(suffix):] == suffix:
            correct(entry, corrected, original, acceptsuggestion, setfilter, classfilter, output)
        elif settings.recurse and os.path.isdir(entry):
            processdir(entry, corrected, original, acceptsuggestion, setfilter, classfilter, output)
+
+
class settings:
    """Global configuration for foliacorrect, set from the command line."""
    recurse = False       # -r: process directories recursively
    extension = 'xml'     # -E: input file extension
    stdout = False        # -O: print result to stdout instead of saving in place
    ignoreerrors = False  # -q: report errors but continue
    encoding = 'utf-8'    # output encoding
+
def main():
    """Command-line entry point for foliacorrect: parse options and process
    the given files and directories."""
    original = acceptsuggestion = output = corrected = False
    setfilter = classfilter = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], "E:srhqVO", ["help","original","corrected","acceptsuggestion","set=","class=","print"])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-E':
            settings.extension = a
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        elif o == '-O':
            settings.stdout = True
        elif o == '-V':
            print("FoLiA " + folia.FOLIAVERSION + ", library version " + folia.LIBVERSION,file=sys.stderr)
            sys.exit(0)
        elif o == '--original':
            original = True
        elif o == '--corrected':
            corrected = True
        elif o == '--acceptsuggestion':
            acceptsuggestion = True
        elif o == '--set':
            # getopt returns the bare long-option name, never '--set='
            setfilter = a
        elif o == '--class':
            classfilter = a
        elif o == '--print':
            output = True
        else:
            raise Exception("No such option: " + o)

    if args:
        # Iterate over the positional arguments getopt already separated out.
        # The previous code walked sys.argv[1:] skipping entries that start
        # with '-', which wrongly treated option *values* (e.g. the argument
        # of -E) as input files and skipped files whose names begin with a dash.
        for x in args:
            if os.path.isdir(x):
                processdir(x,corrected,original, acceptsuggestion, setfilter,classfilter,output)
            elif os.path.isfile(x):
                correct(x,corrected, original, acceptsuggestion, setfilter,classfilter,output)
            else:
                print("ERROR: File or directory not found: " + x,file=sys.stderr)
                sys.exit(3)
    else:
        print("ERROR: No files specified",file=sys.stderr)
        sys.exit(2)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliacount.py b/pynlpl/tests/FoLiA/foliatools/foliacount.py
new file mode 100755
index 0000000..c3599c0
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliacount.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+from collections import Counter
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo pip install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the foliacount usage summary to standard error."""
    lines = (
        "foliacount",
        "  by Maarten van Gompel (proycon)",
        "  Centre for Language and Speech Technology, Radboud University Nijmegen",
        "  2016 - Licensed under GPLv3",
        "",
        "This script reads a FoLiA XML document and counts certain structure elements.",
        "",
        "Usage: foliacount [options] file-or-dir1 file-or-dir2 ..etc..",
        "",
        "Parameters for processing directories:",
        "  -r                           Process recursively",
        "  -E [extension]               Set extension (default: xml)",
        "  -C [type>count,type<count]   Count only documents that match the constraints",
        "  -t [types]                   Output only these elements (comma separated list)",
        "  -P                           Like -O, but outputs to current working directory",
        "  -q                           Ignore errors",
    )
    for line in lines:
        print(line, file=sys.stderr)
+
def out(s, outputfile):
    """Write one line of output: to outputfile when given, otherwise to stdout.

    On Python 2 stdout output is encoded explicitly; Python 3's print()
    handles encoding itself.
    """
    py2 = sys.version < '3'
    if outputfile:
        if py2:
            outputfile.write(s + "\n")
        else:
            print(s, file=outputfile)
    else:
        if py2:
            print(s.encode(settings.encoding))
        else:
            print(s)
+
+
def process(filename, outputfile = None):
    """Count FoLiA elements by XML tag in one document; returns a Counter.

    Documents failing any settings.constraints predicate are reported and
    counted only as 'skipped_documents'. With settings.ignoreerrors a
    failure yields whatever was counted so far.
    """
    print("Processing " + filename,file=sys.stderr)
    count = Counter()
    try:
        doc = folia.Document(file=filename)
        count['documents'] += 1

        for element in doc.select(folia.AbstractElement):
            tag = element.XMLTAG
            if tag and (not settings.types or tag in settings.types):
                count[tag] += 1

        for constraintag, constrainf in settings.constraints:
            if not constrainf(count[constraintag]):
                print("Skipping due to unmet constraints (" + constraintag+"): " + filename,file=sys.stderr)
                return Counter({'skipped_documents':1})

        print("Counted " + filename,file=sys.stderr)

    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename + ":", e, file=sys.stderr)
        else:
            raise

    return count
+
+
+
+
def processdir(d, outputfile = None):
    """Aggregate element counts over all matching files below directory d
    (recursing when enabled); returns a Counter."""
    print("Searching in  " + d, file=sys.stderr)
    total = Counter()
    suffix = '.' + settings.extension
    for entry in glob.glob(os.path.join(d, '*')):
        if entry[-len(suffix):] == suffix:
            total.update(process(entry, outputfile))
        elif settings.recurse and os.path.isdir(entry):
            total.update(processdir(entry, outputfile))
    return total
+
+
class settings:
    """Global configuration for foliacount, set from the command line."""
    recurse = False       # -r: process directories recursively
    extension = 'xml'     # -E: input file extension
    ignoreerrors = False  # -q: report errors but continue
    types = None          # -t: restrict counting to these XML tags (None = all)
    constraints = []      # -C: list of (tag, predicate) document filters
+
+
+def main():
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "o:OPE:ht:spwrqC:", ["help"])
+    except getopt.GetoptError as err:
+        print(str(err), file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+
+    outputfile = None
+
+
+    for o, a in opts:
+        if o == '-h' or o == '--help':
+            usage()
+            sys.exit(0)
+        elif o == '-E':
+            settings.extension = a
+        elif o == '-r':
+            settings.recurse = True
+        elif o == '-t':
+            settings.types = a.split(',')
+        elif o == '-C':
+            for rawconstraint in a.split(','):
+                if '>' in rawconstraint:
+                    tag, value = rawconstraint.split('>')
+                    value = int(value)
+                    settings.constraints.append( (tag, lambda x: x > value) )
+                elif '<' in rawconstraint:
+                    tag, value = rawconstraint.split('<')
+                    value = int(value)
+                    settings.constraints.append( (tag, lambda x: x < value) )
+                elif rawconstraint.find('>=') != -1:
+                    tag, value = rawconstraint.split('>=')
+                    value = int(value)
+                    settings.constraints.append( (tag, lambda x: x >= value) )
+                elif rawconstraint.find('<=') != -1:
+                    tag, value = rawconstraint.split('<=')
+                    value = int(value)
+                    settings.constraints.append( (tag, lambda x: x <= value) )
+                elif rawconstraint.find('==') != -1:
+                    tag, value = rawconstraint.split('==')
+                    value = int(value)
+                    settings.constraints.append( (tag, lambda x: x == value) )
+                elif rawconstraint.find('!=') != -1:
+                    tag, value = rawconstraint.split('!=')
+                    value = int(value)
+                    settings.constraints.append( (tag, lambda x: x != value) )
+                else:
+                    tag = a
+                    settings.constraints.append( (tag, lambda x: x > 0) )
+        elif o == '-q':
+            settings.ignoreerrors = True
+        else:
+            raise Exception("No such option: " + o)
+
+
+    if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)
+
+    if args:
+        for x in args:
+            if os.path.isdir(x):
+                count = processdir(x,outputfile)
+            elif os.path.isfile(x):
+                count = process(x, outputfile)
+            else:
+                print("ERROR: File or directory not found: " + x, file=sys.stderr)
+                sys.exit(3)
+
+        for xmltag, freq in sorted(count.items(), key=lambda x: x[1]*-1):
+            print(xmltag+"\t" + str(freq))
+    else:
+        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
+
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliafreqlist.py b/pynlpl/tests/FoLiA/foliatools/foliafreqlist.py
new file mode 100755
index 0000000..20b7545
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliafreqlist.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia
+    from pynlpl.statistics import FrequencyList
+    from pynlpl.textprocessors import Windower
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the foliafreqlist usage summary to standard error."""
    lines = (
        "foliafreqlist",
        "  by Maarten van Gompel (proycon)",
        "  Radboud University Nijmegen",
        "  2012 - Licensed under GPLv3",
        "",
        "Compute a frequency list on one or more *tokenised* FoLiA documents.",
        "",
        "Usage: foliafreqlist [options] file-or-dir1 file-or-dir2 ..etc..",
        "",
        "Parameters for output:",
        "  -i                           Case insensitive",
        "  -n [n]                       Count n-grams rather than unigrams",
        "  -s                           Add begin/end of sentence markers (with -n > 1)",
        "  -o [filename]                Output to a single file (instead of default stdout)",
        "  -e [encoding]                Output encoding (default: utf-8)",
        "Parameters for processing directories:",
        "  -r                           Process recursively",
        "  -E [extension]               Set extension (default: xml)",
        "  -O                           Output each file to similarly named .freqlist file",
        "  -q                           Ignore errors",
    )
    for line in lines:
        print(line, file=sys.stderr)
+
+
+
+
+
def process(filename):
    """Compute a FrequencyList for one tokenised FoLiA document.

    Honours settings.n (n-gram size), settings.casesensitive,
    settings.sentencemarkers and settings.autooutput. With
    settings.ignoreerrors a failure yields whatever was counted so far.
    """
    # created before the try so it exists even if loading fails with -q
    freqlist = FrequencyList()
    try:
        print("Processing " + filename,file=sys.stderr)
        doc = folia.Document(file=filename)

        if settings.n == 1:
            for word in doc.words():
                text = word.toktext()
                # fold case only when counting case-INsensitively (-i);
                # the old condition was inverted
                if not settings.casesensitive: text = text.lower()
                freqlist.count(text)
        else:
            # n-gram counting per sentence; the old non-marker branch used
            # undefined names (`sentence`, `ngram`) and crashed
            for sentence in doc.sentences():
                if settings.sentencemarkers:
                    windower = Windower(sentence.words(), settings.n)
                else:
                    windower = Windower(sentence.words(), settings.n, None, None)
                for ngram in windower:
                    # ngram is a tuple of Word elements; join their tokenised text
                    text = ' '.join(w.toktext() for w in ngram)
                    if not settings.casesensitive: text = text.lower()
                    freqlist.count(text)

        if settings.autooutput:
            ext = '.' + settings.extension
            if filename[-len(ext):].lower() == ext:
                outfilename = filename[:-len(ext)] + '.freqlist'
            else:
                # was `outfilename += '.freqlist'` on an undefined name
                outfilename = filename + '.freqlist'
            freqlist.save(outfilename,True)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename, e,file=sys.stderr)
        else:
            raise

    return freqlist
+
+
+
def processdir(d, freqlist = None):
    """Aggregate frequency lists over all matching files below directory d
    (recursing when enabled); returns the aggregate FrequencyList."""
    if not freqlist: freqlist = FrequencyList()
    print("Searching in  " + d,file=sys.stderr)
    suffix = '.' + settings.extension
    for entry in glob.glob(os.path.join(d, '*')):
        if entry[-len(suffix):] == suffix:
            freqlist += process(entry)
        elif settings.recurse and os.path.isdir(entry):
            processdir(entry, freqlist)
    return freqlist
+
+
class settings:
    """Global configuration for foliafreqlist, set from the command line."""
    n = 1                    # -n: n-gram size (1 = unigrams)
    casesensitive = True     # -i clears this for case-insensitive counting
    sentencemarkers = False  # -s: add begin/end-of-sentence markers
    autooutput = False       # -O: write a .freqlist file per input
    recurse = False          # -r: process directories recursively
    extension = 'xml'        # -E: input file extension
    encoding = 'utf-8'       # -e: output encoding
    ignoreerrors = False     # -q: report errors but continue
+
+
def main():
    """Command-line entry point for foliafreqlist: parse options, build the
    aggregate frequency list over all inputs, and write/print it."""
    try:
        # 'e:', 'i' and 'n:' added: -e, -i and -n are advertised in usage()
        # (and -e/-i/-n handled below) but were missing from the getopt
        # specification, making those options unusable.
        opts, args = getopt.getopt(sys.argv[1:], "o:e:OE:htspwrqin:", ["help"])
    except getopt.GetoptError as err:
        print(str(err),file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-i':
            settings.casesensitive = False
        elif o == '-n':
            settings.n = int(a)
        elif o == '-o':
            outputfile = a
        elif o == '-O':
            settings.autooutput = True
        elif o == '-s':
            settings.sentencemarkers = True
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)

    if args:
        # Iterate over the positional arguments only; the old loop walked
        # sys.argv[1:] and therefore also hit option flags and their values.
        freqlist = FrequencyList()
        for x in args:
            if os.path.isdir(x):
                processdir(x,freqlist)
            elif os.path.isfile(x):
                freqlist += process(x)
            else:
                print("ERROR: File or directory not found: " + x,file=sys.stderr)
                sys.exit(3)
        if outputfile:
            freqlist.save(outputfile, True)
        else:
            for line in freqlist.output("\t", True):
                print(line)
    else:
        print("ERROR: No files specified",file=sys.stderr)
        sys.exit(2)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliaid.py b/pynlpl/tests/FoLiA/foliatools/foliaid.py
new file mode 100755
index 0000000..1b260c7
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliaid.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+from collections import Counter
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo pip install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Print usage information for the foliaid tool to standard error."""
    print("foliaid",file=sys.stderr)
    print("  by Maarten van Gompel (proycon)",file=sys.stderr)
    print("  Centre for Language and Speech Technology, Radboud University Nijmegen",file=sys.stderr)
    print("  2017 - Licensed under GPLv3",file=sys.stderr)
    print("",file=sys.stderr)
    print("Assign IDs to structural elements that have none yet", file=sys.stderr)
    print("",file=sys.stderr)
    # fixed: the usage line said "foliacount" (copy/paste from another tool)
    print("Usage: foliaid [options] file-or-dir1 file-or-dir2 ..etc..",file=sys.stderr)
    print("",file=sys.stderr)
    print("Parameters for processing directories:",file=sys.stderr)
    print("  -r                           Process recursively",file=sys.stderr)
    print("  -E [extension]               Set extension (default: xml)",file=sys.stderr)
    print("  -t [types]                   Output only these elements (comma separated list)", file=sys.stderr)
    print("  -P                           Like -O, but outputs to current working directory",file=sys.stderr)
    print("  -q                           Ignore errors",file=sys.stderr)
+
def out(s, outputfile):
    """Emit the string *s*, either to the given output file object or to
    standard output (encoding explicitly on Python 2)."""
    legacy = sys.version < '3'
    if outputfile:
        if legacy:
            outputfile.write(s + "\n")
        else:
            print(s, file=outputfile)
    else:
        if legacy:
            print(s.encode(settings.encoding))
        else:
            print(s)
+
+
def process(filename, outputfile = None):
    """Assign IDs to elements of a FoLiA document that lack one, then save
    the document back in place.

    filename   -- path of the FoLiA XML document to process
    outputfile -- accepted for API symmetry with the other tools; unused,
                  the document is always saved to its own file
    """
    print("Processing " + filename,file=sys.stderr)
    try:
        doc = folia.Document(file=filename)
        # Top-level elements get a simple document-derived ID
        for e in doc.data:
            if e.id is None:
                e.id = doc.id + '.' + e.XMLTAG + '.1'
                doc[e.id] = e
                print(" /-> ", e.id,file=sys.stderr)

        for e in doc.select(folia.AbstractStructureElement):
            if e.id is None:
                if not settings.types or e.XMLTAG in settings.types:
                    # climb to the nearest ancestor that has an ID to derive from
                    parent = e.parent
                    while not parent.id:
                        parent = parent.parent
                    try:
                        e.id = parent.generate_id(e.__class__)
                    except folia.GenerateIDException:
                        print(repr(e), repr(parent), parent.id, file=sys.stderr)
                        raise
                    print(" --> ", e.id,file=sys.stderr)

        # fixed: save() was outside the try block, so when Document loading
        # failed and ignoreerrors was set, 'doc' was unbound and the
        # trailing save raised a NameError instead of being skipped
        doc.save()
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename + ":", e, file=sys.stderr)
        else:
            raise
+
+
+
+
+
def processdir(d, outputfile = None):
    """Process every file with the configured extension in directory *d*,
    descending into subdirectories when settings.recurse is enabled."""
    print("Processing directory  " + d, file=sys.stderr)
    for f in glob.glob(os.path.join(d, '*')):
        if f[-len(settings.extension) - 1:] == '.' + settings.extension:
            process(f, outputfile)
        elif settings.recurse and os.path.isdir(f):
            # fixed: the recursive call was misspelled 'rocessdir', which
            # raised a NameError as soon as -r hit a subdirectory
            processdir(f, outputfile)
+
+
class settings:
    """Module-wide runtime configuration, set from command-line options."""
    extension = 'xml'      # file extension to match when scanning directories
    recurse = False        # descend into subdirectories
    ignoreerrors = False   # log-and-continue instead of raising
    types = None           # restrict to these element XML tags (None = all)
    # fixed: 'encoding' was missing although out() and main() read
    # settings.encoding, causing an AttributeError when -o was used
    encoding = 'utf-8'
+
+
def main():
    """Command-line entry point for foliaid: parse options and assign
    missing IDs in the given FoLiA files/directories."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], "o:OPE:ht:spwrq", ["help"])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-o':
            # fixed: -o was accepted by the getopt string but fell through
            # to the "No such option" exception; it now sets the output file
            outputfile = a
        elif o == '-E':
            settings.extension = a
        elif o == '-r':
            settings.recurse = True
        elif o == '-t':
            settings.types = a.split(',')
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    if outputfile:
        # getattr guard: this tool's settings class did not declare an
        # 'encoding' attribute, so fall back to utf-8 when absent
        outputfile = io.open(outputfile, 'w', encoding=getattr(settings, 'encoding', 'utf-8'))

    if args:
        for x in args:
            if os.path.isdir(x):
                processdir(x, outputfile)
            elif os.path.isfile(x):
                process(x, outputfile)
            else:
                print("ERROR: File or directory not found: " + x, file=sys.stderr)
                sys.exit(3)
    else:
        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
diff --git a/pynlpl/tests/FoLiA/foliatools/foliamerge.py b/pynlpl/tests/FoLiA/foliatools/foliamerge.py
new file mode 100755
index 0000000..20da474
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliamerge.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# -*- coding: utf8 -*-
+
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import sys
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the foliamerge usage text to standard error."""
    for line in (
        "foliamerge",
        "  by Maarten van Gompel (proycon)",
        "  Radboud University Nijmegen",
        "  2017 - Licensed under GPLv3",
        "",
        "Merges annotations from two or more FoLiA documents. Structural elements are never added. Annotations can only be merged if their parent elements have IDs.",
        "",
        "Usage: foliamerge [options] file1 file2 file3 ... ",
        "",
        "Options:",
        "  -o [file]                    Output file",
        "  -s                           Substitute: use first input file as output as well",
        "  -a                           Annotations from file2 onwards are included as alternatives",
    ):
        print(line, file=sys.stderr)
+
def attach(parent, child):
    """Adopt *child* into *parent* without copying.

    The child is re-pointed at the parent's document and at the parent
    itself, and its descendants are re-parented via setparents().
    """
    child.doc, child.parent = parent.doc, parent
    child.setparents()
+
def reID(doc, element, oldprefix, newprefix):
    """Recursively rewrite element IDs, swapping *oldprefix* for
    *newprefix*; recursion stops at structural elements, which keep
    their own IDs."""
    if element.id:
        element.id = element.id.replace(oldprefix + '.', newprefix + '.')
    eligible = [
        child for child in element.data
        if isinstance(child, folia.AbstractElement)
        and not isinstance(child, folia.AbstractStructureElement)
    ]
    for child in eligible:
        reID(doc, child, oldprefix, newprefix)
+
def mergechildren(parent, outputdoc, asalternative):
    """Recursively transplant annotation elements found under *parent*
    (an element of a secondary document) into the element with the same
    ID in *outputdoc*, returning the number of annotations merged.

    Elements that have been merged are marked with an ad-hoc 'merged'
    attribute so they are never merged twice. When *asalternative* is
    true, annotations are wrapped in Alternative/AlternativeLayers
    elements instead of being added directly.
    """
    # already transplanted earlier in this run: nothing to do
    if hasattr(parent,'merged'): return 0
    merges = 0
    for e in parent:
        if isinstance(e, (folia.AbstractTokenAnnotation, folia.AbstractAnnotationLayer)) and parent.id and not hasattr(e, 'merged'):
            # skip elements without an annotation type (not mergeable)
            try:
                e.ANNOTATIONTYPE
            except:
                continue

            # only merge annotation types/sets that the output document declares
            if (e.ANNOTATIONTYPE, e.set) in outputdoc.annotations:
                assert e.parent == parent
                try:
                    newparent = outputdoc[parent.id]
                except:
                    continue #parent does not exist, nothing to merge here
                if isinstance(newparent, (folia.Alternative, folia.AlternativeLayers)):
                    #we do not merge alternatives, each alternative has its own scope
                    continue
                #check if the annotation already exists
                #print("DEBUG: Adding annotation type " + e.__class__.__name__ + ", set " + e.set + ", under " + newparent.id, file=sys.stderr)
                if asalternative:
                    # wrap the transplanted annotation in an Alternative
                    # (token annotation) or AlternativeLayers (layer),
                    # re-prefixing its IDs to the new alternative's scope
                    if isinstance(e, folia.AbstractTokenAnnotation):
                        print("Adding Annotation type " + e.__class__.__name__ + ", set " + str(e.set) + " to " + newparent.id + " as alternative", file=sys.stderr)
                        alt = newparent.append(folia.Alternative, generate_id_in=newparent)
                        reID(newparent.doc, e, newparent.id, alt.id)
                        alt.append(e)
                        e.setdoc(outputdoc)
                        assert e.parent is alt
                        alt.merged = True
                        e.merged = True
                        merges += 1
                    elif isinstance(e, folia.AbstractAnnotationLayer):
                        print("Adding Annotation type " + e.__class__.__name__ + ", set " + str(e.set) + " to " + newparent.id + " as alternative", file=sys.stderr)
                        alt = newparent.append(folia.AlternativeLayers, generate_id_in=newparent)
                        reID(newparent.doc, e, newparent.id, alt.id)
                        alt.append(e)
                        assert e.parent is alt
                        e.setdoc(outputdoc)
                        alt.merged = True
                        e.merged = True
                        merges += 1
                elif isinstance(e, folia.AbstractTokenAnnotation) and newparent.hasannotation(e.__class__, e.set):
                    # direct merge requested but an equivalent annotation
                    # is already present on the target: leave it untouched
                    print("Annotation type " + e.__class__.__name__ + ", set " + e.set + ", under " + newparent.id + " , already exists... skipping", file=sys.stderr)
                    pass
                elif isinstance(e, folia.AbstractAnnotationLayer) and newparent.hasannotationlayer(e.__class__, e.set):
                    print("Annotation type " + e.__class__.__name__ + ", set " + e.set + ", under " + newparent.id + " , already exists... skipping", file=sys.stderr)
                    pass
                else:
                    print("Adding Annotation type " + e.__class__.__name__ + ", set " + str(e.set) + " to " + newparent.id, file=sys.stderr)
                    newparent.append(e)
                    assert e.parent is newparent
                    e.setdoc(outputdoc)
                    e.merged = True
                    merges += 1
        elif isinstance(e, folia.AbstractElement):
            # not an annotation itself: descend looking for mergeable children
            merges += mergechildren(e, outputdoc, asalternative)
    return merges
+
+
+
def foliamerge(outputfile, *files, **kwargs):
    """Merge annotations from all given FoLiA files into the first one.

    The first file is the pivot document; annotation (layer) elements of
    every subsequent document are transplanted into it. Keyword argument
    'asalternative' wraps transplanted annotations as alternatives. The
    pivot is saved to *outputfile* when given and at least one merge took
    place; the (merged) pivot document is returned.
    """
    asalternative = kwargs.get('asalternative', False)
    pivot = None
    nmerges = 0

    for seqnr, fname in enumerate(files):
        print("Processing " + fname, file=sys.stderr)
        current = folia.Document(file=fname)
        if seqnr == 0:
            print("(pivot document)", file=sys.stderr)
            pivot = current
        else:
            print("(merging document)", file=sys.stderr)

            # every annotation type/set of the incoming document must be
            # declared on the pivot before transplanting anything
            for annotationtype, annotationset in current.annotations:
                if not pivot.declared(annotationtype, annotationset):
                    pivot.declare(annotationtype, annotationset)

            for element in current:
                nmerges += mergechildren(element, pivot, asalternative)

    if outputfile and nmerges > 0:
        pivot.save(outputfile)

    return pivot
+
def main():
    """Command-line entry point for foliamerge."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], "o:sha", ["help"])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    outputfile = None
    substitute = False
    asalternative = False

    for opt, value in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif opt == '-o':
            outputfile = value
        elif opt == '-s':
            substitute = True
        elif opt == '-a':
            asalternative = True
        else:
            raise Exception("No such option: " + opt)

    if len(args) < 2:
        print("ERROR: At least two files need to be specified", file=sys.stderr)
        sys.exit(2)

    if substitute:
        # -s: write the merge result back into the first input file
        outputfile = args[0]

    outputdoc = foliamerge(outputfile, *args, asalternative=asalternative)
    if not outputfile:
        xml = outputdoc.xmlstring()
        # Python 2 needs explicit encoding of unicode output
        if sys.version < '3':
            print(xml.encode('utf-8') if isinstance(xml, unicode) else xml)
        else:
            print(xml)
+
+
+
diff --git a/pynlpl/tests/FoLiA/foliatools/foliaquery.py b/pynlpl/tests/FoLiA/foliatools/foliaquery.py
new file mode 100755
index 0000000..423a686
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliaquery.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia, fql
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Write the foliaquery usage text to standard error."""
    for line in (
        "foliaquery",
        "  by Maarten van Gompel (proycon)",
        "  Radboud University Nijmegen",
        "  2015 - Licensed under GPLv3",
        "",
        "Query one or more FoLiA documents for certain patterns.",
        "",
        "Usage: foliaquery [options] -q <FQL query> file-or-dir1 file-or-dir2 ..etc..",
        "",
        "Parameters for output:",
        "  -q 'fql query'               Query (May be specified multiple times)",
        "  -e [encoding]                Output encoding (default: utf-8)",
        "Parameters for processing directories:",
        "  -r                           Process recursively",
        "  -E [extension]               Set extension (default: xml)",
        "  -i                           Ignore errors",
        "",
    ):
        print(line, file=sys.stderr)
+
+
+
def process(filename, queries):
    """Run every FQL query against one FoLiA document, printing the
    results; the document is saved back when any query modified it."""
    try:
        print("Processing " + filename, file=sys.stderr)
        doc = folia.Document(file=filename)
        modified = False
        for query in queries:
            # the python output format cannot be printed; fall back to XML
            if query.format == "python":
                query.format = "xml"
            print(query(doc))
            if query.action and query.action.action in ('EDIT', 'DELETE', 'SUBSTITUTE', 'PREPEND', 'APPEND'):
                modified = True
        if modified:
            print("Saving " + filename, file=sys.stderr)
            doc.save()
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename + ":", e            ,file=sys.stderr)
        else:
            raise
+
+
def processdir(d, queries):
    """Apply the queries to every matching file under directory *d*,
    descending into subdirectories when settings.recurse is enabled."""
    print("Searching in  " + d, file=sys.stderr)
    suffix = '.' + settings.extension
    for entry in glob.glob(os.path.join(d, '*')):
        if entry[-len(suffix):] == suffix:
            process(entry, queries)
        elif settings.recurse and os.path.isdir(entry):
            processdir(entry, queries)
+
+
class settings:
    """Global runtime configuration for the foliaquery tool."""
    leftcontext = 0        # tokens of left context to show
    rightcontext = 0       # tokens of right context to show
    extension = 'xml'      # extension of files to process
    recurse = False        # recurse into subdirectories
    encoding = 'utf-8'     # output encoding
    ignoreerrors = False   # continue past per-file errors
    casesensitive = True   # case-sensitive pattern matching
+
+
def main():
    """Command-line entry point: run FQL queries over FoLiA documents, or
    start an interactive query shell when no -q query was given."""
    try:
        # 'e:' added: -e (encoding) is handled below but was missing from
        # the getopt string, so passing it raised a GetoptError
        opts, args = getopt.getopt(sys.argv[1:], "o:OE:e:hq:nr", ["help","text=","pos=","lemma=","sense=","phon="])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    queries = []

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-r':
            settings.recurse = True
        elif o == '-n':
            settings.ignoreerrors = True
        elif o == '-q':
            try:
                queries.append(fql.Query(a))
            except Exception as e:
                print("FQL SYNTAX ERROR: " + str(e), file=sys.stderr)
        else:
            raise Exception("No such option: " + o)

    if queries and args:
        for x in args:
            if os.path.isdir(x):
                processdir(x, queries)
            elif os.path.isfile(x):
                process(x, queries)
            elif x[0:2] != '--':
                print("ERROR: File or directory not found: " + x, file=sys.stderr)
                sys.exit(3)
    elif not queries:
        # interactive mode: preload the given documents, then read queries
        docs = []
        if len(args) > 50:
            print("ERROR: Too many files specified for interactive mode, specify a query on the command line instead", file=sys.stderr)
        for x in args:
            if os.path.isdir(x):
                print("ERROR: Directories are not allowed in interactive mode, specify a query on the command line", file=sys.stderr)
        for x in args:
            print("Loading " + x + "...", file=sys.stderr)
            docs.append(folia.Document(file=x))

        import readline
        print("Starting interactive mode, enter your FQL queries, QUIT to save changes and exit.", file=sys.stderr)
        savedocs = []
        while True:
            query = input("FQL> ")
            if query == "QUIT" or query == "EXIT":
                break
            if query.startswith("LOAD "):
                # fixed: the original tested 'query.startswith == "LOAD "',
                # comparing the bound method itself to a string (always
                # False), so LOAD never worked; it also printed a stale
                # loop variable instead of the file actually being loaded
                filename = query[5:]
                print("Loading " + filename + "...", file=sys.stderr)
                docs.append(folia.Document(file=filename))
                continue

            try:
                query = fql.Query(query)
            except fql.SyntaxError as e:
                print("FQL SYNTAX ERROR: " + str(e), file=sys.stderr)
                continue

            if query.format == "python":
                query.format = "xml"

            for doc in docs:
                output = query(doc)
                print(output)
                if query.action and query.action.action in ('EDIT','DELETE','SUBSTITUTE','PREPEND','APPEND'):
                    if not doc in savedocs:
                        savedocs.append(doc)

        print("Saving changes to documents, please wait...", file=sys.stderr)
        #save documents if changes are made
        for doc in savedocs:
            print("Saving " + doc.filename)
            doc.save()
        print("done.", file=sys.stderr)

    else:
        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
diff --git a/pynlpl/tests/FoLiA/foliatools/foliaquery1.py b/pynlpl/tests/FoLiA/foliatools/foliaquery1.py
new file mode 100755
index 0000000..8300c5d
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliaquery1.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
def usage():
    """Print usage information and pattern-syntax help for the
    pattern-based foliaquery tool to standard error."""
    print("foliaquery",file=sys.stderr)
    print("  by Maarten van Gompel (proycon)",file=sys.stderr)
    print("  Tilburg University / Radboud University Nijmegen",file=sys.stderr)
    print("  2012 - Licensed under GPLv3",file=sys.stderr)
    print("",file=sys.stderr)
    print("Query one or more FoLiA documents for certain patterns.",file=sys.stderr)
    print("",file=sys.stderr)
    print("Usage: foliaquery [options] file-or-dir1 file-or-dir2 ..etc..",file=sys.stderr)
    print("",file=sys.stderr)
    print("Parameters for output:"        ,file=sys.stderr)
    print("  --text \"[words]\"           Text pattern (Case sensitive)",file=sys.stderr)
    print("  --pos \"[postags]\"          Pos pattern",file=sys.stderr)
    print("  --lemma \"[lemmas]\"         Lemma pattern",file=sys.stderr)
    print("  --sense \"[sense]\"          Sense pattern",file=sys.stderr)
    print("  --phon \"[phon]\"            Phonetic pattern",file=sys.stderr)
    print("  -i                           Patterns are case-insensitive",file=sys.stderr)
    print("  -L [length]                  Left-context size words/tokens (default: 0)",file=sys.stderr)
    print("  -R [length]                  Right-context size in words/tokens (default: 0)"    ,file=sys.stderr)
    print("  -e [encoding]                Output encoding (default: utf-8)"     ,file=sys.stderr)
    print("Parameters for processing directories:",file=sys.stderr)
    print("  -r                           Process recursively",file=sys.stderr)
    print("  -E [extension]               Set extension (default: xml)",file=sys.stderr)
    print("  -q                           Ignore errors",file=sys.stderr)
    print("",file=sys.stderr)
    print("Pattern syntax:",file=sys.stderr)
    print("    Fixed-width wildcard: ^ ",file=sys.stderr)
    print("    Variable-width wildcard: * ",file=sys.stderr)
    print("    Disjunction: | ",file=sys.stderr)
    print("    Regular Expression: {REGEXP}",file=sys.stderr)

    print(""    ,file=sys.stderr)
    print("Examples:",file=sys.stderr)
    print("   1) foliaquery --text=\"to be * to be\"",file=sys.stderr)
    print("       Matches any gap of any size (up to the maximum)"    ,file=sys.stderr)
    print("   2) foliaquery --text=\"to be ^ ^ to be\""   ,file=sys.stderr)
    print("       Matches any gap of exactly two tokens",file=sys.stderr)
    print("   3) foliaquery --pos=\"ADJ NOUN\"",file=sys.stderr)
    print("       Searching by annotation"    ,file=sys.stderr)
    print("   4) foliaquery --text=\"rent\" --pos=\"NOUN\"",file=sys.stderr)
    print("       Patterns may be combined, matches have to satisfy all patterns",file=sys.stderr)
    print("   5) foliaquery --text=\"he leaves|departs today|tomorrow\" --pos=\"PRON VERB ^\"",file=sys.stderr)
    print("       The pipe character allows for disjunctions in single tokens",file=sys.stderr)
    print("   6a) foliaquery --text=\"we {w[io]n}\" --pos=\"PRON VERB\"",file=sys.stderr)
    print("   6b) foliaquery --text=\"{.*able}\" --pos=\"ADJ\"",file=sys.stderr)
    print("       Curly braces specify a regular expression for a single token"    ,file=sys.stderr)
+
+
+
+
def parsepattern(rawpattern, annotationtype): #, annotationset=None):
    """Translate a space-separated query pattern string into a
    folia.Pattern instance.

    Token syntax: '*' is a variable-width gap, '^' a fixed-width
    wildcard, '{...}' a regular expression, 'a|b' a disjunction;
    anything else is matched literally. When *annotationtype* is given
    the pattern matches against that annotation rather than the text.
    """
    parsed = []
    for token in rawpattern.strip().split(' '):
        if token == '*':
            parsed.append('*')
        elif token == '^':
            parsed.append(True)
        elif token[0] == '{' and token[-1] == '}':
            parsed.append(folia.RegExp(token[1:-1]))
        elif '|' in token:
            parsed.append(tuple(token.split('|')))
        else:
            parsed.append(token)
    options = {'casesensitive': settings.casesensitive}
    if annotationtype:
        options['matchannotation'] = annotationtype
    return folia.Pattern(*parsed, **options) #, matchannotationset=annotationset)
+
+
+
def process(filename, patterns):
    """Search one FoLiA document for the given patterns and print each
    match as a tab-separated line: filename, ID of first token, tokens."""
    try:
        print("Processing " + filename, file=sys.stderr)
        doc = folia.Document(file=filename)
        for match in doc.findwords(*patterns):
            tokens = "".join("\t" + token.text() for token in match)
            line = filename + "\t" + match[0].id + tokens
            # Python 2 needs explicit encoding of unicode output
            if sys.version < '3':
                print(line.encode(settings.encoding))
            else:
                print(line)
    except Exception as e:
        if settings.ignoreerrors:
            print("ERROR: An exception was raised whilst processing " + filename + ":", e            ,file=sys.stderr)
        else:
            raise
+
+
def processdir(d, patterns):
    """Apply the patterns to every matching file under directory *d*,
    descending into subdirectories when settings.recurse is enabled."""
    print("Searching in  " + d, file=sys.stderr)
    suffix = '.' + settings.extension
    for entry in glob.glob(os.path.join(d, '*')):
        if entry[-len(suffix):] == suffix:
            process(entry, patterns)
        elif settings.recurse and os.path.isdir(entry):
            processdir(entry, patterns)
+
+
class settings:
    """Global runtime configuration for the pattern-based foliaquery tool."""
    leftcontext = 0        # tokens of left context to show
    rightcontext = 0       # tokens of right context to show
    extension = 'xml'      # extension of files to process
    recurse = False        # recurse into subdirectories
    encoding = 'utf-8'     # output encoding
    ignoreerrors = False   # continue past per-file errors
    casesensitive = True   # case-sensitive pattern matching
+
+
def main():
    """Command-line entry point for the pattern-based foliaquery tool."""
    try:
        # 'e:' and 'i' added: both options are documented in usage() but
        # were missing from the getopt string, so using them raised a
        # GetoptError ('-e' also had a handler below that was unreachable)
        opts, args = getopt.getopt(sys.argv[1:], "o:OE:e:hiqr", ["help","text=","pos=","lemma=","sense=","phon="])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    patterns = []

    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-i':
            # documented in usage(): make patterns case-insensitive; note
            # this only affects patterns specified after it on the command line
            settings.casesensitive = False
        elif o == '--text':
            patterns.append( parsepattern(a, None) )
        elif o == '--pos':
            patterns.append( parsepattern(a, folia.PosAnnotation) )
        elif o == '--lemma':
            patterns.append( parsepattern(a, folia.LemmaAnnotation) )
        elif o == '--sense':
            patterns.append( parsepattern(a, folia.SenseAnnotation) )
        elif o == '--phon':
            patterns.append( parsepattern(a, folia.PhonAnnotation) )
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    if args:
        for x in args:
            if os.path.isdir(x):
                processdir(x, patterns)
            elif os.path.isfile(x):
                process(x, patterns)
            elif x[0:2] != '--':
                print("ERROR: File or directory not found: " + x, file=sys.stderr)
                sys.exit(3)
    else:
        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
diff --git a/pynlpl/tests/FoLiA/foliatools/foliasetdefinition.py b/pynlpl/tests/FoLiA/foliatools/foliasetdefinition.py
new file mode 100755
index 0000000..7f9d588
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliasetdefinition.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
#---------------------------------------------------------------
# FoLiA Set Definition tool
#   by Maarten van Gompel
#   Centre for Language Studies
#   Radboud University Nijmegen
#   proycon AT anaproy DOT nl
#
#   Licensed under GPLv3
#
# This tool reads FoLiA Set Definitions, prints their sets and classes,
# and can convert legacy XML set definitions to RDF or JSON.
#
#----------------------------------------------------------------
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+import glob
+import gzip
+import os
+import io
+import json
+import argparse
+
+from pynlpl.formats import folia, foliaset
+from pynlpl.common import u, isstring
+
def printclass(classinfo, args, indent):
    """Recursively print one class (and its subclasses) from a set
    definition, one line per class, indented by nesting depth.

    classinfo -- dict with at least 'id' and 'label' (optionally 'uri'
                 and a 'subclasses' dict)
    args      -- parsed command-line arguments; only 'outputuri' is read
    indent    -- current indentation prefix string
    """
    # validate before any use; the original asserted only after having
    # already accessed classinfo['uri']
    assert isinstance(classinfo, dict)
    if args.outputuri:
        printuri = " <" + classinfo['uri'] + ">"
    else:
        printuri = ""
    print(indent + " -> CLASS " + classinfo['id'] + printuri + ": " + classinfo['label'])
    for subclassinfo in classinfo.get('subclasses', {}).values():
        printclass(subclassinfo, args, indent + "  ")
+
+
def main():
    """Command-line entry point: load a FoLiA Set Definition and either
    dump it as RDF (Turtle/XML) or JSON, open a PDB shell for inspection,
    or (default) pretty-print its sets, subsets and classes."""
    parser = argparse.ArgumentParser(description="A tool to read FoLiA Set Definitions and perform some operations on them. By default it will print all sets and classes. This tool can also convert from legacy XML to RDF.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--basenamespace', type=str,help="Base RDF namespace to use when converting from legacy XML to RDF", action='store',default="",required=False)
    parser.add_argument('--rdfttl', help="Output RDF in Turtle", action='store_true',required=False)
    parser.add_argument('--rdfxml',help="Output RDF in XML", action='store_true',required=False)
    parser.add_argument('--json', help="Output set definition in JSON", action='store_true',required=False)
    parser.add_argument('--outputuri',help="Output full RDF URIs in text output", action='store_true',required=False)
    # NOTE(review): --class and --subset are declared but not read below —
    # presumably planned functionality; confirm before removing
    parser.add_argument('--class', type=str,help="Test for the specified class, by ID", action='store',required=False)
    parser.add_argument('--subset', type=str,help="Test for the specified subset (--class will be interpreted relative to subset then)", action='store',required=False)
    parser.add_argument('--shell', help="Start an interactive Python shell for debugging (with PDB)", action='store_true',required=False)
    parser.add_argument('url', nargs=1, help='URL or filename to a FoLiA Set Definition')

    args = parser.parse_args()
    url = args.url[0]
    # bare local paths get a ./ prefix so they are not mistaken for URLs
    if url[0] not in ('.','/') and not url.startswith('http'):
        url = './' + url
    setdefinition = foliaset.SetDefinition(url, basens=args.basenamespace)
    if args.rdfttl:
        print(str(setdefinition.graph.serialize(None, 'turtle',base=setdefinition.basens),'utf-8') )
    elif args.rdfxml:
        print(str(setdefinition.graph.serialize(None, 'xml',base=setdefinition.basens),'utf-8') )
    elif args.json:
        print(json.dumps(setdefinition.json()))
    elif args.shell:
        print("Set Definition is loaded in variable: setdefinition; RDF graph in setdefinition.graph",file=sys.stderr)
        import pdb; pdb.set_trace()
    else:
        #default visualization: print the main set first, then each subset,
        #with classes nested by hierarchy
        setinfo = setdefinition.mainset()
        if args.outputuri:
            printuri = " <" + setinfo['uri'] + ">"
        else:
            printuri = ""
        print("SET " + setinfo['id'] + printuri + ": " + setinfo['label'])
        for classinfo in setdefinition.orderedclasses(setinfo['uri'], nestedhierarchy=True):
            printclass(classinfo, args, "  ")
        print()

        # subsets sorted by label (falling back to id when unlabeled)
        for subsetinfo in sorted(setdefinition.subsets(), key=lambda subsetinfo: subsetinfo['label'] if 'label' in subsetinfo else subsetinfo['id']):
            if args.outputuri:
                printuri = " <" + subsetinfo['uri'] + ">"
            else:
                printuri = ""
            print("SUBSET " + subsetinfo['id'] + printuri + ": " + subsetinfo['label'])
            for classinfo in setdefinition.orderedclasses(subsetinfo['uri'], nestedhierarchy=True):
                printclass(classinfo, args, "  ")
            print()
+
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliaspec.py b/pynlpl/tests/FoLiA/foliatools/foliaspec.py
new file mode 100755
index 0000000..daccf57
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliaspec.py
@@ -0,0 +1,563 @@
+#!/usr/bin/env python3
+#Generate library specification code (for either Python or C++) on the basis of folia.yml
+#Used by respectively pynlpl and libfolia
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+import datetime
+import os
+from collections import defaultdict
+import yaml
+
+
+
skip_properties = {
    'c++': ('primaryelement',), #these are not handled in libfolia, or handled differently, don't output these in the source
}

#Load the FoLiA specification: first try the copy shipped alongside this tool
#(../schemas/folia.yml), then fall back to a folia.yml in the working directory
specfiles= [  os.path.join(os.path.dirname(__file__) ,'../schemas/folia.yml'), 'folia.yml' ]
spec = None
for specfile in specfiles:
    try:
        with open(specfile,'r') as f:
            #safe_load suffices (the spec uses no custom YAML tags) and avoids
            #the arbitrary-object-construction risk of plain yaml.load()
            spec = yaml.safe_load(f)
    except IOError:
        continue #candidate not readable, try the next location
    #bugfix: only stop after a file was actually read; the previous
    #unconditional break meant the fallback location was never tried
    break

if spec is None:
    print("FoLiA Specification file folia.yml could not be found in " + ", ".join(specfiles) ,file=sys.stderr)
    sys.exit(1) #nothing below can work without a spec; fail fast instead of crashing later


parents = defaultdict(list) #child class name -> list of parent (abstract base) class names

elementdict = {} #flat (unnested) dictionary: class name -> element definition
+
def getelements(d):
    """Recursively collect all element definitions nested under ``d``.

    Registers every element in the module-level ``elementdict`` (by class
    name) and records parent/child relations in ``parents``. Returns the
    elements in document order, each parent immediately followed by its
    descendants.
    """
    collected = []
    for entry in d.get('elements', []):
        elementdict[entry['class']] = entry
        collected.append(entry)
        descendants = getelements(entry)
        collected.extend(descendants)
        for descendant in descendants:
            lineage = parents[descendant['class']]
            if entry['class'] not in lineage:
                lineage.append(entry['class'])
    return collected
+
#Flatten the nested specification into a deterministic, alphabetically sorted list
elements = getelements(spec) #gathers all class names
elements.sort(key=lambda x: x['class'])
elementnames = [ e['class'] for e in elements ] #flat list of class names, used for membership tests (e.g. in outputvar)
+
+
+################################################################
+
def addfromparents(elementname, key):
    """Return the union of property ``key`` over ``elementname`` and all of
    its (transitive) parents, always seeded with the specification's default
    ``accepted_data``."""
    merged = set(spec['defaultproperties']['accepted_data'])
    props = elementdict[elementname].get('properties', {})
    if key in props and props[key]:
        merged |= set(props[key])
    for ancestor in parents[elementname]:
        merged |= addfromparents(ancestor, key)
    return merged
+
+
+
def outputvar(var, value, target, declare = False):
    """Output a variable ``var`` with value ``value`` in the specified target language.

    Arguments:
        var: fully qualified variable name (e.g. ``Word.XMLTAG``); only the
             part after the last dot determines special-casing.
        value: value from the specification: None, bool, int, float, str, or
               a list/tuple/set of strings.
        target: 'python' or 'c++'
        declare: emit a declaration (with type, for C++) instead of a plain
                 assignment.

    Returns the generated source line (C++ lines are semicolon-terminated).
    """

    varname = var.split('.')[-1]

    #do we need to quote the value? string literals are quoted, enum/class references are not
    if isinstance(value, str) and varname.upper() in ('ACCEPTED_DATA','REQUIRED_DATA','REQUIRED_ATTRIBS', 'OPTIONAL_ATTRIBS','ANNOTATIONTYPE'):
        quote = False
    else:
        quote = True

    if isinstance(value, str):
        value = value.replace("\n","\\n").replace("\t","\\t")

    if target == 'python':
        if varname == 'ANNOTATIONTYPE' and isinstance(value,str):
            value = 'AnnotationType.' + value

        if value is None:
            return var + ' = None'
        elif isinstance(value, bool):
            if value:
                return var + ' = True'
            else:
                return var + ' = False'
        elif isinstance(value, (int, float) ):
            return var + ' = ' + str(value)
        elif isinstance(value, (list,tuple,set) ):
            #bugfix: test emptiness FIRST; the membership tests below are
            #vacuously true for empty collections (all([]) is True) and used
            #to emit the syntactically invalid tuple literal "(,)"
            if len(value) == 0:
                return var + ' = ()'
            elif varname in ('ACCEPTED_DATA','REQUIRED_DATA') or all( x in elementnames for x in value ):
                return var + ' = (' + ', '.join(value) + ',)'
            elif all( x in spec['attributes'] for x in value ):
                return var + ' = (' + ', '.join(['Attrib.' + x for x in value]) + ',)'
            #list items are enums or classes, never string literals
            elif quote:
                return var + ' = (' + ', '.join([ '"' + x + '"' for x in value]) + ',)'
            else:
                return var + ' = (' + ', '.join(value) + ',)'
        else:
            if quote:
                return var + ' = "' + value  + '"'
            else:
                return var + ' = ' + value
    elif target == 'c++':
        typedeclaration = ''
        if value is None:
            if declare: raise NotImplementedError("Declare not supported for None values")
            if varname in ('REQUIRED_ATTRIBS','OPTIONAL_ATTRIBS'):
                return var + ' = NO_ATT;'
            elif varname == 'ANNOTATIONTYPE':
                return var + ' = AnnotationType::NO_ANN;'
            elif varname in ('XMLTAG','TEXTDELIMITER'):
                return var + ' = "NONE";'
            elif varname  == 'REQUIRED_DATA':
                return var + ' = {};'
            elif varname  == 'SUBSET':
                return var + ' = "";'
            else:
                raise NotImplementedError("Don't know how to handle None for " + var)
        elif isinstance(value, bool):
            if declare: typedeclaration = 'const bool '
            if value:
                return typedeclaration + var + ' = true;'
            else:
                return typedeclaration + var + ' = false;'
        elif isinstance(value, int ):
            if declare: typedeclaration = 'const int '
            return typedeclaration + var + ' = ' + str(value) + ';'
        elif isinstance(value, float ):
            if declare: typedeclaration = 'const double '
            return typedeclaration + var + ' = ' + str(value) + ';'
        elif isinstance(value, (list,tuple,set)):
            #list items are enums or classes, never string literals
            if varname in ('ACCEPTED_DATA','REQUIRED_DATA') or all( x in elementnames for x in value ):
                if declare:
                    #bugfix: this was misspelled 'typedeclarion', so declared
                    #sets silently lost their type declaration and used '+='
                    typedeclaration = 'const set<ElementType> '
                    operator = '='
                else:
                    typedeclaration = ''
                    operator = '+='
                value = [ x + '_t' for x in value ]
                return typedeclaration + var + ' ' + operator + ' {' + ', '.join(value) + '};'
            elif all( x in spec['attributes'] for x in value ):
                return var + ' = ' + '|'.join(value) + ';' #attributes are bit flags, OR them together
            else:
                return typedeclaration + var + ' = { ' + ', '.join([ '"' + x + '"' for x in value if x]) + ', };'
        else:
            if varname == 'ANNOTATIONTYPE':
                value = "AnnotationType::" + value

            if quote:
                if declare: typedeclaration = 'const string '
                return typedeclaration + var + ' = "' + value + '";'
            else:
                if declare: typedeclaration = 'const auto '
                return typedeclaration + var + ' = ' + value + ';'
+
#concise description for all available template blocks
#(block name -> one-line description; emitted by outputblock() as a comment line above each generated block)
blockhelp = {
        'namespace': 'The FoLiA XML namespace',
        'version': 'The FoLiA version',
        'version_major': 'The FoLiA version (major)',
        'version_minor': 'The FoLiA version (minor)',
        'version_sub': 'The FoLiA version (sub/rev)',
        'attributes': 'Defines all common FoLiA attributes (as part of the Attrib enumeration)',
        'annotationtype': 'Defines all annotation types (as part of the AnnotationType enumeration)',
        'instantiateelementproperties': 'Instantiates all element properties for the first time, setting them to the default properties',
        'setelementproperties': 'Sets all element properties for all elements',
        'annotationtype_string_map': 'A mapping from annotation types to strings',
        'string_annotationtype_map': 'A mapping from strings to annotation types',
        'annotationtype_xml_map': 'A mapping from annotation types to xml tags (strings)',
        'structurescope': 'Structure scope above the sentence level, used by next() and previous() methods',
        'defaultproperties': 'Default properties which all elements inherit',
        'default_ignore': 'Default ignore list for the select() method, do not descend into these',
        'default_ignore_annotations': 'Default ignore list for token annotation',
        'default_ignore_structure': 'Default ignore list for structure annotation',
}
+
def setelementproperties_cpp(element,indent, defer,done):
    """Generate the C++ statements that initialise ``element``'s static PROPS.

    Properties are inherited by copying the (first) parent's PROPS, so a
    parent must have been output before any of its children: if the parent is
    not in ``done`` yet, the element is parked in ``defer`` (keyed by that
    parent class) and None is returned; the caller (outputblock) flushes the
    deferred elements once the parent has been processed. ``done`` maps the
    class names already emitted.
    """
    commentsign = "//"
    target = 'c++'
    s = commentsign + "------ " + element['class'] + " -------\n"
    if element['class'] in parents:
        for parent in parents[element['class']]:
            if parent not in done:
                #parent not processed yet: defer this element until it is
                defer[parent].append(element)
                return None
            else:
                s += indent + element['class'] + '::PROPS = ' + parent + '::PROPS;\n'
            break #only the first parent is used for property inheritance
    s += indent + element['class'] + '::PROPS.ELEMENT_ID = ' + element['class'] + '_t;\n'
    if 'properties' in element:
        for prop, value in sorted(element['properties'].items()):
            if target not in skip_properties or prop not in skip_properties[target]:
                if prop == 'xmltag':
                    #features serialise under their subset name rather than their own xml tag
                    if 'Feature' in parents[element['class']] and 'subset' in element['properties'] and element['properties']['subset']:
                        value = element['properties']['subset']
                elif prop == 'accepted_data':
                    #accepted_data is cumulative over the whole inheritance chain
                    value = tuple(sorted(addfromparents(element['class'],'accepted_data')))
                    if ('textcontainer' in element['properties'] and element['properties']['textcontainer']) or ('phoncontainer' in element['properties'] and element['properties']['phoncontainer']):
                        value += ('XmlText',)
                    if 'WordReference' in value:
                        value += ('Word','Morpheme','Phoneme')
                s += indent + outputvar(element['class'] + '::PROPS.' + prop.upper(),  value, target) + '\n'
    done[element['class']] = True
    return s
+
def flattenclasses(candidates):
    """Expand ``candidates`` with all their (transitive) subclasses and return
    only the concrete (non-Abstract) class names as a set.

    Uses the module-level ``parents`` mapping (child -> parent classes).
    """
    #bugfix: work on a copy; the previous code appended to the caller's list,
    #permanently mutating e.g. the lists inside the loaded ``spec`` itself
    candidates = list(candidates)
    resolved = set()
    for c in candidates:
        for child, parentlist in parents.items():
            #appending while iterating is intended: newly found subclasses are expanded too
            if c in parentlist and child not in candidates:
                candidates.append(child)
        if c[:8] != 'Abstract':
            resolved.add(c)
    return resolved
+
+
+
+
def outputblock(block, target, varname, indent = ""):
    """Output the template block (identified by ``block``) for the target language.

    ``target`` is either 'python' or 'c++'; ``varname`` is the variable name
    the generated code assigns to (used by the simple variable blocks at the
    bottom); ``indent`` is prepended to every generated line. Returns the
    generated source code as a newline-terminated string. Raises
    NotImplementedError for unsupported block/target combinations and
    Exception for unknown blocks.
    """

    if target == 'python':
        commentsign = '#'
    elif target == 'c++':
        commentsign = '//'
    else:
        raise NotImplementedError("Unknown target language: " + target)

    if block in blockhelp:
        s = indent + commentsign + blockhelp[block] + "\n" #output what each block does
    else:
        s = ''

    if block == 'header':
        s += indent + commentsign + "This file was last updated according to the FoLiA specification for version " + str(spec['version']) + " on " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ", using foliaspec.py\n"
        s += indent + commentsign + "Code blocks after a foliaspec comment (until the next newline) are automatically generated. **DO NOT EDIT THOSE** and **DO NOT REMOVE ANY FOLIASPEC COMMENTS** !!!"
    elif block == 'version_major':
        versionfields = [ int(x) for x in spec['version'].split('.') ]
        s += indent + outputvar(varname, versionfields[0], target, True)
    elif block == 'version_minor':
        versionfields = [ int(x) for x in spec['version'].split('.') ]
        s += indent + outputvar(varname, versionfields[1] if len(versionfields) > 1 else 0, target, True)
    elif block == 'version_sub' or block == 'version_rev':
        versionfields = [ int(x) for x in spec['version'].split('.') ]
        s += indent + outputvar(varname, versionfields[2] if len(versionfields) > 2 else 0, target, True)
    elif block == 'attributes':
        if target == 'python':
            s += indent + "class Attrib:\n"
            s += indent + "    " +  ", ".join(spec['attributes']) + " = range(" + str(len(spec['attributes'])) + ")"
        elif target == 'c++':
            s += indent + "enum Attrib : int { NO_ATT=0, "
            #powers of two: C++ attribute values are bit flags that can be OR'ed together
            value = 1
            for attrib in spec['attributes']:
                s +=  attrib + '=' + str(value) + ', '
                value *= 2
            s += 'ALL='+str(value) + ' };'
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'elementtype':
        if target == 'c++':
            s += indent + "enum ElementType : unsigned int { BASE=0,"
            s += ", ".join([ e + '_t' for e in elementnames]) + ", PlaceHolder_t, XmlComment_t, XmlText_t,  LastElement };\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'annotationtype':
        if target == 'python':
            s += indent + "class AnnotationType:\n"
            s += indent + "    " +  ", ".join(spec['annotationtype']) + " = range(" + str(len(spec['annotationtype'])) + ")"
        elif target == 'c++':
            s += indent + "enum AnnotationType : int { NO_ANN,"
            s += ", ".join(spec['annotationtype']) + ", LAST_ANN };\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'defaultproperties':
        if target == 'c++':
            s += indent + "ELEMENT_ID = BASE;\n"
            s += indent + "ACCEPTED_DATA.insert(XmlComment_t);\n"
            for prop, value in sorted(spec['defaultproperties'].items()):
                if target not in skip_properties or prop not in skip_properties[target]:
                    s += indent + outputvar( prop.upper(),  value, target) + '\n'
        elif target == 'python':
            #in Python the defaults live as class attributes on AbstractElement
            for prop, value in sorted(spec['defaultproperties'].items()):
                s += indent + outputvar('AbstractElement.' + prop.upper(),  value, target) + '\n'
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'instantiateelementproperties':
        if target == 'c++':
            for element in elements:
                s += indent + "properties " + element['class'] + '::PROPS = DEFAULT_PROPERTIES;\n'
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'setelementproperties':
        if target == 'python':
            for element in elements:
                s += commentsign + "------ " + element['class'] + " -------\n"
                if 'properties' in element:
                    for prop, value in sorted(element['properties'].items()):
                        if prop == 'accepted_data':
                            value = tuple(sorted(addfromparents(element['class'],'accepted_data')))
                        s += indent + outputvar(element['class'] + '.' + prop.upper(),  value, target) + '\n'
        elif target == 'c++':
            #C++ needs parent PROPS emitted before child PROPS; see setelementproperties_cpp
            done = {}
            defer = defaultdict(list) #defer output of some elements until parent elements are processed:  hook => deferred_elements
            for element in elements:
                output = setelementproperties_cpp(element,indent, defer,done)
                if output:
                    s += output
                    if element['class'] in defer:
                        for deferred in defer[element['class']]:
                            s += setelementproperties_cpp(deferred,indent, defer,done)

        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'annotationtype_string_map':
        if target == 'c++':
            s += indent + "const map<AnnotationType::AnnotationType,string> ant_s_map = {\n"
            s += indent + "  { AnnotationType::NO_ANN, \"NONE\" },\n"
            done = {}
            for element in elements:
                if 'properties' in element and  'annotationtype' in element['properties'] and element['properties']['annotationtype'] not in done:
                    #if 'primaryelement' in element['properties'] and not element['properties']['primaryelement']: continue #not primary, skip
                    s += indent + "  { AnnotationType::" + element['properties']['annotationtype'] + ',  "' + element['properties']['annotationtype'].lower() + '" },\n'
                    done[element['properties']['annotationtype']] = True #prevent duplicates
            s += indent + "};\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'string_annotationtype_map':
        if target == 'c++':
            s += indent + "const map<string,AnnotationType::AnnotationType> s_ant_map = {\n"
            s += indent + "  { \"NONE\", AnnotationType::NO_ANN },\n"
            done = {}
            for element in elements:
                if 'properties' in element and  'annotationtype' in element['properties'] and element['properties']['annotationtype'] not in done:
                    s += indent + '  { "' + element['properties']['annotationtype'].lower() + '", AnnotationType::' + element['properties']['annotationtype'] + ' },\n'
                    done[element['properties']['annotationtype']] = True #prevent duplicates
            s += indent + "};\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'annotationtype_xml_map':
        if target == 'python':
            s += indent + "ANNOTATIONTYPE2XML = {\n"
            for element in elements:
                if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag'] and 'annotationtype' in element['properties']:
                    if 'primaryelement' in element['properties'] and not element['properties']['primaryelement']: continue #not primary, skip
                    s += indent + "    AnnotationType." + element['properties']['annotationtype'] + ':  "' + element['properties']['xmltag'] + '" ,\n'
            s += indent + "}"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'elementtype_string_map':
        if target == 'c++':
            s += indent + "const map<ElementType,string> et_s_map = {\n"
            s += indent + "  { BASE, \"FoLiA\" },\n"
            for element in elements:
                if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
                    s += indent + "  { " + element['class'] + '_t,  "' + element['properties']['xmltag'] + '" },\n'
                elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
                    if element['class'] == 'HeadFeature':
                        s += indent + "  { HeadFeature_t,  \"headfeature\" },\n"
                    else:
                        s += indent + "  { " + element['class'] + '_t,  "' + element['properties']['subset'] + '" },\n'
                else:
                    #no xmltag and no subset: synthesise an internal "_ClassName" tag
                    s += indent + "  { " + element['class'] + '_t,  "_' + element['class'] + '" },\n'
            s += indent + '  { PlaceHolder_t, "_PlaceHolder" },\n'
            s += indent + '  { XmlComment_t, "_XmlComment" },\n'
            s += indent + '  { XmlText_t, "_XmlText" }\n'
            s += indent + "};\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'string_elementtype_map':
        #inverse of elementtype_string_map
        if target == 'c++':
            s += indent + "const map<string,ElementType> s_et_map = {\n"
            s += indent + "  { \"FoLiA\", BASE },\n"
            for element in elements:
                if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
                    s += indent + '  { "' + element['properties']['xmltag'] + '", ' + element['class'] + '_t  },\n'
                elif 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
                    if element['class'] == 'HeadFeature':
                        s += indent + "  { \"headfeature\", HeadFeature_t },\n"
                    else:
                        s += indent + '  { "' + element['properties']['subset'] + '", ' + element['class'] + '_t  },\n'
                else:
                    s += indent + '  { "_' + element['class'] + '", ' + element['class'] + '_t  },\n'
            s += indent + '  { "_PlaceHolder", PlaceHolder_t  },\n'
            s += indent + '  { "_XmlComment", XmlComment_t  },\n'
            s += indent + '  { "_XmlText", XmlText_t  }\n'
            s += indent + "};\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'string_class_map':
        if target == 'python':
            s += indent + "XML2CLASS = {\n"
            for element in elements:
                if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag']:
                    s += indent + '    "' + element['properties']['xmltag'] + '": ' + element['class'] + ',\n'
            s += indent + "}\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'annotationtype_layerclass_map':
        if target == 'python':
            s += indent + "ANNOTATIONTYPE2LAYERCLASS = {\n"
            for element in elements:
                if element['class'].endswith('Layer'):
                    if 'properties' in element and 'xmltag' in element['properties'] and element['properties']['xmltag'] and 'annotationtype' in element['properties']:
                        s += indent + "    AnnotationType." + element['properties']['annotationtype'] + ':  ' + element['class'] + ' ,\n'
            s += indent + "    AnnotationType.PREDICATE:  SemanticRolesLayer\n"
            s += indent + "}"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'default_ignore':
        if target == 'c++':
            s += indent + "const set<ElementType> default_ignore = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore'])) ]) + " };\n"
        elif target == 'python':
            s += indent + "default_ignore = ( " + ", ".join(spec['default_ignore']) + ",)\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'default_ignore_annotations':
        if target == 'c++':
            s += indent + "const set<ElementType> default_ignore_annotations = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_annotations'])) ]) + " };\n"
        elif target == 'python':
            s += indent + "default_ignore_annotations = ( " + ", ".join(spec['default_ignore_annotations']) + ",)\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'default_ignore_structure':
        if target == 'c++':
            s += indent + "const set<ElementType> default_ignore_structure = { " + ", ".join([ e + '_t' for e in sorted(flattenclasses(spec['default_ignore_structure'])) ]) + " };\n"
        elif target == 'python':
            s += indent + "default_ignore_structure = ( " + ", ".join(spec['default_ignore_structure']) + ",)\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'typehierarchy':
        if target == 'c++':
            s += indent + "static const map<ElementType, set<ElementType> > typeHierarchy = { "
            for child, parentset in sorted(parents.items()):
                s += indent + "   { " + child + '_t' + ", { " + ",".join([p + '_t' for p in parentset ]) + " } },\n"
            s += indent + "   { PlaceHolder_t , { Word_t, AbstractStructureElement_t } }\n"
            s += indent + "};\n";
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block == 'attributefeatures':
        if target == 'c++':
            l = []
            for element in elements:
                if 'properties' in element and 'subset' in element['properties'] and element['properties']['subset']:
                    if element['class'] == 'HeadFeature':
                        l.append("headfeature")
                    else:
                        l.append(element['properties']['subset'])
            l.sort()
            s += indent + "const set<string> AttributeFeatures = { " + ", ".join([ '"' + x + '"' for x in l ]) + " };\n"
        else:
            raise NotImplementedError("Block " + block + " not implemented for " + target)
    elif block in spec:
        #simple variable blocks
        s += indent + outputvar(varname, spec[block], target, True)
    else:
        raise Exception("No such block exists in foliaspec: " + block)


    #ensure the generated block is always newline-terminated
    if s and s[-1] != '\n': s += '\n'
    return s
+
+
def parser(filename):
    """Regenerate all foliaspec blocks in ``filename``, rewriting the file in
    place (via a temporary ``.foliaspec.out`` file that replaces the original
    on success).

    Raises FileNotFoundError if the file does not exist and Exception if the
    target language cannot be deduced from the file extension.
    """
    #deduce the target language from the file extension
    if filename[-2:] in ('.h','.c') or filename[-4:] in ('.cxx','.cpp','.hpp'):
        target = 'c++' #libfolia
        commentsign = '//'
    elif filename[-3:] == '.py':
        target = 'python' #pynlpl.formats.folia
        commentsign = '#'
    else:
        raise Exception("No target language could be deduced from the filename " + filename)

    if not os.path.exists(filename):
        raise FileNotFoundError("File not found: " + filename)

    inblock = False
    blockname = blocktype = ""
    varname = ""
    indent = ""
    #bugfix: use context managers so the output file is flushed and CLOSED
    #before os.rename(); previously it was never closed, risking a truncated
    #file (and a failing rename on Windows)
    with open(filename+'.foliaspec.out','w',encoding='utf-8') as out, \
         open(filename,'r',encoding='utf-8') as f:
        for line in f:
            strippedline = line.strip()
            if not inblock:
                if strippedline.startswith(commentsign + 'foliaspec:'):
                    indent = line.find(strippedline) * ' '
                    fields = strippedline[len(commentsign):].split(':')
                    if fields[1] in ('begin','start'):
                        blocktype = 'explicit'
                        blockname = fields[2]
                        try:
                            varname = fields[3]
                        except IndexError: #no explicit variable name given, default to the block name
                            varname = blockname
                    elif len(fields) >= 2:
                        blocktype = 'implicit'
                        blockname = fields[1]
                        try:
                            varname = fields[2]
                        except IndexError:
                            varname = blockname
                    else:
                        raise Exception("Syntax error: " + strippedline)
                    inblock = True
                    out.write(line)
                elif strippedline.split(' ')[-1].startswith(commentsign + 'foliaspec:'):
                    #one-line block: the foliaspec instruction trails the line it generates
                    fields = strippedline.split(' ')[-1][len(commentsign):].split(':')
                    blocktype = 'line'
                    blockname = fields[1]
                    try:
                        varname = fields[2]
                    except IndexError:
                        varname = blockname
                    if varname != blockname:
                        out.write( outputblock(blockname, target, varname) + " " + commentsign + "foliaspec:" + blockname + ":" + varname + "\n")
                    else:
                        out.write( outputblock(blockname, target, varname) + " " + commentsign + "foliaspec:" + blockname + "\n")
                else:
                    out.write(line)
            else:
                #inside a block: stale generated lines are skipped, fresh ones
                #are emitted when the block's end (blank line or end marker) is reached
                if not strippedline and blocktype == 'implicit':
                    out.write(outputblock(blockname, target, varname,indent) + "\n")
                    inblock = False
                elif blocktype == 'explicit' and strippedline.startswith(commentsign + 'foliaspec:end:'):
                    out.write(outputblock(blockname, target, varname,indent) + "\n" + commentsign + "foliaspec:end:" + blockname + "\n")
                    inblock = False

    os.rename(filename+'.foliaspec.out', filename)
+
def usage():
    """Print command-line usage to stderr and exit successfully."""
    for line in (
        "Syntax: foliaspec.py [filename] [filename] ..etc..",
        "Filenames are Python or C++ files that may contain foliaspec instructions, the files will be updated according to the latest specification in folia.yml",
    ):
        print(line, file=sys.stderr)
    sys.exit(0)
+
def main():
    """Command-line entry point: update each given file in place, regenerating
    its foliaspec blocks from the current folia.yml specification."""
    args = sys.argv[1:]
    if not args:
        usage() #no arguments given: print usage and exit

    for arg in args:
        if arg in ('-h', '--help'):
            usage() #exits
        elif arg in ('-v', '--version'):
            print("FoLiA specification is at version v" + spec['version'],file=sys.stderr)
            sys.exit(0)
        else:
            parser(arg)
+
if __name__ == '__main__':
    main() #run only when invoked as a script, not on import
diff --git a/pynlpl/tests/FoLiA/foliatools/foliaspec2json.py b/pynlpl/tests/FoLiA/foliatools/foliaspec2json.py
new file mode 100755
index 0000000..d92257e
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliaspec2json.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+#Generate library specification code (for either Python or C++) on the basis of folia.yml
+#Used by respectively pynlpl and libfolia
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+import os
+import json
+import yaml
+
#Load the FoLiA specification: first try the copy shipped alongside this tool
#(../schemas/folia.yml), then fall back to a folia.yml in the working directory
specfiles= [  os.path.join(os.path.dirname(__file__) ,'../schemas/folia.yml'), 'folia.yml' ]
spec = None
for specfile in specfiles:
    try:
        with open(specfile,'r') as f:
            spec = yaml.safe_load(f) #safe_load: the spec uses no custom YAML tags
    except IOError:
        continue #candidate not readable, try the next location
    #bugfix: only stop after a successful read; the previous unconditional
    #break meant the fallback location was never tried
    break

if spec is None:
    print("FoLiA Specification file folia.yml could not be found in " + ", ".join(specfiles) ,file=sys.stderr)
    sys.exit(1) #fail fast: main() would otherwise crash on spec being None
+
def main(var=None):
    """Print the FoLiA specification as JSON.

    If ``var`` is set — either passed as argument or given as the first
    command-line parameter — the JSON is emitted as a javascript-style
    assignment ``var = {...};`` instead of bare JSON. Command-line arguments
    starting with '-' are ignored.
    """
    try:
        arg = sys.argv[1]
        #bugfix: previously any command-line state (even a flag) unconditionally
        #clobbered the ``var`` parameter, so it was silently ignored
        if not arg.startswith('-'):
            var = arg
    except IndexError: #no command-line argument; keep ``var`` as passed
        pass
    if var:
        print(var + ' = ' + json.dumps(spec, sort_keys=True, indent=4) + ';')
    else:
        print(json.dumps(spec, sort_keys=True, indent=4))
+
if __name__ == '__main__':
    main() #run only when invoked as a script, not on import
diff --git a/pynlpl/tests/FoLiA/foliatools/foliatextcontent.py b/pynlpl/tests/FoLiA/foliatools/foliatextcontent.py
new file mode 100755
index 0000000..7ab6af3
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliatextcontent.py
@@ -0,0 +1,359 @@
+#! /usr/bin/env python
+# -*- coding: utf8 -*-
+
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
+def usage(): #print help text to stderr (caller decides whether to exit)
+    print("foliatextcontent",file=sys.stderr)
+    print("  by Maarten van Gompel (proycon)",file=sys.stderr)
+    print("  Radboud University Nijmegen",file=sys.stderr)
+    print("  2015 - Licensed under GPLv3",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("This tool operates on some of the redundancy regarding text context inherent in FoLiA documents. It adds text content elements,  on the higher (untokenised) levels, adding offset information and mark-up element if present. Secondly, the tool may als adds text-markup elements for substrings (str element) (provided there is no overlap).",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("Usage: foliatextcontent [options] file-or-dir1 file-or-dir2 ..etc..",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("Parameters for output:"        ,file=sys.stderr)
+    print("  -s                           Add text content on sentence level",file=sys.stderr)
+    print("  -p                           Add text content on paragraph level"    ,file=sys.stderr)
+    print("  -d                           Add text content on division level",file=sys.stderr)
+    print("  -t                           Add text content on global text level"    ,file=sys.stderr)
+    print("  -T                           Add text content for the specified elements (comma separated list of folia xml tags)"    ,file=sys.stderr)
+    print("  -X                           Do NOT add offset information"    ,file=sys.stderr)
+    print("  -F                           Force offsets to refer to the specified structure only (only works if you specified a single element type for -T!!!)"    ,file=sys.stderr)
+    print("  -M                           Add substring markup linking to string elements (if any, and when there is no overlap)"    ,file=sys.stderr)
+    print("  -e [encoding]                Output encoding (default: utf-8)",file=sys.stderr)
+    print("  -w                           Edit file(s) (overwrites input files), will output to stdout otherwise" ,file=sys.stderr)
+    print("Parameters for processing directories:",file=sys.stderr)
+    print("  -r                           Process recursively",file=sys.stderr)
+    print("  -E [extension]               Set extension (default: xml)",file=sys.stderr)
+
+
+def linkstrings(element, cls='current',debug=False): #wrap occurrences of <str> substrings in element's own text content as TextMarkupString markup, in place
+    if element.hastext(cls,strict=True) and element.hasannotation(folia.String): #only elements with their own text AND string annotation
+        text = element.textcontent(cls, correctionhandling=folia.CorrectionHandling.EITHER)
+
+        for string in element.select(folia.String, None, False):
+            try:
+                stringtextcontent = string.textcontent(cls, correctionhandling=folia.CorrectionHandling.EITHER)
+                stringtext = stringtextcontent.text()
+                stringoffset = stringtextcontent.offset
+            except folia.NoSuchText: #string has no text in this class: nothing to link
+                continue
+
+            if not stringtext:
+                continue
+
+            if debug: print("Finding string '" + stringtext + "' in text: ", text.text(), file=sys.stderr)
+
+            offset = 0 #current offset cursor
+            length = len(stringtext)
+            replaceindex = 0
+            replace = [] #replacement items for text.data[replaceindex], built when a match is found
+            for i, subtext in enumerate(text): #children of the TextContent: plain strings and markup elements
+                if isinstance(subtext, str):
+                    subtextlength = len(subtext)
+                    if stringoffset >= offset and stringoffset+length <= offset+subtextlength: #string falls entirely within this plain-text child
+                        reloffset = stringoffset-offset
+
+                        if subtext[reloffset:reloffset+length] != stringtext:
+                            print(" String refers to offset " + str(stringoffset) + ", but is not found there ! Found '" + subtext[reloffset:reloffset+length] + "' instead.",file=sys.stderr)
+                        else:
+                            #match!
+                            kwargs = {}
+                            replaceindex = i
+                            if string.id:
+                                kwargs['idref'] = string.id
+                            replace = [subtext[:reloffset], folia.TextMarkupString(element.doc, *stringtext, **kwargs), subtext[reloffset+length:]] #NOTE(review): *stringtext unpacks the string into single characters; confirm TextMarkupString joins them as intended
+                            break
+
+                elif isinstance(subtext, folia.AbstractTextMarkup):
+                    subtextlength = len(subtext.text())
+                    if stringoffset >= offset and stringoffset+length <= offset+subtextlength:
+                        print("String " + str(string.id) + " fits within other markup element ("+str(type(subtext))+"," + str(subtext.id)+ "), not implemented yet", file=sys.stderr)
+                        break
+
+                offset += subtextlength #NOTE(review): unbound if a child is neither str nor markup — confirm that cannot occur
+
+            if replace: #splice the three-part replacement in where the plain string was
+                if debug: print("Replacing item " + str(replaceindex) + " with: ", replace,file=sys.stderr)
+                del text.data[replaceindex]
+                for x in reversed(replace):
+                    if x: #skip empty prefix/suffix strings
+                        text.data.insert(replaceindex,x)
+            else:
+                if string.id:
+                    print("Could not find string " + string.id + " !!!",file=sys.stderr)
+
+def gettextsequence(element, cls, debug=False):
+    assert element.PRINTABLE
+    if debug: print(" Getting text for ", repr(element),file=sys.stderr)
+    if element.TEXTCONTAINER:
+        if debug: print("  Found textcontainer ", repr(element), "in", repr(element.ancestor(folia.AbstractStructureElement)),file=sys.stderr)
+
+        if isinstance(element,folia.TextContent) and element.cls != cls:
+            if debug: print("  Class mismatch", element.cls,"vs",cls,file=sys.stderr)
+            raise StopIteration
+
+        for e in element:
+            if isinstance(e, str):
+                if debug: print("  Found: ", e,file=sys.stderr)
+                yield e, element
+            else: #markup (don't recurse)
+                if debug: print("  Found markup: ", repr(e),file=sys.stderr)
+                yield e, element
+                yield e.gettextdelimiter(), None
+
+        yield None,None #Signals a break after this, if we have text content we needn't delve deeper
+    else:
+        #Do we have a text content?
+        foundtext = False
+        if debug: print(" Looking for text in ", repr(element),file=sys.stderr)
+        for e in element:
+            if isinstance(e, folia.TextContent) and e.cls == cls:
+                foundtext = True
+                for x in gettextsequence(e, cls, debug):
+                    yield x
+            elif isinstance(e, folia.Correction):
+                foundtextincorrection =False
+                try:
+                    if e.hasnew() and e.new().textcontent(cls):
+                        foundtextincorrection = True
+                        for x in gettextsequence(e.new().textcontent(cls), cls, debug):
+                            yield x
+                except folia.NoSuchText:
+                    pass
+                except folia.NoSuchAnnotation:
+                    pass
+                if not foundtextincorrection:
+                    try:
+                        if e.hascurrent() and e.current().textcontent(cls):
+                            foundtextincorrection = True
+                            for x in gettextsequence(e.current().textcontent(cls), cls, debug):
+                                yield x
+                    except folia.NoSuchText:
+                        pass
+                    except folia.NoSuchAnnotation:
+                        pass
+                if not foundtextincorrection:
+                    try:
+                        if e.hasoriginal() and e.original().textcontent(cls):
+                            foundtextincorrection = True
+                            for x in gettextsequence(e.current().textcontent(cls), cls, debug):
+                                yield x
+                    except folia.NoSuchText:
+                        pass
+                    except folia.NoSuchAnnotation:
+                        pass
+                foundtext = foundtextincorrection
+
+        if not foundtext:
+            if debug: print(" Looking for text in children of ", repr(element),file=sys.stderr)
+            for e in element:
+                if e.PRINTABLE and not isinstance(e, folia.String):
+                    #abort = False
+                    for x in gettextsequence(e, cls, debug):
+                        foundtext = True
+                        if x[0] is None:
+                            abort = True
+                            break
+                        yield x
+                    #if abort:
+                    #    print(" Abort signal received, not processing further elements in ", repr(element),file=sys.stderr)
+                    #    break
+                if foundtext:
+                    delimiter = e.gettextdelimiter()
+                    if debug: print(" Got delimiter " + repr(delimiter) + " from " + repr(element), file=sys.stderr)
+                    yield e.gettextdelimiter(), None
+                elif isinstance(e, folia.AbstractStructureElement) and not isinstance(e, folia.Linebreak) and not isinstance(e, folia.Whitespace):
+                    raise folia.NoSuchText("No text was found in the scope of the structure element")
+
+
+def settext(element, cls='current', offsets=True, forceoffsetref=False, debug=False): #compute and set the text content of element for class cls; returns the new TextContent or None
+    assert element.PRINTABLE
+
+    if debug: print("In settext for  ", repr(element),file=sys.stderr)
+
+    #get the raw text sequence
+    try:
+        textsequence = list(gettextsequence(element,cls,debug))
+    except folia.NoSuchText: #nothing to set
+        return None
+
+    if debug: print("Raw text:  ", textsequence,file=sys.stderr)
+
+    if textsequence:
+        newtextsequence = [] #items (strings/markup) for the new TextContent
+        offset = 0 #running character offset into the text being built
+        prevsrc = None #last source TextContent we assigned an offset to (assign once per source)
+        for i, (e, src) in enumerate(textsequence):
+            if e: #filter out empty strings
+                if isinstance(e,str):
+                    length = len(e)
+
+                    #only whitespace from here on?
+                    if not e.strip():
+                        onlywhitespace = True
+                        for x,y in textsequence[i+1:]:
+                            if y is not None: #a real source still follows, so the whitespace matters
+                                onlywhitespace = False
+                        if onlywhitespace: #trailing whitespace only: stop here
+                            break
+                elif isinstance(e, folia.AbstractTextMarkup):
+                    e = e.copy() #copy so the new TextContent does not share nodes with the source
+                    length = len(e.text())
+
+                if src and offsets and src is not prevsrc:
+                    ancestors = list(src.ancestors(folia.AbstractStructureElement))
+                    if len(ancestors) >= 2 and ancestors[1] is element: #src is a direct structural child: offset refers implicitly to element
+                        if debug: print("Setting offset for text in  " + repr(ancestors[0]) + " to " + str(offset) + ", reference " + repr(element) ,file=sys.stderr)
+                        src.offset = offset
+                    elif forceoffsetref: #deeper descendant: only set offset with an explicit reference when forced (-F)
+                        src.offset = offset
+                        src.ref = element
+                    prevsrc = src
+
+                newtextsequence.append(e)
+                offset += length
+
+        if newtextsequence:
+            if debug: print("Setting text for " + repr(element) + ":" , newtextsequence, file=sys.stderr)
+            return element.replace(folia.TextContent, *newtextsequence, cls=cls) #appends if new
+
+
+
+def processelement(element, settings): #depth-first pass: recurse into children, then set text on element if its type was requested
+    if not isinstance(element, folia.AbstractSpanAnnotation): #prevent infinite recursion
+        for e in element:
+            if isinstance(e, folia.AbstractElement):
+                if settings.debug: print("Processing ", repr(e),file=sys.stderr)
+                processelement(e,settings)
+        if element.PRINTABLE:
+            if any( isinstance(element,C) for C in settings.Classes ): #element is of one of the types selected via -s/-p/-d/-t/-T
+                for cls in element.doc.textclasses: #set text for every text class present in the document
+                    settext(element, cls, settings.offsets, settings.forceoffsetref, settings.debug)
+
+
+def process(filename, outputfile = None): #process one FoLiA file; writes back in place (-w) or prints to stdout. NOTE(review): outputfile is accepted but unused
+    print("Converting " + filename,file=sys.stderr)
+    doc = folia.Document(file=filename)
+
+    if settings.linkstrings:
+        for element in doc.select(folia.AbstractStructureElement):
+            if settings.linkstrings: #redundant: already guaranteed by the enclosing if
+                for cls in element.doc.textclasses:
+                    linkstrings(element, cls, settings.debug)
+
+    if settings.Classes:
+        for e in doc.data: #top-level elements of the document
+            processelement(e, settings)
+
+
+    if settings.inplaceedit: #-w: overwrite the input file
+        doc.save()
+    else:
+        print(doc.xmlstring())
+
+def processdir(d, outputfile = None): #process every file with the configured extension in d, recursing into subdirectories if enabled
+    print("Searching in  " + d, file=sys.stderr)
+    for f in glob.glob(os.path.join(d ,'*')):
+        if f[-len(settings.extension) - 1:] == '.' + settings.extension: #matches '.<extension>' suffix
+            process(f, outputfile)
+        elif settings.recurse and os.path.isdir(f):
+            processdir(f) #NOTE(review): outputfile is not forwarded on recursion
+
+
+
+class settings: #module-wide configuration, populated from command-line options in main()
+    Classes = [] #element types (folia classes) to add text content for (-s/-p/-d/-t/-T)
+    inplaceedit = False #-w: overwrite input files instead of printing to stdout
+    offsets = True #add offset information unless -X was given
+    forceoffsetref = False #-F: force offsets to reference the selected element explicitly
+    linkstrings = False #-M: add TextMarkupString markup for str elements
+
+    extension = 'xml' #-E: file extension to match when processing directories
+    recurse = False #-r: descend into subdirectories
+    encoding = 'utf-8' #-e: output encoding
+
+    debug = False #-D: verbose debug output to stderr
+
+    textclasses =[] #NOTE(review): defined but not referenced anywhere in this script
+
+
+def main():
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "E:hsSpPdDtTXMe:wT:Fc:", ["help"])
+    except getopt.GetoptError as err:
+        print(str(err),file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+
+    outputfile = None
+
+
+    for o, a in opts:
+        if o == '-h' or o == '--help':
+            usage()
+            sys.exit(0)
+        elif o == '-d':
+            settings.Classes.append(folia.Division)
+        elif o == '-t':
+            settings.Classes.append(folia.Text)
+        elif o == '-s':
+            settings.Classes.append(folia.Sentence)
+        elif o == '-p':
+            settings.Classes.append(folia.Paragraph)
+        elif o == '-T':
+            settings.Classes += [ folia.XML2CLASS[tag] for tag in a.split(',') ]
+        elif o == '-X':
+            settings.offsets = False
+        elif o == '-e':
+            settings.encoding = a
+        elif o == '-E':
+            settings.extension = a
+        elif o == '-F':
+            settings.forceoffsetref = True
+        elif o == '-M':
+            settings.linkstrings = True
+        elif o == '-w':
+            settings.inplaceedit = True
+        elif o == '-r':
+            settings.recurse = True
+        elif o == '-D':
+            settings.debug = True
+        else:
+            raise Exception("No such option: " + o)
+
+
+    if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)
+
+    if len(settings.Classes) > 1:
+        settings.forceoffsetref = False
+
+    if args:
+        for x in args:
+            if os.path.isdir(x):
+                processdir(x,outputfile)
+            elif os.path.isfile(x):
+                process(x, outputfile)
+            else:
+                print("ERROR: File or directory not found: " + x,file=sys.stderr)
+                sys.exit(3)
+    else:
+        print("ERROR: Nothing to do, specify one or more files or directories",file=sys.stderr)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliatree.py b/pynlpl/tests/FoLiA/foliatools/foliatree.py
new file mode 100755
index 0000000..2415c5f
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliatree.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import io
+import sys
+import os
+import glob
+from collections import Counter
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo pip install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git",file=sys.stderr)
+    sys.exit(2)
+
+def usage(): #print help text to stderr (caller decides whether to exit)
+    print("foliatree",file=sys.stderr)
+    print("  by Maarten van Gompel (proycon)",file=sys.stderr)
+    print("  Centre for Language and Speech Technology, Radboud University Nijmegen",file=sys.stderr)
+    print("  2016 - Licensed under GPLv3",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("This conversion script reads a FoLiA XML document and outputs a tree of its structure", file=sys.stderr)
+    print("",file=sys.stderr)
+    print("Usage: foliatree [options] file-or-dir1 file-or-dir2 ..etc..",file=sys.stderr)
+    print("",file=sys.stderr)
+    print("Parameters for processing directories:",file=sys.stderr)
+    print("  -r                           Process recursively",file=sys.stderr)
+    print("  -E [extension]               Set extension (default: xml)",file=sys.stderr)
+    print("  -t [types]                   Output only these elements (comma separated list)", file=sys.stderr)
+    print("  -P                           Like -O, but outputs to current working directory",file=sys.stderr) #NOTE(review): refers to a -O option that this tool does not define, and -P itself has no handler
+    print("  -i                           Print IDs",file=sys.stderr)
+    print("  -c                           Print classes",file=sys.stderr)
+    print("  -a                           Print annotator",file=sys.stderr)
+    print("  -x                           Print text content",file=sys.stderr)
+    print("  -s                           Print structural elements only",file=sys.stderr)
+    print("  -q                           Ignore errors",file=sys.stderr)
+
+def out(s, outputfile): #write one line to outputfile if given, else to stdout (encoded on Python 2)
+    if sys.version < '3': #crude Python-2 check via string comparison
+        if outputfile:
+            outputfile.write(s + "\n")
+        else:
+            print(s.encode(settings.encoding)) #NOTE(review): requires settings.encoding to be defined on the settings class — confirm
+    else:
+        if outputfile:
+            print(s,file=outputfile)
+        else:
+            print(s)
+
+
+def processelement(element, depth=0, inspan=False):
+    if not isinstance(element, folia.AbstractElement): return False
+    if settings.structureonly and not isinstance(element, folia.AbstractStructureElement): return False
+    isspan = isinstance(element, folia.AbstractSpanAnnotation)
+    try:
+        if (not settings.types or element.XMLTAG in settings.types) and element.XMLTAG:
+            out = "    " * depth
+            out += element.XMLTAG
+            if settings.ids and element.id:
+                out += "; id=" + element.id
+            if settings.classes and element.cls:
+                out += "; class=" + element.cls
+            if settings.annotators and element.annotator:
+                out += "; annotator=" + element.annotator
+            if settings.text and isinstance(element, (folia.TextContent, folia.PhonContent)):
+                out += "; text=\"" + str(element) + "\""
+            print(out)
+            if not inspan:
+                for e in element.data:
+                    processelement(e,depth+1, isspan and isinstance(e, folia.AbstractStructureElement) )
+    except AttributeError:
+        pass
+    return True
+
+def process(filename, outputfile = None): #print the element tree of one FoLiA file; returns a Counter of tag frequencies
+    print("Processing " + filename,file=sys.stderr)
+    count = Counter() #TODO(review): never populated — main()'s frequency table is therefore always empty
+    try:
+        doc = folia.Document(file=filename)
+
+        for e in doc.data: #top-level elements of the document
+            processelement(e)
+
+    except Exception as e:
+        if settings.ignoreerrors: #-q: report but keep going
+            print("ERROR: An exception was raised whilst processing " + filename + ":", e, file=sys.stderr)
+        else:
+            raise
+
+    return count
+
+
+
+
+def processdir(d, outputfile = None): #process every matching file under d (recursing if enabled), merging the tag counters
+    print("Searching in  " + d, file=sys.stderr)
+    count = Counter()
+    for f in glob.glob(os.path.join(d, '*')):
+        if f[-len(settings.extension) - 1:] == '.' + settings.extension: #matches '.<extension>' suffix
+            count.update(process(f, outputfile))
+        elif settings.recurse and os.path.isdir(f):
+            count.update(processdir(f, outputfile))
+    return count
+
+
+class settings:
+    extension = 'xml'
+    recurse = False
+    ignoreerrors = False
+    types = None
+    constraints = []
+    ids = False
+    classes = False
+    annotators = False
+    text = False
+    structureonly = False
+
+
+def main():
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "E:ht:swrqicaxs", ["help"])
+    except getopt.GetoptError as err:
+        print(str(err), file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+
+    outputfile = None
+
+
+    for o, a in opts:
+        if o == '-h' or o == '--help':
+            usage()
+            sys.exit(0)
+        elif o == '-E':
+            settings.extension = a
+        elif o == '-r':
+            settings.recurse = True
+        elif o == '-t':
+            settings.types = a.split(',')
+        elif o == '-q':
+            settings.ignoreerrors = True
+        elif o == '-i':
+            settings.ids = True
+        elif o == '-c':
+            settings.classes = True
+        elif o == '-a':
+            settings.annotators = True
+        elif o == '-s':
+            settings.structureonly = True
+        elif o == '-x':
+            settings.text = True
+        else:
+            raise Exception("No such option: " + o)
+
+
+    if outputfile: outputfile = io.open(outputfile,'w',encoding=settings.encoding)
+
+    if args:
+        for x in args:
+            if os.path.isdir(x):
+                count = processdir(x,outputfile)
+            elif os.path.isfile(x):
+                count = process(x, outputfile)
+            else:
+                print("ERROR: File or directory not found: " + x, file=sys.stderr)
+                sys.exit(3)
+
+        for xmltag, freq in sorted(count.items(), key=lambda x: x[1]*-1):
+            print(xmltag+"\t" + str(freq))
+    else:
+        print("ERROR: Nothing to do, specify one or more files or directories", file=sys.stderr)
+
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/foliavalidator.py b/pynlpl/tests/FoLiA/foliatools/foliavalidator.py
new file mode 100755
index 0000000..a271473
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/foliavalidator.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import getopt
+import sys
+import os
+import glob
+import traceback
+import lxml.etree
+try:
+    from pynlpl.formats import folia
+except:
+    print("ERROR: pynlpl not found, please obtain PyNLPL from the Python Package Manager ($ sudo easy_install pynlpl) or directly from github: $ git clone git://github.com/proycon/pynlpl.git", file=sys.stderr)
+    sys.exit(2)
+
+def usage(): #print help text to stderr (caller decides whether to exit)
+    print("foliavalidator", file=sys.stderr)
+    print("  by Maarten van Gompel (proycon)", file=sys.stderr)
+    print("  Radboud University Nijmegen", file=sys.stderr)
+    print("  2016 - Licensed under GPLv3", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("FoLiA " + folia.FOLIAVERSION + ", library version " + folia.LIBVERSION, file=sys.stderr)
+    print("", file=sys.stderr)
+    print("Validates FoLiA documents.", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("Usage: foliavalidator [options] file-or-dir1 file-or-dir2 ..etc..", file=sys.stderr)
+    print("", file=sys.stderr)
+    print("Parameters for processing directories:", file=sys.stderr)
+    print("  -d                           Deep validation", file=sys.stderr)
+    print("  -r                           Process recursively", file=sys.stderr)
+    print("  -q                           Quick (more shallow) validation, only validate against RelaxNG schema - do not load document in FoLiA library", file=sys.stderr)
+    print("  -E [extension]               Set extension (default: xml)", file=sys.stderr)
+    print("  -V                           Show version info", file=sys.stderr)
+    print("  -t                           Treat text validation errors strictly (recommended)", file=sys.stderr)
+    print("  -i                           Ignore validation failures, always report a successful exit code", file=sys.stderr)
+
+
+
+
+
+
+
+def validate(filename, schema = None, quick=False, deep=False, stricttextvalidation=False):
+    try:
+        folia.validate(filename, schema)
+    except Exception as e:
+        print("VALIDATION ERROR against RelaxNG schema (stage 1/2), in " + filename,file=sys.stderr)
+        print(str(e), file=sys.stderr)
+        return False
+    try:
+        document = folia.Document(file=filename, deepvalidation=deep,textvalidation=True,verbose=True)
+    except folia.DeepValidationError as e:
+        print("DEEP VALIDATION ERROR on full parse by library (stage 2/2), in " + filename,file=sys.stderr)
+        print(e.__class__.__name__ + ": " + str(e),file=sys.stderr)
+        return False
+    except Exception as e:
+        print("VALIDATION ERROR on full parse by library (stage 2/2), in " + filename,file=sys.stderr)
+        print(e.__class__.__name__ + ": " + str(e),file=sys.stderr)
+        print("-- Full traceback follows -->",file=sys.stderr)
+        ex_type, ex, tb = sys.exc_info()
+        traceback.print_exception(ex_type, ex, tb)
+        return False
+    if document.textvalidationerrors:
+        if stricttextvalidation:
+            print("VALIDATION ERROR because of text validation errors, in " + filename,file=sys.stderr)
+            return False
+        else:
+            print("WARNING: there were " + str(document.textvalidationerrors) + " text validation errors but these are currently not counted toward the full validation result (use -t for strict text validation, experimental at this stage)", file=sys.stderr)
+
+    print("Validated successfully: " +  filename,file=sys.stderr)
+    return True
+
+
+
+
+def processdir(d, schema = None,quick=False,deep=False,stricttextvalidation=False):
+    success = False
+    print("Searching in  " + d,file=sys.stderr)
+    for f in glob.glob(os.path.join(d ,'*')):
+        if f[-len(settings.extension) - 1:] == '.' + settings.extension:
+            r = validate(f, schema,quick,deep,stricttextvalidation)
+        elif settings.recurse and os.path.isdir(f):
+            r = processdir(f,schema,quick,deep,stricttextvalidation)
+        if not r: success = False
+    return success
+
+
+class settings: #module-wide configuration, populated from command-line options in main()
+    extension = 'xml' #-E: file extension to match when processing directories
+    recurse = False #-r: descend into subdirectories
+    encoding = 'utf-8' #NOTE(review): not referenced in this script
+    deep = False #-d: enable deep validation (validate class values against set definitions)
+    stricttextvalidation = False #-t: count text validation errors as failures
+
+def main():
+    quick = False
+    nofail = False
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "E:srhdqVi", ["help"])
+    except getopt.GetoptError as err:
+        print(str(err), file=sys.stderr)
+        usage()
+        sys.exit(2)
+
+    for o, a in opts:
+        if o == '-h' or o == '--help':
+            usage()
+            sys.exit(0)
+        elif o == '-E':
+            settings.extension = a
+        elif o == '-r':
+            settings.recurse = True
+        elif o == '-t':
+            settings.stricttextvalidation = True
+        elif o == '-d':
+            settings.deep = True
+        elif o == '-q':
+            quick = True
+        elif o == '-i':
+            nofail = True
+        elif o == '-V':
+            print("FoLiA " + folia.FOLIAVERSION + ", library version " + folia.LIBVERSION,file=sys.stderr)
+            sys.exit(0)
+        else:
+            raise Exception("No such option: " + o)
+
+    schema  = lxml.etree.RelaxNG(folia.relaxng())
+
+    if len(args) >= 1:
+        success = True
+        for x in sys.argv[1:]:
+            if x[0] != '-':
+                if os.path.isdir(x):
+                    r = processdir(x,schema,quick,settings.deep, settings.stricttextvalidation)
+                elif os.path.isfile(x):
+                    r = validate(x, schema,quick,settings.deep, settings.stricttextvalidation)
+                else:
+                    print("ERROR: File or directory not found: " + x,file=sys.stderr)
+                    sys.exit(3)
+                if not r: success= False
+            if not success and not nofail:
+                sys.exit(1)
+    else:
+        print("ERROR: No files specified",file=sys.stderr)
+        sys.exit(2)
+
+if __name__ == "__main__":
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/rst2folia.py b/pynlpl/tests/FoLiA/foliatools/rst2folia.py
new file mode 100755
index 0000000..57fd98f
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/rst2folia.py
@@ -0,0 +1,538 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+#---------------------------------------------------------------
+# ReStructuredText to FoLiA Converter
+#   by Maarten van Gompel
+#   Centre for Language Studies
+#   Radboud University Nijmegen
+#   proycon AT anaproy DOT nl
+#
+#   Licensed under GPLv3
+#
+# This script converts RST to FoLiA format.
+#
+#----------------------------------------------------------------
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+import glob
+import gzip
+import os
+
+from collections import defaultdict
+from copy import copy
+
+from docutils import writers, nodes
+from docutils.core import publish_cmdline, default_description
+
+try:
+    import locale
+    locale.setlocale(locale.LC_ALL, '')
+except:
+    pass
+
+LIBVERSION = "0.1"
+
class Writer(writers.Writer):
    """Docutils Writer that serializes the document tree as FoLiA XML.

    The tree is walked by FoLiATranslator; the collected declarations,
    metadata and content are interpolated into TEMPLATE, unless --excerpt
    is given, in which case only the bare content is emitted.
    """

    DEFAULTID = "untitled"
    # Skeleton of a standalone FoLiA document; the %(...)s slots are
    # filled in by interpolation_dict()
    TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
%(stylesheet)s
<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="%(docid)s" version="0.11.3" generator="docutils-rst2folia-%(libversion)s">
<metadata type="native">
 <annotations>
%(declarations)s
 </annotations>
%(metadata)s
</metadata>
%(content)s
</FoLiA>
"""

    DEFAULTSTYLESHEET = "folia2html.xsl"

    # Default FoLiA set-definition URL per annotation type; entries can be
    # overridden from the command line via --sets
    DEFAULTSETS = {
        'division': 'https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/divisions.foliaset.xml',
        'style': 'https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/styles.foliaset.xml',
        'note': 'https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/notes.foliaset.xml',
        'gap': 'https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/gaps.foliaset.xml',
        'string': None,
    }

    #Formats this writer supports
    supported = ('folia',)

    settings_spec = (
        'FoLiA-Specific Options',
        None,
        (
            ('Document ID.  Default is "%s".' % DEFAULTID, ['--docid'], {'default': DEFAULTID, 'metavar': '<string>'}),
            ('Parent ID. Assign IDs under the specified element, this can be used to merge output back into a larger document', ['--parentid'], {'metavar': '<string>'}),
            ('Parent Type. Assume all new elements start under an element of this type (FoLiA tag), this can be used to merge output back into a larger document, use with --parentid', ['--parenttype'], {'default': 'div', 'metavar': '<string>'}),
            ("Excerpt only. Output only the text node and all elements under it. No standalone document, results may be inserted verbatim into a larger document if used with --parentid/--parenttype and --declare-all", ['--excerpt'], {'default': False, 'action': 'store_true'}),
            ("Declare all possible sets, even if they're not used.", ['--declare-all'], {'default': False, 'action': 'store_true'}),
            ("Sets. Comma separated list of annotationtype:seturl pairs. Example: division:https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/divisions.foliaset.xml", ['--sets'],{'default':""}),
            ("Stylesheet. XSL Stylesheet to associate with the document. Defaults to '%s'" % DEFAULTSTYLESHEET, ['--stylesheet'], {'default': "folia2html.xsl",'metavar':'<string>'}),
        )
    )

    # translator attributes copied onto the writer after translate()
    visitor_attributes = ('declarations','metadata','content')

    def translate(self):
        """Walk the document with FoLiATranslator and assemble the output."""
        sets = copy(self.DEFAULTSETS)
        # --sets entries override the defaults; format annotationtype:url
        for setassignment in self.document.settings.sets.split(','):
            if setassignment:
                annotationtype,set = setassignment.split(':')
                sets[annotationtype] = set
        self.visitor =  FoLiATranslator(self.document, sets)
        self.document.walkabout(self.visitor)
        for attr in self.visitor_attributes:
            setattr(self, attr, getattr(self.visitor, attr))
        self.output = self.apply_template()

    def apply_template(self):
        """Return the final document string (bare content when --excerpt)."""
        subs = self.interpolation_dict()
        if self.document.settings.excerpt:
            return "%(content)s" % subs
        else:
            return self.TEMPLATE % subs

    def interpolation_dict(self):
        """Collect the substitution values used by TEMPLATE."""
        subs = {}
        for attr in self.visitor_attributes:
            subs[attr] = ''.join(getattr(self, attr)).rstrip('\n')
        subs['encoding'] = self.document.settings.output_encoding
        subs['libversion'] = LIBVERSION
        subs['docid'] = self.document.settings.docid
        subs['stylesheet'] =  "<?xml-stylesheet type=\"text/xsl\" href=\"" + self.document.settings.stylesheet + "\"?>"
        return subs

    def assemble_parts(self):
        """Expose declarations/metadata/content via the docutils parts API."""
        writers.Writer.assemble_parts(self)
        for part in self.visitor_attributes:
            self.parts[part] = ''.join(getattr(self, part))
+
+
+
+class FoLiATranslator(nodes.NodeVisitor):
+
+
+    def __init__(self, document, sets={}):
+        self.textbuffer = []
+        self.path = [] #(tag, id) tuples of the current FoLiA path
+        self.content = [] #will contain all XML content as strings
+        self.metadata = []
+        self.declarations = []
+        self.id_store = defaultdict( lambda: defaultdict(int) )
+        self.docid = document.settings.docid
+        self.list_enumerated = [] #contains a 2-list of boolean, int pairs, indicating whether the list is enumerated or not, and the number of items in it thus-far (used for labels), support nesting.
+        self.rootdiv = False #create a root div element?
+        self.sets = sets
+        self.declared = {}
+        self.texthandled = False
+        if document.settings.declare_all:
+            for key in self.sets:
+                self.declare(key)
+        if document.settings.parentid:
+            self.parentid = document.settings.parentid
+            self.path.append( (document.settings.parenttype, self.parentid ) )
+            self.textid = "temporary-container-only"
+        else:
+            self.textid = self.docid + ".text"
+            self.parentid = None
+        self.excerpt = document.settings.excerpt
+
+
+        nodes.NodeVisitor.__init__(self, document)
+
+    ############# HELPERS ###############
+
+    def astext(self):
+        return ''.join(self.head + self.content)
+
+    def encode(self, text):
+        """Encode special characters in `text` & return."""
+        if sys.version < '3' and not isinstance(text, unicode):
+            text = unicode(text, 'utf-8')
+        elif sys.version >= '3' and not isinstance(text, str):
+            text = str(text, 'utf-8')
+        return text.translate({
+            ord('&'): '&',
+            ord('<'): '<',
+            ord('>'): '>',
+        })
+
+    def initstructure(self, tag, **attribs):
+        """Generic visit function for structure elements"""
+        #Generate an ID
+        if tag == "text":
+            id = self.textid
+        else:
+            parenttag, parentid = self.path[-1]
+            id = self.generate_id(parentid, tag)
+        self.declare(tag)
+        self.path.append( (tag, id ) )
+        indentation = (len(self.path)-1) * " "
+        o = indentation + "<" + tag + " xml:id=\"" + id + "\""
+        if tag == "text" and self.excerpt: #this is the root of our output, add namespace stuff
+            o += ' xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink"'
+        if attribs:
+            for key, value in attribs.items():
+                if key == "cls": key = "class"
+                if sys.version < '3':
+                    o += " " + key + "=\"" + unicode(value) + "\""
+                elif sys.version >= '3':
+                    o += " " + key + "=\"" + str(value) + "\""
+        o += ">\n"
+        self.content.append(o)
+
+    def closestructure(self, tag):
+        """Generic depart function for structure elements"""
+        _tag, id = self.path.pop()
+        if not tag == _tag:
+            raise Exception("Mismatch in closestructure, expected closure for " + tag + ", got " + _tag)
+        indentation = len(self.path) * " "
+        o = ""
+        if self.textbuffer:
+            o += indentation + " <t>"  + "".join(self.textbuffer) + "</t>\n"
+        o += indentation + "</" + tag + ">\n"
+        self.textbuffer = []
+        self.content.append(o)
+
+    def generate_id(self, parentid, tag ):
+        if parentid == "temporary-container-only" and self.parentid:
+            self.id_store[self.parentid][tag] += 1
+            return self.parentid + "." + tag + "." + str(self.id_store[parentid][tag])
+        else:
+            self.id_store[parentid][tag] += 1
+            return parentid + "." + tag + "." + str(self.id_store[parentid][tag])
+
+
+    def rightsibling(self, node):
+        fetch = False
+        for sibling in node.traverse(None,1,0,1,0):
+            if sibling is node:
+                fetch = True
+            elif fetch:
+                return sibling
+        return None
+
+
+    def ignore_depart(self, node):
+        try:
+            if node.ignore_depart:
+                return True
+        except AttributeError:
+            return False
+
    def addstyle(self,node,style):
        """Buffer the node's text wrapped in a <t-style> markup element."""
        self.texthandled = True
        self.declare('style')
        self.textbuffer.append(  '<t-style class="' + style + '">' + self.encode(node.astext()) + '</t-style>' )

    def addlink(self,node,url):
        """Buffer the node's text wrapped in a hyperlinked <t-str> element."""
        self.texthandled = True
        self.declare('string')
        self.textbuffer.append(  '<t-str xlink:type="simple" xlink:href="' + url + '">' + self.encode(node.astext()) + '</t-str>' )

    def addmetadata(self, key, node):
        """Append a <meta> entry (keyed by `key`) to the metadata block."""
        self.texthandled = True
        self.metadata.append(  " <meta id=\"" + key + "\">" + self.encode(node.astext()) + "</meta>\n" )
+
+
+    def declare(self, annotationtype):
+        if annotationtype == 'div':
+            annotationtype = 'division'
+        elif annotationtype == 's':
+            annotationtype = 'sentence'
+        elif annotationtype == 'p':
+            annotationtype = 'paragraph'
+        if not annotationtype in self.declared:
+            if annotationtype in self.sets:
+                if self.sets[annotationtype]:
+                    self.declarations.append("   <" + annotationtype + "-annotation set=\"" + self.sets[annotationtype] + "\" />\n")
+                else:
+                    self.declarations.append("   <" + annotationtype + "-annotation />\n")
+                self.declared[annotationtype] = True
+
+    ############# TRANSLATION HOOKS (MAIN STRUCTURE) ################
+
+
    def visit_document(self, node):
        """Open the FoLiA <text> element for the document root."""
        self.initstructure('text')

    def depart_document(self, node):
        # close the synthetic root <div> (opened in visit_title) before <text>
        if self.rootdiv:
            self.closestructure('div')
        self.closestructure('text')

    def visit_paragraph(self, node):
        if node.parent.__class__.__name__ == 'list_item':
            #this paragraph is in an item, we don't want paragraphs in items unless there actually are multiple elements in the item
            sibling = self.rightsibling(node)
            if sibling:
                self.initstructure('p')
            else:
                # sole child of the item: skip the <p> wrapper and flag the
                # node so depart_paragraph does not try to close it
                node.ignore_depart = True
        else:
            self.initstructure('p')

    def depart_paragraph(self, node):
        if not self.ignore_depart(node):
            self.closestructure('p')
+
    def visit_section(self, node):
        """Sections map to FoLiA <div class="section">."""
        self.initstructure('div',cls="section")

    def depart_section(self, node):
        self.closestructure('div')

    def visit_title(self, node):
        # a document-level title implies a <div> wrapping the whole document
        # body; it is closed again in depart_document (via rootdiv)
        if node.parent.__class__.__name__ == 'document':
            self.rootdiv = True
            self.initstructure('div',cls="document" if not self.parentid else "section")
        self.initstructure('head')

    def depart_title(self, node):
        self.closestructure('head')

    def visit_bullet_list(self,node):
        # push an [enumerated?, item-count] pair; a stack supports nesting
        self.list_enumerated.append([False,0])
        self.initstructure('list')

    def depart_bullet_list(self,node):
        self.list_enumerated.pop()
        self.closestructure('list')

    def visit_enumerated_list(self,node):
        self.list_enumerated.append([True,0])
        self.initstructure('list')

    def depart_enumerated_list(self,node):
        self.list_enumerated.pop()
        self.closestructure('list')

    def visit_list_item(self,node):
        if self.list_enumerated[-1][0]:
            # enumerated list: number the items via the n attribute
            self.list_enumerated[-1][1] += 1
            self.initstructure('item',n=self.list_enumerated[-1][1])
        else:
            self.initstructure('item')

    def depart_list_item(self,node):
        self.closestructure('item')

    def visit_image(self,node):
        self.initstructure('figure',src=node['uri'])
    def depart_image(self,node):
        #parent figure will do the closing if image in figure
        if node.parent.__class__.__name__ != "figure":
            self.closestructure('figure')

    def visit_figure(self,node):
        # the nested image opens the <figure>; nothing to do here
        pass
    def depart_figure(self,node):
        self.closestructure('figure')

    def visit_caption(self,node):
        self.initstructure('caption')
    def depart_caption(self,node):
        self.closestructure('caption')
+
+
    def visit_literal_block(self,node):
        # literal blocks become <gap> with raw CDATA content; suppress the
        # normal text buffering while inside
        self.initstructure('gap',cls="verbatim")
        self.texthandled = True
    def depart_literal_block(self,node):
        # closes the gap manually (instead of via closestructure) because the
        # body is emitted as <content><![CDATA[...]]> rather than a <t> child
        tag = "gap"
        _tag, id = self.path.pop()
        if not tag == _tag:
            raise Exception("Mismatch in closestructure, expected closure for " + tag + ", got " + _tag)
        indentation = len(self.path) * " "
        o = indentation + " <content><![CDATA["  + node.astext() + "]]></content>\n"
        o += indentation + "</" + tag + ">\n"
        self.content.append(o)
        self.texthandled = False

    def visit_block_quote(self, node):
        self.initstructure('quote')
    def depart_block_quote(self, node):
        self.closestructure('quote')
+
+    ############# TRANSLATION HOOKS (TEXT & MARKUP) ################
+
    def visit_Text(self, node):
        # plain text is buffered until the enclosing structure closes,
        # unless a markup handler already consumed it (texthandled)
        if not self.texthandled:
            self.textbuffer.append(  self.encode(node.astext()) )

    def depart_Text(self, node):
        pass

    # inline markup: each handler buffers the styled text itself and sets
    # texthandled so visit_Text does not buffer it a second time

    def visit_strong(self, node):
        self.addstyle(node,"strong")
    def depart_strong(self, node):
        self.texthandled = False

    def visit_emphasis(self, node):
        self.addstyle(node,"emphasis")
    def depart_emphasis(self, node):
        self.texthandled = False

    def visit_literal(self, node):
        self.addstyle(node,"literal")
    def depart_literal(self, node):
        self.texthandled = False

    def visit_reference(self, node):
        self.addlink(node,node.attributes['refuri'])
    def depart_reference(self, node):
        self.texthandled = False

    def visit_target(self, node): #TODO? Seems to work, am I missing something?
        pass
    def depart_target(self, node):
        pass

    def visit_comment(self, node):
        self.texthandled = True
    def depart_comment(self, node):
        # rst comments are preserved as XML comments
        self.content.append("<!-- " + node.astext() + " -->\n")
        self.texthandled = False
+
+
+    ############# TRANSLATION HOOKS (OTHER STRUCTURE) ################
+
+    def visit_footnote(self,node):
+        #TODO: handle footnote numbering:  http://code.nabla.net/doc/docutils/api/docutils/transforms/references/docutils.transforms.references.Footnotes.html
+        self.initstructure('note',cls='footnote')
+    def depart_footnote(self,node):
+        self.closestructure('note')
+
+    def visit_attention(self,node):
+        self.initstructure('note',cls='attention')
+    def depart_attention(self,node):
+        self.initstructure('note')
+
+    def visit_hint(self,node):
+        self.initstructure('note',cls='hint')
+    def depart_hint(self,node):
+        self.closestructure('note')
+
+
+    def visit_note(self,node):
+        self.initstructure('note',cls='note')
+    def depart_note(self,node):
+        self.closestructure('note')
+
+    def visit_caution(self,node):
+        self.initstructure('note',cls='caution')
+    def depart_caution(self,node):
+        self.closestructure('note')
+
+    def visit_warning(self,node):
+        self.initstructure('note',cls='warning')
+    def depart_warning(self,node):
+        self.closestructure('note')
+
+    def visit_danger(self,node):
+        self.initstructure('note',cls='danger')
+    def depart_danger(self,node):
+        self.closestructure('note')
+
+    def visit_admonition(self,node):
+        self.initstructure('note',cls='admonition')
+    def depart_admonition(self,node):
+        self.closestructure('note')
+
+    def visit_tip(self,node):
+        self.initstructure('note',cls='tip')
+    def depart_tip(self,node):
+        self.closestructure('note')
+
+    def visit_error(self,node):
+        self.initstructure('note',cls='error')
+    def depart_error(self,node):
+        self.closestructure('note')
+
+    def visit_important(self,node):
+        self.initstructure('note',cls='important')
+    def depart_important(self,node):
+        self.closestructure('note')
+    ############# TRANSLATION HOOKS (METADATA, rst-specific fields) ################
+
+    def visit_docinfo(self, node):
+        pass
+    def depart_docinfo(self, node):
+        pass
+    def visit_authors(self, node):
+        pass
+    def depart_authors(self, node):
+        pass
+
+    def visit_author(self, node):
+        self.addmetadata('author', node)
+    def depart_author(self, node):
+        self.texthandled = False
+
+    def visit_date(self, node):
+        self.addmetadata('date', node)
+
+    def depart_date(self, node):
+        self.texthandled = False
+
+    def visit_contact(self, node):
+        self.addmetadata('contact', node)
+
+    def depart_contact(self, node):
+        self.texthandled = False
+
+    def visit_status(self, node):
+        self.addmetadata('status', node)
+
+    def depart_status(self, node):
+        self.texthandled = False
+
+
+    def visit_version(self, node):
+        self.addmetadata('version', node)
+
+    def depart_version(self, node):
+        self.texthandled = False
+
+    def visit_copyright(self, node):
+        self.addmetadata('copyright', node)
+
+    def depart_copyright(self, node):
+        self.texthandled = False
+
+
+    def visit_organization(self, node):
+        self.addmetadata('organization', node)
+
+    def depart_organization(self, node):
+        self.texthandled = False
+
+
+
+    def visit_address(self, node):
+        self.addmetadata('address', node)
+
+    def depart_address(self, node):
+        self.texthandled = False
+
+
+    def visit_contact(self, node):
+        self.addmetadata('contact', node)
+
+    def depart_contact(self, node):
+        self.texthandled = False
+
+
def main():
    """Console entry point: run docutils with the FoLiA writer."""
    publish_cmdline(
        writer=Writer(),
        writer_name='folia',
        description='Generates FoLiA documents from reStructuredText. ' + default_description,
    )
+
+if __name__ == '__main__':
+    main()
diff --git a/pynlpl/tests/FoLiA/foliatools/xslt.py b/pynlpl/tests/FoLiA/foliatools/xslt.py
new file mode 100644
index 0000000..978fa2c
--- /dev/null
+++ b/pynlpl/tests/FoLiA/foliatools/xslt.py
@@ -0,0 +1,124 @@
+# -*- coding: utf8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import lxml.etree
+import sys
+import glob
+import getopt
+import os.path
+import io
+
def transform(xsltfilename, sourcefilename, targetfilename = None, encoding = 'utf-8'):
    """Apply an XSL stylesheet to a source XML file.

    Relative stylesheet names are resolved against this module's directory.
    Output is written to `targetfilename` if given, else printed to stdout.

    :raises Exception: if the stylesheet or the source file does not exist
    """
    xsldir = os.path.dirname(__file__)
    if xsltfilename[0] != '/': xsltfilename = os.path.join(xsldir, xsltfilename)
    if not os.path.exists(xsltfilename):
        raise Exception("XSL Stylesheet not found: " + xsltfilename)
    elif not os.path.exists(sourcefilename):
        raise Exception("File not found: " + sourcefilename)
    xslt = lxml.etree.parse(xsltfilename)
    transformer = lxml.etree.XSLT(xslt)
    parsedsource = lxml.etree.parse(sourcefilename)
    transformed = transformer(parsedsource)
    # serialize once, decoding on Python 3 (tostring returns bytes there)
    if sys.version < '3':
        output = lxml.etree.tostring(transformed, pretty_print=True, encoding=encoding)
    else:
        output = str(lxml.etree.tostring(transformed, pretty_print=True, encoding=encoding), encoding)
    if targetfilename:
        # BUG FIX: the output file was never closed on a write error; the
        # context manager guarantees closure.
        with io.open(targetfilename, 'w', encoding='utf-8') as f:
            f.write(output)
    else:
        print(output)
+
+
def usage():
    """Print the tool's usage text and common options to standard error."""
    for line in (
        settings.usage,
        "",
        "Parameters for output:",
        "  -o [filename]                Output to file (instead of default stdout)",
        "  -e [encoding]                Output encoding (default: utf-8)",
        "Parameters for processing directories:",
        "  -r                           Process recursively",
        "  -E [extension]               Set extension (default: xml)",
        "  -q                           Ignore errors",
    ):
        print(line, file=sys.stderr)
+
+
+
class settings:
    """Module-wide configuration, populated by main()."""
    autooutput = False                # appears unused in this module — TODO confirm
    extension = 'xml'                 # input file extension to match
    recurse = False                   # descend into subdirectories (-r)
    ignoreerrors = False              # log errors instead of raising (-q)
    encoding = 'utf-8'                # output encoding (-e)
    xsltfilename = "undefined.xsl"    # stylesheet to apply; set by main()
    outputextension = 'UNDEFINED'     # extension for derived output files; set by main()
    usage = "UNDEFINED"               # tool-specific usage text; set by main()
+
def processdir(d, outputfilename = None):
    """Transform every matching file in directory `d`, recursing into
    subdirectories when settings.recurse is set."""
    print("Searching in  " + d, file=sys.stderr)
    inputsuffix = '.' + settings.extension
    outputsuffix = '.' + settings.outputextension
    for entry in glob.glob(os.path.join(d, '*')):
        is_input = entry.endswith(inputsuffix) and not entry.endswith(outputsuffix)
        if is_input:
            outputfilename = entry[:-len(inputsuffix)] + outputsuffix
            process(entry, outputfilename)
        elif settings.recurse and os.path.isdir(entry):
            processdir(entry, outputfilename)
+
def process(inputfilename, outputfilename=None):
    """Transform one file; errors are logged instead of raised when
    settings.ignoreerrors is set."""
    try:
        transform(settings.xsltfilename, inputfilename, outputfilename, settings.encoding)
    except Exception as e:
        if not settings.ignoreerrors:
            raise
        print("ERROR: An exception was raised whilst processing " + inputfilename + ":", e, file=sys.stderr)
+
+
def main(xsltfilename, outputextension, usagetext):
    """Generic entry point shared by the XSLT-based conversion tools.

    :param xsltfilename: stylesheet to apply to each input file
    :param outputextension: extension for derived output filenames
    :param usagetext: tool-specific usage header shown by usage()
    """
    try:
        # BUG FIX: '-e' and '-t' are handled below but were missing from
        # the getopt spec ("o:E:hrq"), so passing either raised a
        # GetoptError; both are included now ('e' takes an argument).
        opts, args = getopt.getopt(sys.argv[1:], "o:e:E:thrq", ["help"])
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    settings.xsltfilename = xsltfilename
    settings.outputextension = outputextension
    settings.usage = usagetext

    outputfilename = ""


    for o, a in opts:
        if o == '-h' or o == '--help':
            usage()
            sys.exit(0)
        elif o == '-t':
            settings.retaintokenisation = True
        elif o == '-e':
            settings.encoding = a
        elif o == '-E':
            settings.extension = a
        elif o == '-o':
            outputfilename = a
        elif o == '-r':
            settings.recurse = True
        elif o == '-q':
            settings.ignoreerrors = True
        else:
            raise Exception("No such option: " + o)

    if args:
        for x in args:
            if os.path.isdir(x):
                processdir(x)
            elif os.path.isfile(x):
                # with multiple command-line arguments, derive the output
                # name per input file instead of reusing the -o value
                # (BUG FIX: this line was a duplicated self-assignment)
                if len(sys.argv) > 2:
                    outputfilename = x[:-len(settings.extension) - 1] + '.' + settings.outputextension
                process(x, outputfilename)
            else:
                print("ERROR: File or directory not found: " + x, file=sys.stderr)
                sys.exit(3)
    else:
        print("ERROR: Nothing to do, specify one or more files or directories",file=sys.stderr)
diff --git a/pynlpl/tests/FoLiA/schemas/generaterng.py b/pynlpl/tests/FoLiA/schemas/generaterng.py
new file mode 100755
index 0000000..c34a1e8
--- /dev/null
+++ b/pynlpl/tests/FoLiA/schemas/generaterng.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+#-*- coding:utf-8 -*-
+
+from __future__ import print_function, unicode_literals, division, absolute_import
+
+import sys
+try:
+    from pynlpl.formats import folia
+except ImportError:
+    print("PyNLPL is required to run this script, obtain it from https://github.com/proycon/pynlpl and either install it or make sure the pynlpl/ dir is symlinked from the directory this script is in.",file=sys.stderr)
+    sys.exit(3)
+import io
+
# Generate a minimal RelaxNG schema for FoLiA via the pynlpl library
print("Generating Relax NG schema")
folia.relaxng('folia-min.rng')

# Re-read the generated schema and insert a descriptive header comment
# directly after the XML declaration (index 1), then write it out as the
# final folia.rng. NOTE: the dashes before "relaxng" below are figure
# dashes on purpose — a literal "--" is not permitted inside an XML comment.
data = io.open("folia-min.rng",'r',encoding='utf-8').readlines()
data.insert(1,"""<!--
RelaxNG schema for FoLiA XML v%s
    by Maarten van Gompel
    Induction of Linguistic Knowledge Research group
    Tilburg University

    http://ilk.uvt.nl/folia
    http://github.com/proycon/folia

    Schema version %s
      (auto-generated by pynlpl.formats.folia)

    Validation examples:
     $ xmllint ‒‒relaxng folia.rng foliadocument.xml
     $ jing folia.rng foliadocument.xml

    However, it is recommended to use the foliavalidator tool instead,
    as it does extra validation that can not be captured by RelaxNG!

    $ foliavalidator foliadocument.xml

    Licensed under the GNU General Public License v3
-->
""" %  (folia.FOLIAVERSION, folia.LIBVERSION))
io.open('folia.rng','w',encoding='utf-8').writelines(data)


# Sanity check: validate the bundled example document against the library
print("Validating example document")
folia.validate('../test/example.xml')
diff --git a/pynlpl/tests/FoLiA/setup.py b/pynlpl/tests/FoLiA/setup.py
new file mode 100755
index 0000000..401db93
--- /dev/null
+++ b/pynlpl/tests/FoLiA/setup.py
@@ -0,0 +1,63 @@
+#! /usr/bin/env python
+# -*- coding: utf8 -*-
+
+import os
+import io
+from setuptools import setup
+
+
def read(fname):
    """Return the UTF-8 contents of `fname`, resolved relative to this
    file's directory."""
    # BUG FIX: the file handle was never closed (relied on GC); the
    # context manager guarantees closure.
    path = os.path.join(os.path.dirname(__file__), fname)
    with io.open(path, 'r', encoding='utf-8') as f:
        return f.read()
+
# Package metadata and console-script entry points for the FoLiA-tools
# distribution; each entry point maps a command name to a module main().
setup(
    name = "FoLiA-tools",
    version = "1.4.3.56", #point to this from pynlpl/tests/folia.py
    author = "Maarten van Gompel",
    author_email = "proycon at anaproy.nl",
    description = ("FoLiA-tools contains various Python-based command line tools for working with FoLiA XML (Format for Linguistic Annotation)"),
    license = "GPL",
    keywords = "nlp computational_linguistics search linguistics toolkit folia pynlpl",
    url = "https://github.com/proycon/folia",
    packages=['foliatools'],
    long_description=read('README.rst'),
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Topic :: Text Processing :: Linguistic",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "Operating System :: POSIX",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    ],
    entry_points = {
        'console_scripts': [
            'folia2txt = foliatools.folia2txt:main',
            'folia2annotatedtxt = foliatools.folia2annotatedtxt:main',
            'foliafreqlist = foliatools.foliafreqlist:main',
            'foliavalidator = foliatools.foliavalidator:main',
            'foliamerge = foliatools.foliamerge:main',
            'folia2columns = foliatools.folia2columns:main',
            'folia2dcoi = foliatools.folia2dcoi:main',
            'folia2html = foliatools.folia2html:main',
            'foliaquery = foliatools.foliaquery:main',
            'foliaquery1 = foliatools.foliaquery1:main', #old version
            'foliatextcontent = foliatools.foliatextcontent:main',
            'dcoi2folia = foliatools.dcoi2folia:main',
            'rst2folia = foliatools.rst2folia:main',
            'foliacat = foliatools.foliacat:main',
            'folia2rst = foliatools.folia2rst:main',
            'foliacorrect = foliatools.foliacorrect:main',
            'foliacount = foliatools.foliacount:main',
            'foliaid = foliatools.foliaid:main',
            'foliaspec = foliatools.foliaspec:main',
            'foliaspec2json = foliatools.foliaspec2json:main',
            'alpino2folia = foliatools.alpino2folia:main',
            'foliatree = foliatools.foliatree:main',
            'foliasetdefinition = foliatools.foliasetdefinition:main',
        ]
    },
    #include_package_data=True,
    # ship the XSL stylesheets and FoLiA schema files alongside the package
    package_data = {'foliatools': ['*.xsl', '../schemas/*.yml','../schemas/*.rng']},
    install_requires=['pynlpl >= 1.1.10', 'lxml >= 2.2','docutils']
)
diff --git a/pynlpl/tests/evaluation.py b/pynlpl/tests/evaluation.py
index aa43aaf..dfe9ae2 100755
--- a/pynlpl/tests/evaluation.py
+++ b/pynlpl/tests/evaluation.py
@@ -22,7 +22,7 @@ import os
 import unittest
 import random
 
-from pynlpl.evaluation import AbstractExperiment, WPSParamSearch, ExperimentPool, ClassEvaluation
+from pynlpl.evaluation import AbstractExperiment, WPSParamSearch, ExperimentPool, ClassEvaluation, OrdinalEvaluation
 
 class ParamExperiment(AbstractExperiment):
     def defaultparameters(self):
@@ -81,7 +81,18 @@ class ClassEvaluationTest2(unittest.TestCase):
         print()
         print(e)
         print(e.confusionmatrix())
-    
+
+class OrdinalEvaluationTest(unittest.TestCase):
+    def setUp(self):
+        self.goals = [1,2,3,4,3,2]
+        self.observations = [4,1,3,4,2,2]
+
+    def test001(self):
+        oe = OrdinalEvaluation(self.goals,self.observations)
+        print(oe.mae())
+        print(oe.mae(2))
+        print(oe.rmse())
+        print(oe.rmse(4))
     
 class ClassEvaluationTest(unittest.TestCase):
     def setUp(self):
diff --git a/pynlpl/tests/folia.py b/pynlpl/tests/folia.py
index 870b534..66b3b21 100755
--- a/pynlpl/tests/folia.py
+++ b/pynlpl/tests/folia.py
@@ -17,15 +17,6 @@ from __future__ import print_function
 from __future__ import unicode_literals
 from __future__ import division
 from __future__ import absolute_import
-from pynlpl.common import u, isstring
-import sys
-if sys.version < '3':
-    from codecs import getwriter
-    stderr = getwriter('utf-8')(sys.stderr)
-    stdout = getwriter('utf-8')(sys.stdout)
-else:
-    stderr = sys.stderr
-    stdout = sys.stdout
 
 import sys
 import os
@@ -34,18 +25,34 @@ import io
 import gzip
 import bz2
 import re
+from datetime import datetime
+import lxml.objectify
+from pynlpl.common import u, isstring
+from pynlpl.formats import folia
+if sys.version < '3':
+    from codecs import getwriter
+    stderr = getwriter('utf-8')(sys.stderr)
+    stdout = getwriter('utf-8')(sys.stdout)
+else:
+    stderr = sys.stderr
+    stdout = sys.stdout
 
+FOLIARELEASE = "v1.5.1.59"
+#FOLIARELEASE = None #development version, do *NOT* release if this is set!
 
-FOLIARELEASE = "v1.4.0.53"
-
-if os.path.exists('../../FoLiA'):
+if os.path.exists('../../../FoLiA'):
+    FOLIAPATH = '../../../FoLiA/'
+elif os.path.exists('../../FoLiA'):
     FOLIAPATH = '../../FoLiA/'
 elif os.path.exists('../FoLiA'):
     FOLIAPATH = '../FoLiA/'
 else:
     FOLIAPATH = 'FoLiA'
     print("Downloading FoLiA",file=sys.stderr)
-    os.system("git clone https://github.com/proycon/folia.git FoLiA && cd FoLiA && git checkout tags/" + FOLIARELEASE + ' && cd ..')
+    if FOLIARELEASE:
+        os.system("git clone https://github.com/proycon/folia.git FoLiA && cd FoLiA && git checkout tags/" + FOLIARELEASE + ' && cd ..')
+    else:
+        os.system("git clone https://github.com/proycon/folia.git FoLiA")
 
 if 'TMPDIR' in os.environ:
     TMPDIR = os.environ['TMPDIR']
@@ -56,9 +63,6 @@ if sys.version < '3':
     from StringIO import StringIO
 else:
     from io import StringIO, BytesIO
-from datetime import datetime
-import lxml.objectify
-from pynlpl.formats import folia
 if folia.LXE:
     from lxml import etree as ElementTree
 else:
@@ -95,7 +99,6 @@ class Test1Read(unittest.TestCase):
 
     def test1_readfromfile(self):
         """Reading from file"""
-        global FOLIAEXAMPLE
         #write example to file
         f = io.open(os.path.join(TMPDIR,'foliatest.xml'),'w',encoding='utf-8')
         f.write(FOLIAEXAMPLE)
@@ -110,7 +113,6 @@ class Test1Read(unittest.TestCase):
 
     def test1a_readfromfile(self):
         """Reading from GZ file"""
-        global FOLIAEXAMPLE
         #write example to file
         f = gzip.GzipFile(os.path.join(TMPDIR,'foliatest.xml.gz'),'w')
         f.write(FOLIAEXAMPLE.encode('utf-8'))
@@ -126,7 +128,6 @@ class Test1Read(unittest.TestCase):
 
     def test1b_readfromfile(self):
         """Reading from BZ2 file"""
-        global FOLIAEXAMPLE
         #write example to file
         f = bz2.BZ2File(os.path.join(TMPDIR,'foliatest.xml.bz2'),'w')
         f.write(FOLIAEXAMPLE.encode('utf-8'))
@@ -142,19 +143,16 @@ class Test1Read(unittest.TestCase):
 
     def test2_readfromstring(self):
         """Reading from string (unicode)"""
-        global FOLIAEXAMPLE
         doc = folia.Document(string=FOLIAEXAMPLE)
         self.assertTrue(isinstance(doc,folia.Document))
 
-    def test2_readfromstring(self):
+    def test2b_readfromstring(self):
         """Reading from string (bytes)"""
-        global FOLIAEXAMPLE
         doc = folia.Document(string=FOLIAEXAMPLE.encode('utf-8'))
         self.assertTrue(isinstance(doc,folia.Document))
 
     def test3_readfromstring(self):
         """Reading from pre-parsed XML tree (as unicode(Py2)/str(Py3) obj)"""
-        global FOLIAEXAMPLE
         if sys.version < '3':
             doc = folia.Document(tree=ElementTree.parse(StringIO(FOLIAEXAMPLE.encode('utf-8'))))
         else:
@@ -164,7 +162,6 @@ class Test1Read(unittest.TestCase):
 
     def test4_readdcoi(self):
         """Reading D-Coi file"""
-        global DCOIEXAMPLE
         doc = folia.Document(string=DCOIEXAMPLE)
         #doc = folia.Document(tree=lxml.etree.parse(StringIO(DCOIEXAMPLE.encode('iso-8859-15'))))
         self.assertTrue(isinstance(doc,folia.Document))
@@ -173,7 +170,7 @@ class Test1Read(unittest.TestCase):
 class Test2Sanity(unittest.TestCase):
 
     def setUp(self):
-        self.doc = folia.Document(string=FOLIAEXAMPLE)
+        self.doc = folia.Document(string=FOLIAEXAMPLE, textvalidation=True)
 
     def test000_count_text(self):
         """Sanity check - One text """
@@ -205,7 +202,7 @@ class Test2Sanity(unittest.TestCase):
         self.assertEqual( w.text() , "Stemma" )
         self.assertEqual( str(w) , "Stemma" ) #should be unicode object also in Py2!
         if sys.version < '3':
-            self.assertEqual( unicode(w) , "Stemma" )
+            self.assertEqual( unicode(w) , "Stemma" ) #pylint: disable=undefined-variable
 
 
     def test005_last_word(self):
@@ -224,7 +221,7 @@ class Test2Sanity(unittest.TestCase):
         self.assertTrue( isinstance(s, folia.Sentence) )
         self.assertEqual( s.id, 'WR-P-E-J-0000000001.p.1.s.1' )
         self.assertFalse( s.hastext() )
-        self.assertEqual( str(s), "Stemma is een ander woord voor stamboom ." )
+        self.assertEqual( str(s), "Stemma is een ander woord voor stamboom." )
 
     def test006b_sentencetest(self):
         """Sanity check - Sentence text (including retaining tokenisation)"""
@@ -557,7 +554,7 @@ class Test2Sanity(unittest.TestCase):
         pos = w.annotation(folia.PosAnnotation)
         self.assertEqual( pos.datetime, datetime(2011, 7, 20, 19, 0, 1) )
 
-        self.assertTrue( xmlcheck(pos.xmlstring(), '<pos xmlns="http://ilk.uvt.nl/folia" class="N(soort,ev,basis,zijd,stan)" datetime="2011-07-20T19:00:01"/>') )
+        self.assertTrue( xmlcheck(pos.xmlstring(), '<pos xmlns="http://ilk.uvt.nl/folia" set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="N(soort,ev,basis,zijd,stan)" datetime="2011-07-20T19:00:01"/>') )
 
     def test028_wordparents(self):
         """Sanity Check - Finding parents of word"""
@@ -595,7 +592,7 @@ class Test2Sanity(unittest.TestCase):
         """Sanity check - Text Content"""
         s = self.doc['WR-P-E-J-0000000001.p.1.s.4']
 
-        self.assertEqual( s.text(), 'De hoofdletter A wordt gebruikt voor het originele handschrift .')
+        self.assertEqual( s.text(), 'De hoofdletter A wordt gebruikt voor het originele handschrift.')
         self.assertEqual( s.stricttext(), 'De hoofdletter A wordt gebruikt voor het originele handschrift.')
         self.assertEqual( s.textcontent().text(), 'De hoofdletter A wordt gebruikt voor het originele handschrift.')
         self.assertEqual( s.textcontent('original').text(), 'De hoofdletter A wordt gebruikt voor het originele handschrift.')
@@ -618,12 +615,23 @@ class Test2Sanity(unittest.TestCase):
         s = self.doc['sandbox.3.head']
         t = s.textcontent()
         self.assertEqual( len(t), 3)
-        self.assertEqual( t.text(), "De FoLiA developers zijn:")
+        self.assertEqual( t.text(), "De \nFoLiA developers zijn:")
         self.assertEqual( t[0], "De ")
         self.assertTrue( isinstance(t[1], folia.TextMarkupString) )
-        self.assertEqual( t[1].text(), "FoLiA developers")
+        self.assertEqual( t[1].text(), "\nFoLiA developers")
         self.assertEqual( t[2], " zijn:")
 
+    def test030c_textclassattrib(self):
+        """Sanity check - Text class attribute"""
+        w = self.doc['WR-P-E-J-0000000001.p.1.s.4.w.5']
+        self.assertEqual( w.annotation(folia.PosAnnotation).textclass , 'original')
+        self.assertEqual( w.annotation(folia.LemmaAnnotation).textclass , 'original')
+
+    def test030d_textclassattrib_default(self):
+        """Sanity check - Text class attribute (default)"""
+        w = self.doc['WR-P-E-J-0000000001.p.1.s.4.w.4']
+        self.assertEqual( w.annotation(folia.PosAnnotation).textclass , 'current')
+        self.assertEqual( w.annotation(folia.LemmaAnnotation).textclass , 'current')
 
     def test031_sense(self):
         """Sanity Check - Lexical Semantic Sense Annotation"""
@@ -825,7 +833,7 @@ class Test2Sanity(unittest.TestCase):
         self.assertEqual( t.count(folia.TextMarkupString), 1)
 
         st = next(t.select(folia.TextMarkupString))
-        self.assertEqual( st.text(), "FoLiA developers" ) #testing value (full text value)
+        self.assertEqual( st.text(), "\nFoLiA developers" ) #testing value (full text value)
 
         self.assertEqual( st.resolve(), self.doc['sandbox.3.str']) #testing resolving references
 
@@ -837,7 +845,7 @@ class Test2Sanity(unittest.TestCase):
         self.assertEqual( st[0], self.doc['sandbox.3.str.bold'])
 
         #testing TextMarkup.text()
-        self.assertEqual( st[0].text(), 'FoLiA' )
+        self.assertEqual( st[0].text(), '\nFoLiA' )
 
         #resolving returns self if it's not a reference
         self.assertEqual( self.doc['sandbox.3.str.bold'].resolve(), self.doc['sandbox.3.str.bold'])
@@ -865,7 +873,8 @@ class Test2Sanity(unittest.TestCase):
     def test046a_text(self):
         """Sanity Check - Text serialisation test with linebreaks and whitespaces"""
         p = self.doc['WR-P-E-J-0000000001.p.1'] #this is a bit of a malformed paragraph due to the explicit whitespace and linebreaks in it, but makes for a nice test:
-        self.assertEqual( p.text(), "Stemma is een ander woord voor stamboom . In de historische wetenschap wordt zo'n stamboom , onder de naam stemma codicum ( handschriftelijke genealogie ) , gebruikt om de verwantschap tussen handschriften weer te geven . \n\nWerkwijze\n\nHiervoor worden de handschriften genummerd en gedateerd zodat ze op de juiste plaats van hun afstammingsgeschiedenis geplaatst kunnen worden . De hoofdletter A wordt gebruikt voor het originele handschrift . De ander [...]
+        self.maxDiff = 3000
+        self.assertEqual( p.text(), "Stemma is een ander woord voor stamboom. In de historische wetenschap wordt zo'n stamboom , onder de naam stemma codicum ( handschriftelijke genealogie ) , gebruikt om de verwantschap tussen handschriften weer te geven . \n\nWerkwijze\n\nHiervoor worden de handschriften genummerd en gedateerd zodat ze op de juiste plaats van hun afstammingsgeschiedenis geplaatst kunnen worden . De hoofdletter A wordt gebruikt voor het originele handschrift. De andere  [...]
 
 
     def test046b_text(self):
@@ -955,7 +964,7 @@ class Test2Sanity(unittest.TestCase):
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
         doc = folia.Document(string=xml)
         self.assertEqual( doc.metadatatype, folia.MetaDataType.CMDI )
-        self.assertEqual( doc.metadatafile, 'test.cmdi.xml' )
+        self.assertEqual( doc.metadata.url, 'test.cmdi.xml' )
 
     def test101b_metadataextref2(self):
         """Sanity Check - Metadata external reference (IMDI)"""
@@ -970,7 +979,7 @@ class Test2Sanity(unittest.TestCase):
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
         doc = folia.Document(string=xml)
         self.assertEqual( doc.metadatatype, folia.MetaDataType.IMDI )
-        self.assertEqual( doc.metadatafile, 'test.imdi.xml' )
+        self.assertEqual( doc.metadata.url , 'test.imdi.xml' )
 
     def test101c_metadatainternal(self):
         """Sanity Check - Metadata internal (foreign data) (Dublin Core)"""
@@ -1024,6 +1033,28 @@ class Test2Sanity(unittest.TestCase):
         self.assertEqual( doc.metadata.node.xpath('//dc:creator', namespaces={'dc':'http://purl.org/dc/elements/1.1/'})[0].text , 'proycon' )
         xmlcheck(doc.xmlstring(), xml)
 
+    def test101e_metadatalegacyimdi(self):
+        """Sanity Check - Legacy inline IMDI metadata"""
+        #adapted from foliatests/tests/folia.imdi.xml
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="imdi">
+    <annotations>
+      <event-annotation set="test"/>
+    </annotations>
+    <imdi:METATRANSCRIPT xmlns:imdi="http://www.mpi.nl/IMDI/Schema/IMDI">
+      <imdi:Session>
+	<imdi:Title>Een imdi file</imdi:Title>
+	<imdi:Date>28/09/2017</imdi:Date>
+      </imdi:Session>
+    </imdi:METATRANSCRIPT>
+  </metadata>
+  <text xml:id="test.text"/>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        self.assertEqual( doc.metadatatype, "imdi" )
+        self.assertEqual( doc.metadata.node.xpath('//imdi:Title', namespaces={'imdi':'http://www.mpi.nl/IMDI/Schema/IMDI'})[0].text , 'Een imdi file' )
+
     def test102a_declarations(self):
         """Sanity Check - Declarations - Default set"""
         xml = """<?xml version="1.0"?>\n
@@ -1072,7 +1103,9 @@ class Test2Sanity(unittest.TestCase):
     <gap class="X" set="extended-gap-set" />
   </text>
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
-        self.assertRaises( ValueError,  folia.Document, string=xml)
+        with self.assertRaises( folia.ParseError) as cm:
+            folia.Document(string=xml)
+        self.assertEqual(cm.exception.cause.__class__, ValueError)
 
 
     def test102c_declarations(self):
@@ -1109,7 +1142,9 @@ class Test2Sanity(unittest.TestCase):
     <gap class="Y" />
   </text>
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
-        self.assertRaises(ValueError,  folia.Document, string=xml )
+        with self.assertRaises( folia.ParseError) as cm:
+            folia.Document(string=xml)
+        self.assertEqual(cm.exception.cause.__class__, ValueError)
 
 
 
@@ -1130,7 +1165,9 @@ class Test2Sanity(unittest.TestCase):
     <gap class="Y" set="gip-set"/>
   </text>
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
-        self.assertRaises(ValueError,  folia.Document, string=xml )
+        with self.assertRaises( folia.ParseError) as cm:
+            folia.Document(string=xml)
+        self.assertEqual(cm.exception.cause.__class__, ValueError)
 
     def test102d3_declarations(self):
         """Sanity Check - Declarations - Ignore Duplicates"""
@@ -1164,7 +1201,9 @@ class Test2Sanity(unittest.TestCase):
     <gap class="X" set="extended-gap-set" />
   </text>
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
-        self.assertRaises( ValueError,  folia.Document, string=xml)
+        with self.assertRaises( folia.ParseError) as cm:
+            folia.Document(string=xml)
+        self.assertEqual(cm.exception.cause.__class__, ValueError)
 
     def test102f_declarations(self):
         """Sanity Check - Declarations - Declaration not needed"""
@@ -1178,7 +1217,7 @@ class Test2Sanity(unittest.TestCase):
     <gap />
   </text>
 </FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
-        doc = folia.Document(string=xml)
+        folia.Document(string=xml)
 
 
     def test102g_declarations(self):
@@ -1216,7 +1255,7 @@ class Test2Sanity(unittest.TestCase):
 
 
     def test102i_declarations(self):
-        """Sanity Check - Declarations - miscellanious trouble"""
+        """Sanity Check - Declarations - miscellaneous trouble"""
         xml = """<?xml version="1.0"?>\n
 <FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
   <metadata type="native">
@@ -1324,6 +1363,59 @@ class Test2Sanity(unittest.TestCase):
 
         self.assertEqual( next(doc["example.text.1"].select(folia.Gap)).datetime ,  folia.parse_datetime('2011-12-15T19:00') )
 
+    def test102m_declarations(self):
+        """Sanity Check - Declarations - Adding a declaration of a FoLiA v1.4 RDF Set Definition."""
+        xml = """<?xml version="1.0"?>\n
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text.1">
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        doc.declare(folia.AnnotationType.ENTITY, "https://github.com/proycon/folia/blob/master/setdefinitions/namedentities.foliaset.ttl", annotator='proycon' )
+
+    def test102n_aliases(self):
+        """Sanity Check - Declarations - Testing Aliases"""
+        xml = """<?xml version="1.0"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <gap-annotation set="some very convoluted url or such which clutters all" alias="gap-set" datetime="2012-06-18T17:49"/>
+      <division-annotation set="a long div annotation name" alias="div-set" datetime="2012-06-18T17:49"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text.1">
+    <gap class="X" />
+    <gap class="Y" datetime="2012-06-18T17:50"/>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+
+        doc = folia.Document(string=xml)
+        doc.declare(folia.AnnotationType.GAP, "nog zon ingewikkelde en veels te lange declaratie", alias='gap-set2' )
+        self.doc.xmlstring() #check if serialisation works
+
+        #declaring a setname which is already an alias is an error
+        self.assertRaises( ValueError,  doc.declare, folia.AnnotationType.GAP, "gap-set2")
+
+        #declaring an alias  which is already an alias is an error
+        self.assertRaises( ValueError,  doc.declare, folia.AnnotationType.GAP, "gap-set3", alias="gap-set2")
+
+        #declaring an alias  which is already a setname is an error
+        self.assertRaises( ValueError,  doc.declare, folia.AnnotationType.GAP, "gap-set3", alias="nog zon ingewikkelde en veels te lange declaratie")
+
+        #just declaring again is NOT an error!
+        doc.declare(folia.AnnotationType.GAP, "nog zon ingewikkelde en veels te lange declaratie", alias='gap-set2' )
+
+        self.doc.xmlstring() #check if serialisation still works
+
+        #declaring again with another alias IS an error!
+        self.assertRaises(ValueError, doc.declare,folia.AnnotationType.GAP, "nog zon ingewikkelde en veels te lange declaratie", alias='gap-set3' )
+
+        #declaring again with same alias and another setname IS an error!
+        self.assertRaises(ValueError, doc.declare, folia.AnnotationType.GAP, "niet zon ingewikkelde en veels te lange declaratie", alias='gap-set2' )
 
 
 
@@ -1490,13 +1582,109 @@ class Test2Sanity(unittest.TestCase):
         alignments = list(ca[0].select(folia.Alignment))
         self.assertEqual(len(alignments),2)
 
+    def test106_submetadata(self):
+        """Sanity Check - Submetadata"""
+        self.assertEqual(self.doc['WR-P-E-J-0000000001.div0.1'].getmetadata(), self.doc.submetadata['wikipedia.stemma'])
+        self.assertTrue(isinstance(self.doc['WR-P-E-J-0000000001.div0.1'].getmetadata(), folia.NativeMetaData))
+        self.assertEqual(self.doc.submetadatatype[self.doc['WR-P-E-J-0000000001.div0.1'].metadata], 'native')
+        self.assertEqual(self.doc['WR-P-E-J-0000000001.div0.1'].getmetadata('originalsource'), 'https://nl.wikipedia.org/wiki/Stemma')
+        self.assertEqual(self.doc['WR-P-E-J-0000000001.p.1.s.1.w.1'].getmetadata(), self.doc.submetadata['wikipedia.stemma'])
+        self.assertEqual(self.doc['sandbox.3'].getmetadata(), self.doc.submetadata['sandbox.3.metadata'])
+        self.assertEqual(self.doc['sandbox.3'].getmetadata('author'), 'proycon')
+        self.assertEqual(self.doc['example.table.1.w.1'].getmetadata(), self.doc.submetadata['sandbox.3.metadata'])
+
+    def test107a_submetadataextref(self):
+        """Sanity Check - Submetadata external reference (CMDI)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+<metadata type="native">
+    <annotations>
+    </annotations>
+    <submetadata xml:id="test.metadata" src="test.cmdi.xml" type="cmdi" />
+</metadata>
+<text xml:id="test.text" metadata="test.metadata" />
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        self.assertEqual( doc.submetadatatype['test.metadata'], 'cmdi')
+        self.assertTrue( isinstance(doc['test.text'].getmetadata(), folia.ExternalMetaData) )
+        self.assertEqual( doc['test.text'].getmetadata().url, 'test.cmdi.xml' )
+
+
+    def test107b_metadatainternal(self):
+        """Sanity Check - Submetadata internal (foreign data) (Dublin Core)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+<metadata>
+  <annotations>
+  </annotations>
+  <submetadata xml:id="test.metadata" type="dc">
+      <foreign-data xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <dc:identifier>mydoc</dc:identifier>
+        <dc:format>text/xml</dc:format>
+        <dc:type>Example</dc:type>
+        <dc:contributor>proycon</dc:contributor>
+        <dc:creator>proycon</dc:creator>
+        <dc:language>en</dc:language>
+        <dc:publisher>Radboud University</dc:publisher>
+        <dc:rights>public Domain</dc:rights>
+      </foreign-data>
+    </submetadata>
+</metadata>
+<text xml:id="test.text" metadata="test.metadata" />
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        self.assertEqual( doc.submetadatatype['test.metadata'], 'dc')
+        self.assertTrue( isinstance(doc['test.text'].getmetadata(), folia.ForeignData) )
+
+    def test108_text_with_comment(self):
+        """Sanity Check - Text with XML comment"""
+        xml = """<?xml version="1.0"?>\n
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text.1">
+   <t><!-- Comment -->This is the real text</t>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        self.assertEqual(doc['example.text.1'].text(),"This is the real text")
+
+    def test108b_text_with_comment(self):
+        """Sanity Check - Text with XML comment"""
+        xml = """<?xml version="1.0"?>\n
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text.1">
+   <t>This is the real text<!-- Comment --></t>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        self.assertEqual(doc['example.text.1'].text(),"This is the real text")
 
+    def test108c_text_with_comment(self):
+        """Sanity Check - Text with FoLiA comment"""
+        xml = """<?xml version="1.0"?>\n
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text.1">
+   <t>This is the real text<comment annotator="pkampschreur" annotatortype="manual" datetime="2017-11-01T20:55:50">Overbodig</comment></t>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml)
+        self.assertEqual(doc['example.text.1'].text(),"This is the real text")
 
 class Test4Edit(unittest.TestCase):
 
     def setUp(self):
-        global FOLIAEXAMPLE
-        self.doc = folia.Document(string=FOLIAEXAMPLE)
+        self.doc = folia.Document(string=FOLIAEXAMPLE, textvalidation=True)
 
     def test001_addsentence(self):
         """Edit Check - Adding a sentence to first paragraph (verbose)"""
@@ -1561,7 +1749,7 @@ class Test4Edit(unittest.TestCase):
         s.append(folia.Word,'een')
         s.append(folia.Word,'nieuwe')
         w = s.append(folia.Word,'zin')
-        w2 = s.append(folia.Word,'.',cls='PUNCTUATION')
+        s.append(folia.Word,'.',cls='PUNCTUATION')
 
         self.assertEqual( s.id, 'WR-P-E-J-0000000001.p.1.s.9')
         self.assertEqual( len(list(s.words())), 6 ) #number of words in sentence
@@ -1590,7 +1778,7 @@ class Test4Edit(unittest.TestCase):
         s.add(folia.Word,'een')
         s.add(folia.Word,'nieuwe')
         w = s.add(folia.Word,'zin')
-        w2 = s.add(folia.Word,'.',cls='PUNCTUATION')
+        s.add(folia.Word,'.',cls='PUNCTUATION')
 
         self.assertEqual( len(list(s.words())), 6 ) #number of words in sentence
         self.assertEqual( w.text(), 'zin' ) #text check
@@ -1681,18 +1869,18 @@ class Test4Edit(unittest.TestCase):
 
         #add a pos annotation without specifying a set (should take default set), but this will clash with existing tag!
 
-        self.assertRaises( folia.DuplicateAnnotationError, w.append, folia.PosAnnotation(self.doc,  cls='N', annotator='testscript', annotatortype=folia.AnnotatorType.AUTO) )
+        self.assertRaises( folia.DuplicateAnnotationError, w.append, folia.PosAnnotation(self.doc,  cls='N', annotator='testscript', annotatortype=folia.AnnotatorType.AUTO, set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" ) )
         self.assertRaises( folia.DuplicateAnnotationError, w.append, folia.LemmaAnnotation(self.doc, cls='naam', annotator='testscript', annotatortype=folia.AnnotatorType.AUTO ) )
 
     def test005_addalternative(self):
         """Edit Check - Adding an alternative token annotation"""
         w = self.doc['WR-P-E-J-0000000001.p.1.s.2.w.11']
-        w.append( folia.Alternative(self.doc, generate_id_in=w, contents=folia.PosAnnotation(self.doc, cls='V')))
+        w.append( folia.Alternative(self.doc, generate_id_in=w, contents=folia.PosAnnotation(self.doc, cls='V', set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" )))
 
         #reobtaining it:
         alt = list(w.alternatives()) #all alternatives
 
-        set = self.doc.defaultset(folia.AnnotationType.POS)
+        set = "https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" #pylint: disable=redefined-builtin
 
         alt2 = list(w.alternatives(folia.PosAnnotation, set))
 
@@ -1701,7 +1889,7 @@ class Test4Edit(unittest.TestCase):
         self.assertEqual( len(alt2),1 )
         self.assertTrue( isinstance(alt[0].annotation(folia.PosAnnotation, set), folia.PosAnnotation) )
 
-        self.assertTrue( xmlcheck(w.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.2.w.11"><t>naam</t><pos class="N(soort,ev,basis,zijd,stan)"/><lemma class="naam"/><alt xml:id="WR-P-E-J-0000000001.p.1.s.2.w.11.alt.1" auth="no"><pos class="V"/></alt></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.2.w.11"><t>naam</t><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="N(soort,ev,basis,zijd,stan)"/><lemma class="naam"/><alt xml:id="WR-P-E-J-0000000001.p.1.s.2.w.11.alt.1" auth="no"><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="V"/></alt></w>'))
 
 
     def test006_addcorrection(self):
@@ -1713,7 +1901,7 @@ class Test4Edit(unittest.TestCase):
         self.assertEqual( w.annotation(folia.Correction).new(0).text() ,'stippellijn' )
         self.assertEqual( w.text(), 'stippellijn')
 
-        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><pos class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><new><t>stippellijn</t></new><original auth="no"><t>stippelijn</t></original></correction></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><new><t>stippellijn</t></new><original auth="no"><t>stippelijn</t></origin [...]
 
     def test006b_addcorrection(self):
         """Edit Check - Correcting Text (2)"""
@@ -1724,19 +1912,19 @@ class Test4Edit(unittest.TestCase):
         self.assertEqual( w.annotation(folia.Correction).new(0).text() ,'stippellijn' )
         self.assertEqual( w.text(), 'stippellijn')
 
-        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><pos class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><new><t>stippellijn</t></new><original auth="no"><t>stippelijn</t></original></correction></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><new><t>stippellijn</t></new><original auth="no"><t>stippelijn</t></origin [...]
 
     def test007_addcorrection2(self):
         """Edit Check - Correcting a Token Annotation element"""
         w = self.doc.index['WR-P-E-J-0000000001.p.1.s.8.w.11'] #stippelijn
         oldpos = w.annotation(folia.PosAnnotation)
-        newpos = folia.PosAnnotation(self.doc, cls='N(soort,ev,basis,zijd,stan)')
+        newpos = folia.PosAnnotation(self.doc, cls='N(soort,ev,basis,zijd,stan)', set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" )
         w.correct(original=oldpos,new=newpos, set='corrections',cls='spelling',annotator='testscript', annotatortype=folia.AnnotatorType.AUTO)
 
         self.assertEqual( w.annotation(folia.Correction).original(0) ,oldpos )
         self.assertEqual( w.annotation(folia.Correction).new(0),newpos )
 
-        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><t>stippelijn</t><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><new><pos class="N(soort,ev,basis,zijd,stan)"/></new><original auth="no"><pos class="FOUTN(soort,ev,basis,zijd,stan)"/></original></correction><lemma class="stippelijn"/></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><t>stippelijn</t><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><new><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="N(soort,ev,basis,zijd,stan)"/></new><original auth="no"><pos set="https://raw.githubusercontent.com/proycon/folia [...]
 
     def test008_addsuggestion(self):
         """Edit Check - Suggesting a text correction"""
@@ -1747,7 +1935,7 @@ class Test4Edit(unittest.TestCase):
         self.assertEqual( w.annotation(folia.Correction).suggestions(0).text() , 'stippellijn' )
         self.assertEqual( w.text(), 'stippelijn')
 
-        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><t>stippelijn</t><pos class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><suggestion auth="no"><t>stippellijn</t></suggestion></correction></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><t>stippelijn</t><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotatortype="auto" annotator="testscript"><suggestion auth="no"><t>stippellijn</t></suggestion></co [...]
 
     def test009a_idclash(self):
         """Edit Check - Checking for exception on adding a duplicate ID"""
@@ -1782,7 +1970,7 @@ class Test4Edit(unittest.TestCase):
         self.assertEqual( pos.parent, w)
         self.assertEqual( pos.doc, w.doc)
 
-        self.assertTrue( xmlcheck(w.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><t>stippelijn</t><pos class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><pos class="N" set="fakecgn"/></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><t>stippelijn</t><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><pos class="N" set="fakecgn"/></w>'))
 
     def test011_subtokenannot(self):
         """Edit Check - Adding morphemes"""
@@ -1803,7 +1991,7 @@ class Test4Edit(unittest.TestCase):
 
 
 
-        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.5.w.3"><t>handschriften</t><pos class="N(soort,mv,basis)"/><lemma class="handschrift"/><morphology><morpheme function="lexical" class="stem"><t offset="0">handschrift</t><lemma class="handschrift"/></morpheme><morpheme function="inflexional" class="suffix"><t offset="11">en</t></morpheme></morphology></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.5.w.3"><t>handschriften</t><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="N(soort,mv,basis)"/><lemma class="handschrift"/><morphology><morpheme function="lexical" class="stem"><t offset="0">handschrift</t><lemma class="handschrift"/></morpheme><morpheme function="inflexional" class="suffix"><t offset="11">en</t></mor [...]
 
     def test012_alignment(self):
         """Edit Check - Adding Alignment"""
@@ -1816,7 +2004,7 @@ class Test4Edit(unittest.TestCase):
         self.assertEqual( next(a.resolve()), self.doc['WR-P-E-J-0000000001.p.1.s.6.w.1'] )
         self.assertEqual( list(a.resolve())[1], self.doc['WR-P-E-J-0000000001.p.1.s.6.w.2'] )
 
-        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.6.w.8"><t>ze</t><pos class="VNW(pers,pron,stan,red,3,mv)"/><lemma class="ze"/><alignment class="coreference"><aref type="w" id="WR-P-E-J-0000000001.p.1.s.6.w.1"/><aref type="w" id="WR-P-E-J-0000000001.p.1.s.6.w.2"/></alignment></w>'))
+        self.assertTrue( xmlcheck(w.xmlstring(),'<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.6.w.8"><t>ze</t><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="VNW(pers,pron,stan,red,3,mv)"/><lemma class="ze"/><alignment class="coreference"><aref type="w" id="WR-P-E-J-0000000001.p.1.s.6.w.1"/><aref type="w" id="WR-P-E-J-0000000001.p.1.s.6.w.2"/></alignment></w>'))
 
 
 
@@ -1859,8 +2047,7 @@ class Test4Edit(unittest.TestCase):
         entity = word.add(folia.Entity, word, word2, cls="misc",set="http://raw.github.com/proycon/folia/master/setdefinitions/namedentities.foliaset.xml")
 
         self.assertIsInstance(entity, folia.Entity)
-        self.assertTrue(xmlcheck(entity.parent.parent.xmlstring(),'<part xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.4.part.1"><w xml:id="WR-P-E-J-0000000001.p.1.s.4.w.1"><t offset="0">De</t><t class="original" offset="0">De</t><pos class="LID(bep,stan,rest)"/><lemma class="de"/></w><w xml:id="WR-P-E-J-0000000001.p.1.s.4.w.2"><t offset="3">hoofdletter</t><pos class="N(soort,ev,basis,zijd,stan)"/><lemma class="hoofdletter"/></w><w xml:id="WR-P-E-J-0000000001.p.1.s.4. [...]
-
+        self.assertTrue(xmlcheck(entity.parent.parent.xmlstring(),'<part xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.4.part.1"><w xml:id="WR-P-E-J-0000000001.p.1.s.4.w.1"><t offset="0" ref="WR-P-E-J-0000000001.p.1.s.4">De</t><t class="original" offset="0" ref="WR-P-E-J-0000000001.p.1.s.4">De</t><pos class="LID(bep,stan,rest)" set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn"/><lemma class="de"/></w><w xml:id="WR-P-E-J-0000000 [...]
     def test013b_spanannot(self):
         """Edit Check - Adding nested Span Annotatation (add as append)"""
 
@@ -1909,8 +2096,7 @@ class Test4Edit(unittest.TestCase):
         entity = sentence.add(folia.Entity, word, word2, cls="misc",set="http://raw.github.com/proycon/folia/master/setdefinitions/namedentities.foliaset.xml")
 
         self.assertIsInstance(entity, folia.Entity)
-        self.assertTrue(xmlcheck(entity.parent.parent.xmlstring(),'<s xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.4"><t>De hoofdletter A wordt gebruikt voor het originele handschrift.</t><t class="original">De hoofdletter A wordt gebruikt voor het originele handschrift.</t><t class="translate">Uppercase A is used for the original.</t><part xml:id="WR-P-E-J-0000000001.p.1.s.4.part.1"><w xml:id="WR-P-E-J-0000000001.p.1.s.4.w.1"><t offset="0">De</t><t class="original"  [...]
-
+        self.assertTrue(xmlcheck(entity.parent.parent.xmlstring(),'<s xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.4"><t>De hoofdletter A wordt gebruikt voor het originele handschrift.</t><t class="original">De hoofdletter A wordt gebruikt voor het originele handschrift.</t><t class="translate">Uppercase A is used for the original.</t><part xml:id="WR-P-E-J-0000000001.p.1.s.4.part.1"><w xml:id="WR-P-E-J-0000000001.p.1.s.4.w.1"><t offset="0" ref="WR-P-E-J-0000000001.p [...]
 
     def test013e_spanannot(self):
         """Edit Check - Adding nested Span Annotation"""
@@ -1928,12 +2114,12 @@ class Test4Edit(unittest.TestCase):
     def test014_replace(self):
         """Edit Check - Replacing an annotation"""
         word = self.doc['WR-P-E-J-0000000001.p.1.s.3.w.14']
-        word.replace(folia.PosAnnotation(self.doc, cls='BOGUS') )
+        word.replace(folia.PosAnnotation(self.doc, cls='BOGUS', set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" ) )
 
         self.assertEqual( len(list(word.annotations(folia.PosAnnotation))), 1)
         self.assertEqual( word.annotation(folia.PosAnnotation).cls, 'BOGUS')
 
-        self.assertTrue( xmlcheck(word.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.3.w.14"><t>plaats</t><lemma class="plaats"/><pos class="BOGUS"/></w>'))
+        self.assertTrue( xmlcheck(word.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.3.w.14"><t>plaats</t><lemma class="plaats"/><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="BOGUS"/></w>'))
 
     def test015_remove(self):
         """Edit Check - Removing an annotation"""
@@ -1950,12 +2136,13 @@ class Test4Edit(unittest.TestCase):
         pos = w.annotation(folia.PosAnnotation)
         pos.datetime = datetime(1982, 12, 15, 19, 0, 1) #(the datetime of my joyful birth)
 
-        self.assertTrue( xmlcheck(pos.xmlstring(), '<pos xmlns="http://ilk.uvt.nl/folia" class="WW(pv,tgw,met-t)" datetime="1982-12-15T19:00:01"/>'))
+        self.assertTrue( xmlcheck(pos.xmlstring(), '<pos xmlns="http://ilk.uvt.nl/folia" set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="WW(pv,tgw,met-t)" datetime="1982-12-15T19:00:01"/>'))
 
     def test017_wordtext(self):
         """Edit Check - Altering word text"""
 
         #Important note: directly altering text is usually bad practise, you'll want to use proper corrections instead.
+        #this may also lead to inconsistencies if there is redundant text on higher levels
         w = self.doc['WR-P-E-J-0000000001.p.1.s.8.w.9']
         self.assertEqual(w.text(), 'terweil')
 
@@ -1966,6 +2153,7 @@ class Test4Edit(unittest.TestCase):
         """Edit Check - Altering word text with reserved symbols"""
 
         #Important note: directly altering text is usually bad practise, you'll want to use proper corrections instead.
+        #This test just serves to test reserved symbols
         w = self.doc['WR-P-E-J-0000000001.p.1.s.8.w.9']
 
         w.settext('1 & 1 > 0')
@@ -1976,15 +2164,16 @@ class Test4Edit(unittest.TestCase):
         """Edit Check - Altering sentence text (untokenised by definition)"""
         s = self.doc['WR-P-E-J-0000000001.p.1.s.1']
 
-        self.assertEqual(s.text(), 'Stemma is een ander woord voor stamboom .') #text is obtained from children, since there is no direct text associated
+        self.assertEqual(s.text(), 'Stemma is een ander woord voor stamboom.') #text is obtained from children, since there is no direct text associated
 
         self.assertFalse(s.hastext()) #no text DIRECTLY associated with the sentence
 
-        #associating text directly with the sentence: de-tokenised by definition!
+        #associating text directly with the sentence (should be in agreement with text from children)
         s.settext('Stemma is een ander woord voor stamboom.')
         self.assertTrue(s.hastext())
-        self.assertEqual(s.text(), 'Stemma is een ander woord voor stamboom .') #text still obtained from children rather than directly associated text!!
-        self.assertEqual(s.stricttext(), 'Stemma is een ander woord voor stamboom.')
+        self.assertEqual(s.text(), 'Stemma is een ander woord voor stamboom.') #text still obtained from children rather than directly associated text!!
+        self.assertEqual(s.stricttext(), 'Stemma is een ander woord voor stamboom.') #text obtained directly
+
 
     def test018b_sentencetext(self):
         """Edit Check - Altering sentence text (untokenised by definition)"""
@@ -1993,8 +2182,7 @@ class Test4Edit(unittest.TestCase):
 
         self.assertEqual( s.text(), 'Een volle lijn duidt op een verwantschap , terweil een stippelijn op een onzekere verwantschap duidt .' ) #dynamic from children
 
-
-        s.settext('Een volle lijn duidt op een verwantschap, terwijl een stippellijn op een onzekere verwantschap duidt.' ) #setting the correct text here will cause a mismatch with the text on deeper levels, but is permitted (deep validation should detect it)
+        #s.settext('Een volle lijn duidt op een verwantschap, terwijl een stippellijn op een onzekere verwantschap duidt.' ) #setting the correct text here will cause a mismatch with the text on deeper levels, but is permitted (deep validation should detect it)
 
         s.settext('Een volle lijn duidt op een verwantschap, terweil een stippelijn op een onzekere verwantschap duidt.', 'original' )
 
@@ -2002,7 +2190,7 @@ class Test4Edit(unittest.TestCase):
         self.assertTrue( s.hastext('original') )
         self.assertEqual( s.stricttext('original'), 'Een volle lijn duidt op een verwantschap, terweil een stippelijn op een onzekere verwantschap duidt.' )
 
-        self.assertTrue( xmlcheck(s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8"><t>Een volle lijn duidt op een verwantschap, terwijl een stippellijn op een onzekere verwantschap duidt.</t><t class="original">Een volle lijn duidt op een verwantschap, terweil een stippelijn op een onzekere verwantschap duidt.</t><w xml:id="WR-P-E-J-0000000001.p.1.s.8.w.1"><t>Een</t><pos class="LID(onbep,stan,agr)"/><lemma class="een"/></w><quote xml:id="WR-P-E-J-00 [...]
+        self.assertTrue( xmlcheck(s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8"><t class="original">Een volle lijn duidt op een verwantschap, terweil een stippelijn op een onzekere verwantschap duidt.</t><w xml:id="WR-P-E-J-0000000001.p.1.s.8.w.1"><t>Een</t><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="LID(onbep,stan,agr)"/><lemma class="een"/></w><quote xml:id="WR-P-E-J-0000000001.p.1.s.8. [...]
 
     def test019_adderrordetection(self):
         """Edit Check - Error Detection"""
@@ -2040,224 +2228,216 @@ class Test4Edit(unittest.TestCase):
     #    self.assertEqual( w.text(), 'stippellijn')
 
 class Test4Create(unittest.TestCase):
-        def test001_create(self):
-            """Creating a FoLiA Document from scratch"""
-            self.doc = folia.Document(id='example')
-            self.doc.declare(folia.AnnotationType.TOKEN, 'adhocset',annotator='proycon')
+    def test001_create(self):
+        """Creating a FoLiA Document from scratch"""
+        self.doc = folia.Document(id='example')
+        self.doc.declare(folia.AnnotationType.TOKEN, 'adhocset',annotator='proycon')
 
-            self.assertEqual(self.doc.defaultset(folia.AnnotationType.TOKEN), 'adhocset')
-            self.assertEqual(self.doc.defaultannotator(folia.AnnotationType.TOKEN, 'adhocset'), 'proycon')
+        self.assertEqual(self.doc.defaultset(folia.AnnotationType.TOKEN), 'adhocset')
+        self.assertEqual(self.doc.defaultannotator(folia.AnnotationType.TOKEN, 'adhocset'), 'proycon')
 
-            text = folia.Text(self.doc, id=self.doc.id + '.text.1')
-            self.doc.append( text )
+        text = folia.Text(self.doc, id=self.doc.id + '.text.1')
+        self.doc.append( text )
 
-            text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="online"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
-                ]
-                )
-            )
+        text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="online"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
+            ])
+        )
 
-            self.assertEqual( len(self.doc.index[self.doc.id + '.s.1']), 5)
+        self.assertEqual( len(self.doc.index[self.doc.id + '.s.1']), 5)
 
 class Test5Correction(unittest.TestCase):
-        def setUp(self):
-            self.doc = folia.Document(id='example')
-            self.doc.declare(folia.AnnotationType.TOKEN, set='adhocset',annotator='proycon')
-            self.text = folia.Text(self.doc, id=self.doc.id + '.text.1')
-            self.doc.append( self.text )
-
-
-        def test001_splitcorrection(self):
-            """Correction - Split correction"""
-
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="online"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
-                ]
-                )
-            )
+    def setUp(self):
+        self.doc = folia.Document(id='example', textvalidation=True)
+        self.doc.declare(folia.AnnotationType.TOKEN, set='adhocset',annotator='proycon')
+        self.text = folia.Text(self.doc, id=self.doc.id + '.text.1')
+        self.doc.append( self.text )
+
+
+    def test001_splitcorrection(self):
+        """Correction - Split correction"""
+
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="online"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
+            ])
+        )
 
 
-            w = self.doc.index[self.doc.id + '.s.1.w.4']
+        w = self.doc.index[self.doc.id + '.s.1.w.4']
 
-            w.split( folia.Word(self.doc, id=self.doc.id + '.s.1.w.4a', text="on"), folia.Word(self.doc, id=self.doc.id + '.s.1.w.4b', text="line") )
+        w.split( folia.Word(self.doc, id=self.doc.id + '.s.1.w.4a', text="on"), folia.Word(self.doc, id=self.doc.id + '.s.1.w.4b', text="line") )
 
-            s = self.doc.index[self.doc.id + '.s.1']
-            self.assertEqual( s.words(-3).text(), 'on' )
-            self.assertEqual( s.words(-2).text(), 'line' )
-            self.assertEqual( s.text(), 'De site staat on line .' )
-            self.assertEqual( len(list(s.words())), 6 )
-            self.assertTrue( xmlcheck(s.xmlstring(),  '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><correction xml:id="example.s.1.correction.1"><new><w xml:id="example.s.1.w.4a"><t>on</t></w><w xml:id="example.s.1.w.4b"><t>line</t></w></new><original auth="no"><w xml:id="example.s.1.w.4"><t>online</t></w></original></correction><w xml:id="example.s. [...]
+        s = self.doc.index[self.doc.id + '.s.1']
+        self.assertEqual( s.words(-3).text(), 'on' )
+        self.assertEqual( s.words(-2).text(), 'line' )
+        self.assertEqual( s.text(), 'De site staat on line .' )
+        self.assertEqual( len(list(s.words())), 6 )
+        self.assertTrue( xmlcheck(s.xmlstring(),  '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><correction xml:id="example.s.1.correction.1"><new><w xml:id="example.s.1.w.4a"><t>on</t></w><w xml:id="example.s.1.w.4b"><t>line</t></w></new><original auth="no"><w xml:id="example.s.1.w.4"><t>online</t></w></original></correction><w xml:id="example.s.1.w. [...]
 
 
-        def test001_splitcorrection2(self):
-            """Correction - Split suggestion"""
+    def test001_splitcorrection2(self):
+        """Correction - Split suggestion"""
 
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="online"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
-                ]
-                )
-            )
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="online"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
+            ])
+        )
 
 
-            w = self.doc.index[self.doc.id + '.s.1.w.4']
+        w = self.doc.index[self.doc.id + '.s.1.w.4']
 
-            s = self.doc.index[self.doc.id + '.s.1']
-            w.split( folia.Word(self.doc, generate_id_in=s, text="on"), folia.Word(self.doc, generate_id_in=s, text="line"), suggest=True )
+        s = self.doc.index[self.doc.id + '.s.1']
+        w.split( folia.Word(self.doc, generate_id_in=s, text="on"), folia.Word(self.doc, generate_id_in=s, text="line"), suggest=True )
 
-            self.assertEqual( len(list(s.words())), 5 )
-            self.assertEqual( s.words(-2).text(), 'online' )
-            self.assertEqual( s.text(), 'De site staat online .' )
+        self.assertEqual( len(list(s.words())), 5 )
+        self.assertEqual( s.words(-2).text(), 'online' )
+        self.assertEqual( s.text(), 'De site staat online .' )
 
-            self.assertTrue( xmlcheck(s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><correction xml:id="example.s.1.correction.1"><current><w xml:id="example.s.1.w.4"><t>online</t></w></current><suggestion auth="no"><w xml:id="example.s.1.w.6"><t>on</t></w><w xml:id="example.s.1.w.7"><t>line</t></w></suggestion></correction><w xml:id="e [...]
+        self.assertTrue( xmlcheck(s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><correction xml:id="example.s.1.correction.1"><current><w xml:id="example.s.1.w.4"><t>online</t></w></current><suggestion auth="no"><w xml:id="example.s.1.w.6"><t>on</t></w><w xml:id="example.s.1.w.7"><t>line</t></w></suggestion></correction><w xml:id="examp [...]
 
 
-        def test002_mergecorrection(self):
-            """Correction - Merge corrections"""
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="on"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text="line"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.6', text=".")
-                ]
-                )
-            )
+    def test002_mergecorrection(self):
+        """Correction - Merge corrections"""
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="on"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text="line"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.6', text=".")
+            ])
+        )
 
-            s = self.doc.index[self.doc.id + '.s.1']
+        s = self.doc.index[self.doc.id + '.s.1']
 
 
-            s.mergewords( folia.Word(self.doc, 'online', id=self.doc.id + '.s.1.w.4-5') , self.doc.index[self.doc.id + '.s.1.w.4'], self.doc.index[self.doc.id + '.s.1.w.5'] )
+        s.mergewords( folia.Word(self.doc, 'online', id=self.doc.id + '.s.1.w.4-5') , self.doc.index[self.doc.id + '.s.1.w.4'], self.doc.index[self.doc.id + '.s.1.w.5'] )
 
-            self.assertEqual( len(list(s.words())), 5 )
-            self.assertEqual( s.text(), 'De site staat online .')
+        self.assertEqual( len(list(s.words())), 5 )
+        self.assertEqual( s.text(), 'De site staat online .')
 
-            #incorrection() test, check if newly added word correctly reports being part of a correction
-            w = self.doc.index[self.doc.id + '.s.1.w.4-5']
-            self.assertTrue( isinstance(w.incorrection(), folia.Correction) ) #incorrection return the correction the word is part of, or None if not part of a correction,
+        #incorrection() test, check if newly added word correctly reports being part of a correction
+        w = self.doc.index[self.doc.id + '.s.1.w.4-5']
+        self.assertTrue( isinstance(w.incorrection(), folia.Correction) ) #incorrection return the correction the word is part of, or None if not part of a correction,
 
 
-            self.assertTrue( xmlcheck(s.xmlstring(),  '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><correction xml:id="example.s.1.correction.1"><new><w xml:id="example.s.1.w.4-5"><t>online</t></w></new><original auth="no"><w xml:id="example.s.1.w.4"><t>on</t></w><w xml:id="example.s.1.w.5"><t>line</t></w></original></correction><w xml:id="example.s. [...]
+        self.assertTrue( xmlcheck(s.xmlstring(),  '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><correction xml:id="example.s.1.correction.1"><new><w xml:id="example.s.1.w.4-5"><t>online</t></w></new><original auth="no"><w xml:id="example.s.1.w.4"><t>on</t></w><w xml:id="example.s.1.w.5"><t>line</t></w></original></correction><w xml:id="example.s.1.w. [...]
 
 
-        def test003_deletecorrection(self):
-            """Correction - Deletion"""
+    def test003_deletecorrection(self):
+        """Correction - Deletion"""
 
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="Ik"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="zie"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="een"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="groot"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text="huis"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.6', text=".")
-                ]
-                )
-            )
-            s = self.doc.index[self.doc.id + '.s.1']
-            s.deleteword(self.doc.index[self.doc.id + '.s.1.w.4'])
-            self.assertEqual( len(list(s.words())), 5 )
-            self.assertEqual( s.text(), 'Ik zie een huis .')
-
-            self.assertTrue( xmlcheck(s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>Ik</t></w><w xml:id="example.s.1.w.2"><t>zie</t></w><w xml:id="example.s.1.w.3"><t>een</t></w><correction xml:id="example.s.1.correction.1"><new/><original auth="no"><w xml:id="example.s.1.w.4"><t>groot</t></w></original></correction><w xml:id="example.s.1.w.5"><t>huis</t></w><w xml:id="example.s.1.w.6"><t>.</t></w></s>') )
-
-        def test004_insertcorrection(self):
-            """Correction - Insert"""
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="Ik"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="zie"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="een"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="huis"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
-                ]
-                )
-            )
-            s = self.doc.index[self.doc.id + '.s.1']
-            s.insertword( folia.Word(self.doc, id=self.doc.id+'.s.1.w.3b',text='groot'),  self.doc.index[self.doc.id + '.s.1.w.3'])
-            self.assertEqual( len(list(s.words())), 6 )
-
-            self.assertEqual( s.text(), 'Ik zie een groot huis .')
-            self.assertTrue( xmlcheck( s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>Ik</t></w><w xml:id="example.s.1.w.2"><t>zie</t></w><w xml:id="example.s.1.w.3"><t>een</t></w><correction xml:id="example.s.1.correction.1"><new><w xml:id="example.s.1.w.3b"><t>groot</t></w></new></correction><w xml:id="example.s.1.w.4"><t>huis</t></w><w xml:id="example.s.1.w.5"><t>.</t></w></s>'))
-
-        def test005_reusecorrection(self):
-            """Correction - Re-using a correction with only suggestions"""
-            global FOLIAEXAMPLE
-            self.doc = folia.Document(string=FOLIAEXAMPLE)
-
-            w = self.doc.index['WR-P-E-J-0000000001.p.1.s.8.w.11'] #stippelijn
-            w.correct(suggestion='stippellijn', set='corrections',cls='spelling',annotator='testscript', annotatortype=folia.AnnotatorType.AUTO)
-            c = w.annotation(folia.Correction)
-
-            self.assertTrue( isinstance(w.annotation(folia.Correction), folia.Correction) )
-            self.assertEqual( w.annotation(folia.Correction).suggestions(0).text() , 'stippellijn' )
-            self.assertEqual( w.text(), 'stippelijn')
-
-            w.correct(new='stippellijn',set='corrections',cls='spelling',annotator='John Doe', annotatortype=folia.AnnotatorType.MANUAL,reuse=c.id)
-
-            self.assertEqual( w.text(), 'stippellijn')
-            self.assertEqual( len(list(w.annotations(folia.Correction))), 1 )
-            self.assertEqual( w.annotation(folia.Correction).suggestions(0).text() , 'stippellijn' )
-            self.assertEqual( w.annotation(folia.Correction).suggestions(0).annotator , 'testscript' )
-            self.assertEqual( w.annotation(folia.Correction).suggestions(0).annotatortype , folia.AnnotatorType.AUTO)
-            self.assertEqual( w.annotation(folia.Correction).new(0).text() , 'stippellijn' )
-            self.assertEqual( w.annotation(folia.Correction).annotator , 'John Doe' )
-            self.assertEqual( w.annotation(folia.Correction).annotatortype , folia.AnnotatorType.MANUAL)
-
-            self.assertTrue( xmlcheck(w.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><pos class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotator="John Doe"><suggestion annotator="testscript" auth="no" annotatortype="auto"><t>stippellijn</t></suggestion><new><t>stippellijn</t></new><original auth="no"><t>stippelijn</t></original></correct [...]
-
-        def test006_deletionsuggestion(self):
-            """Correction - Suggestion for deletion with parent merge suggestion"""
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="on"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text="line"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.1.w.6', text=".")
-                ]),
-            )
-            self.text.append(
-                folia.Sentence(self.doc,id=self.doc.id + '.s.2', contents=[
-                    folia.Word(self.doc,id=self.doc.id + '.s.2.w.1', text="sinds"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.2.w.2', text="vorige"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.2.w.3', text="week"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.2.w.4', text="zondag"),
-                    folia.Word(self.doc,id=self.doc.id + '.s.2.w.6', text=".")
-                ])
-            )
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="Ik"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="zie"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="een"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="groot"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text="huis"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.6', text=".")
+            ])
+        )
+        s = self.doc.index[self.doc.id + '.s.1']
+        s.deleteword(self.doc.index[self.doc.id + '.s.1.w.4'])
+        self.assertEqual( len(list(s.words())), 5 )
+        self.assertEqual( s.text(), 'Ik zie een huis .')
+
+        self.assertTrue( xmlcheck(s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>Ik</t></w><w xml:id="example.s.1.w.2"><t>zie</t></w><w xml:id="example.s.1.w.3"><t>een</t></w><correction xml:id="example.s.1.correction.1"><new/><original auth="no"><w xml:id="example.s.1.w.4"><t>groot</t></w></original></correction><w xml:id="example.s.1.w.5"><t>huis</t></w><w xml:id="example.s.1.w.6"><t>.</t></w></s>') )
+
+    def test004_insertcorrection(self):
+        """Correction - Insert"""
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="Ik"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="zie"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="een"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="huis"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text=".")
+            ])
+        )
+        s = self.doc.index[self.doc.id + '.s.1']
+        s.insertword( folia.Word(self.doc, id=self.doc.id+'.s.1.w.3b',text='groot'),  self.doc.index[self.doc.id + '.s.1.w.3'])
+        self.assertEqual( len(list(s.words())), 6 )
+
+        self.assertEqual( s.text(), 'Ik zie een groot huis .')
+        self.assertTrue( xmlcheck( s.xmlstring(), '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>Ik</t></w><w xml:id="example.s.1.w.2"><t>zie</t></w><w xml:id="example.s.1.w.3"><t>een</t></w><correction xml:id="example.s.1.correction.1"><new><w xml:id="example.s.1.w.3b"><t>groot</t></w></new></correction><w xml:id="example.s.1.w.4"><t>huis</t></w><w xml:id="example.s.1.w.5"><t>.</t></w></s>'))
+
+    def test005_reusecorrection(self):
+        """Correction - Re-using a correction with only suggestions"""
+        self.doc = folia.Document(string=FOLIAEXAMPLE)
+
+        w = self.doc.index['WR-P-E-J-0000000001.p.1.s.8.w.11'] #stippelijn
+        w.correct(suggestion='stippellijn', set='corrections',cls='spelling',annotator='testscript', annotatortype=folia.AnnotatorType.AUTO)
+        c = w.annotation(folia.Correction)
+
+        self.assertTrue( isinstance(w.annotation(folia.Correction), folia.Correction) )
+        self.assertEqual( w.annotation(folia.Correction).suggestions(0).text() , 'stippellijn' )
+        self.assertEqual( w.text(), 'stippelijn')
+
+        w.correct(new='stippellijn',set='corrections',cls='spelling',annotator='John Doe', annotatortype=folia.AnnotatorType.MANUAL,reuse=c.id)
+
+        self.assertEqual( w.text(), 'stippellijn')
+        self.assertEqual( len(list(w.annotations(folia.Correction))), 1 )
+        self.assertEqual( w.annotation(folia.Correction).suggestions(0).text() , 'stippellijn' )
+        self.assertEqual( w.annotation(folia.Correction).suggestions(0).annotator , 'testscript' )
+        self.assertEqual( w.annotation(folia.Correction).suggestions(0).annotatortype , folia.AnnotatorType.AUTO)
+        self.assertEqual( w.annotation(folia.Correction).new(0).text() , 'stippellijn' )
+        self.assertEqual( w.annotation(folia.Correction).annotator , 'John Doe' )
+        self.assertEqual( w.annotation(folia.Correction).annotatortype , folia.AnnotatorType.MANUAL)
+
+        self.assertTrue( xmlcheck(w.xmlstring(), '<w xmlns="http://ilk.uvt.nl/folia" xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11"><pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" class="FOUTN(soort,ev,basis,zijd,stan)"/><lemma class="stippelijn"/><correction xml:id="WR-P-E-J-0000000001.p.1.s.8.w.11.correction.1" class="spelling" annotator="John Doe"><suggestion annotator="testscript" auth="no" annotatortype="auto"><t>stippellijn</t></suggestion [...]
+
+    def test006_deletionsuggestion(self):
+        """Correction - Suggestion for deletion with parent merge suggestion"""
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.1', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.1', text="De"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.2', text="site"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.3', text="staat"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.4', text="on"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.5', text="line"),
+                folia.Word(self.doc,id=self.doc.id + '.s.1.w.6', text=".")
+            ]),
+        )
+        self.text.append(
+            folia.Sentence(self.doc,id=self.doc.id + '.s.2', contents=[
+                folia.Word(self.doc,id=self.doc.id + '.s.2.w.1', text="sinds"),
+                folia.Word(self.doc,id=self.doc.id + '.s.2.w.2', text="vorige"),
+                folia.Word(self.doc,id=self.doc.id + '.s.2.w.3', text="week"),
+                folia.Word(self.doc,id=self.doc.id + '.s.2.w.4', text="zondag"),
+                folia.Word(self.doc,id=self.doc.id + '.s.2.w.6', text=".")
+            ])
+        )
 
-            s = self.doc.index[self.doc.id + '.s.1']
-            s2 = self.doc.index[self.doc.id + '.s.2']
-            w = self.doc.index[self.doc.id + '.s.1.w.6']
-            s.remove(w)
-            s.append( folia.Correction(self.doc, folia.Current(self.doc, w), folia.Suggestion(self.doc, merge=s2.id)) )
+        s = self.doc.index[self.doc.id + '.s.1']
+        s2 = self.doc.index[self.doc.id + '.s.2']
+        w = self.doc.index[self.doc.id + '.s.1.w.6']
+        s.remove(w)
+        s.append( folia.Correction(self.doc, folia.Current(self.doc, w), folia.Suggestion(self.doc, merge=s2.id)) )
 
-            self.assertTrue( xmlcheck(s.xmlstring(),  '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><w xml:id="example.s.1.w.4"><t>on</t></w><w xml:id="example.s.1.w.5"><t>line</t></w><correction><current><w xml:id="example.s.1.w.6"><t>.</t></w></current><suggestion merge="example.s.2" auth="no"/></correction></s>'))
+        self.assertTrue( xmlcheck(s.xmlstring(),  '<s xmlns="http://ilk.uvt.nl/folia" xml:id="example.s.1"><w xml:id="example.s.1.w.1"><t>De</t></w><w xml:id="example.s.1.w.2"><t>site</t></w><w xml:id="example.s.1.w.3"><t>staat</t></w><w xml:id="example.s.1.w.4"><t>on</t></w><w xml:id="example.s.1.w.5"><t>line</t></w><correction><current><w xml:id="example.s.1.w.6"><t>.</t></w></current><suggestion merge="example.s.2" auth="no"/></correction></s>'))
 
 
 
 class Test6Query(unittest.TestCase):
     def setUp(self):
-        global FOLIAEXAMPLE
-        self.doc = folia.Document(string=FOLIAEXAMPLE)
+        self.doc = folia.Document(string=FOLIAEXAMPLE, textvalidation=True)
 
     def test001_findwords_simple(self):
         """Querying - Find words (simple)"""
@@ -2320,8 +2500,7 @@ class Test6Query(unittest.TestCase):
                 folia.Word(doc,id=doc.id + '.s.1.w.5', text="b"),
                 folia.Word(doc,id=doc.id + '.s.1.w.6', text="a"),
                 folia.Word(doc,id=doc.id + '.s.1.w.7', text="a"),
-            ]
-            )
+            ])
         )
         doc.append(text)
 
@@ -2392,8 +2571,7 @@ class Test6Query(unittest.TestCase):
                 folia.Word(doc,id=doc.id + '.s.1.w.5', text="a"),
                 folia.Word(doc,id=doc.id + '.s.1.w.6', text="b"),
                 folia.Word(doc,id=doc.id + '.s.1.w.7', text="c"),
-            ]
-            )
+            ])
         )
         doc.append(text)
 
@@ -2415,7 +2593,7 @@ class Test9Reader(unittest.TestCase):
     def test000_worditer(self):
         """Stream reader - Iterating over words"""
         count = 0
-        for word in self.reader:
+        for _ in self.reader:
             count += 1
         self.assertEqual(count, 192)
 
@@ -2491,28 +2669,1297 @@ class Test7XpathQuery(unittest.TestCase):
 
 
 class Test8Validation(unittest.TestCase):
-      def test000_relaxng(self):
+    def test000_relaxng(self):
         """Validation - RelaxNG schema generation"""
         folia.relaxng()
 
-      def test001_shallowvalidation(self):
+    def test001_shallowvalidation(self):
         """Validation - Shallow validation against automatically generated RelaxNG schema"""
         folia.validate(os.path.join(TMPDIR,'foliasavetest.xml'))
 
-      def test002_loadsetdefinitions(self):
+    def test002_loadsetdefinitions(self):
         """Validation - Loading of set definitions"""
         doc = folia.Document(file=os.path.join(TMPDIR,'foliatest.xml'), loadsetdefinitions=True)
         assert isinstance( doc.setdefinitions["http://raw.github.com/proycon/folia/master/setdefinitions/namedentities.foliaset.xml"], folia.SetDefinition)
 
 class Test9Validation(unittest.TestCase):
-      def test001_deepvalidation(self):
+    def test001_deepvalidation(self):
         """Validation - Deep Validation"""
-        doc = folia.Document(file=os.path.join(FOLIAPATH,'test/example.deep.xml'), deepvalidation=True, allowadhocsets=True)
+        folia.Document(file=os.path.join(FOLIAPATH,'test/example.deep.xml'), deepvalidation=True, textvalidation=True, allowadhocsets=True)
+
+    def test002_textvalidation(self):
+        """Validation - Text Validation"""
+        folia.Document(file=os.path.join(FOLIAPATH,'test/example.textvalidation.xml'), textvalidation=True)
+
+    def test003_invalid_text_misspelled(self):
+        """Validation - Invalid Text (Misspelled word)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers als Couperus, Haasse, of Grunberg?</t>
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literrair oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        self.assertRaises( folia.InconsistentText, folia.Document, string=xml, textvalidation=True) #exception
+
+
+    def test004_invalid_text_missing(self):
+        """Validation - Invalid Text (Missing Word)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers als Couperus, Haasse, of Grunberg?</t>
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        self.assertRaises( folia.InconsistentText, folia.Document, string=xml, textvalidation=True) #exception
+
+
+    def test005_textvalidation_intermittent_redundancy(self):
+        """Validation - Text Validation (Intermittent Redundancy)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers als Couperus, Haasse, of Grunberg? Of kan ik het ook?</t>
+    <p xml:id="example.p.1">
+      <!-- Note: no text here on paragraph level -->
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+      </s>
+      <s xml:id="example.p.1.s.2">
+        <t> Of kan ik
+het    ook   ?
+	    </t>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        folia.Document(file=os.path.join(FOLIAPATH,'test/example.textvalidation.xml'), textvalidation=True)
+
+    def test006_multiple_textclasses(self):
+        """Validation - Invalid Text (Multiple classes)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers als Couperus, Haasse, of Grunberg?</t>
+      <t class="missingword">Is het creëren van een volwaardig oeuvre voorbehouden aan schrijvers als Couperus, Haasse, of Grunberg?</t>
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+        <t class="missingword">Is het creëren van een volwaardig oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        folia.Document(string=xml, textvalidation=True)
+
+    def test007_textcheck_no_morphemes(self):
+        """Validation - No text checking on (nested) morphemes"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <pos-annotation set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn" annotator="frog" annotatortype="auto" />
+      <pos-annotation annotator="frog-mbma-1.0" annotatortype="auto" datetime="2017-04-20T16:48:45" set="http://ilk.uvt.nl/folia/sets/frog-mbpos-clex"/>
+      <lemma-annotation set="lemmas-nl" annotator="tadpole" annotatortype="auto" />
+      <morphological-annotation annotator="proycon" annotatortype="manual" />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+      <w xml:id="WR-P-E-J-0000000001.p.1.s.2.w.16">
+        <t>genealogie</t>
+        <pos class="N(soort,ev,basis,zijd,stan)" set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn"/>
+        <lemma class="genealogie"/>
+        <morphology>
+          <morpheme class="complex">
+            <t>genealogie</t>
+            <feat class="[[genealogisch]adjective[ie]]noun/singular" subset="structure"/>
+            <pos class="N" set="http://ilk.uvt.nl/folia/sets/frog-mbpos-clex"/>
+            <morpheme class="complex">
+              <feat class="N_A*" subset="applied_rule"/>
+              <feat class="[[genealogisch]adjective[ie]]noun" subset="structure"/>
+              <pos class="N" set="http://ilk.uvt.nl/folia/sets/frog-mbpos-clex"/>
+              <morpheme class="stem">
+                <t>genealogisch</t>
+                <pos class="A" set="http://ilk.uvt.nl/folia/sets/frog-mbpos-clex"/>
+              </morpheme>
+              <morpheme class="affix">
+                <t>ie</t>
+                <feat class="[ie]" subset="structure"/>
+              </morpheme>
+             </morpheme>
+             <morpheme class="inflection">
+              <feat class="singular" subset="inflection"/>
+             </morpheme>
+          </morpheme>
+        </morphology>
+      </w>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        folia.Document(string=xml, textvalidation=True)
+
+
+    def test008_offset(self):
+        """Validation - Offset validation"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <t offset="7">creëren</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD">
+          <t offset="67">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="79">als</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.13" class="WORD" space="no">
+          <t offset="83">Couperus</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.14" class="PUNCTUATION">
+          <t offset="91">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.15" class="WORD" space="no">
+          <t offset="94">Haasse</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.16" class="PUNCTUATION">
+          <t offset="100">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.17" class="WORD">
+          <t offset="102">of</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.18" class="WORD" space="no">
+          <t offset="106">Grunberg</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.19" class="PUNCTUATION">
+          <t offset="114">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+        self.assertEqual( doc['example.p.1.s.1.w.19'].textcontent().getreference(), doc['example.p.1.s.1'] ) #testing resolving implicit reference
+
+
+
+    def test009_invalid_offset(self):
+        """Validation - Offset validation (invalid)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <t offset="7">creëren</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="10">van</t> <!-- this one is invalid -->
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD">
+          <t offset="67">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="79">als</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.13" class="WORD" space="no">
+          <t offset="83">Couperus</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.14" class="PUNCTUATION">
+          <t offset="91">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.15" class="WORD" space="no">
+          <t offset="94">Haasse</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.16" class="PUNCTUATION">
+          <t offset="100">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.17" class="WORD">
+          <t offset="102">of</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.18" class="WORD" space="no">
+          <t offset="106">Grunberg</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.19" class="PUNCTUATION">
+          <t offset="114">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        self.assertRaises( folia.UnresolvableTextContent, folia.Document, string=xml, textvalidation=True) #exception
+
+    def test010_offset_reference(self):
+        """Validation - Offset validation with explicit references"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers
+	als Couperus, 	Haasse, of
+	Grunberg?</t>
+      <s xml:id="example.p.1.s.1">
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0" ref="example.p.1">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3" ref="example.p.1">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <t offset="7" ref="example.p.1">creëren</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15" ref="example.p.1">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19" ref="example.p.1">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23" ref="example.p.1">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34" ref="example.p.1">literair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43" ref="example.p.1">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50" ref="example.p.1">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63" ref="example.p.1">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD">
+          <t offset="67" ref="example.p.1">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="79" ref="example.p.1">als</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.13" class="WORD" space="no">
+          <t offset="83" ref="example.p.1">Couperus</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.14" class="PUNCTUATION">
+          <t offset="91" ref="example.p.1">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.15" class="WORD" space="no">
+          <t offset="94" ref="example.p.1">Haasse</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.16" class="PUNCTUATION">
+          <t offset="100" ref="example.p.1">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.17" class="WORD">
+          <t offset="102" ref="example.p.1">of</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.18" class="WORD" space="no">
+          <t offset="106" ref="example.p.1">Grunberg</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.19" class="PUNCTUATION">
+          <t offset="114" ref="example.p.1">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+        self.assertEqual( doc['example.p.1.s.1.w.19'].textcontent().getreference(), doc['example.p.1'] ) #testing resolving explicit reference
+
+    def test011a_offset_textmarkup(self):
+        """Validation - Offset validation with text markup (non-text-modifiers)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een <t-style class="emphasis">volwaardig</t-style> literair oeuvre voorbehouden aan schrijvers
+\tals <t-str xlink:href="https://nl.wikipedia.org/wiki/Louis_Couperus" xlink:type="simple">Couperus</t-str>, 	Haasse, of
+\tGrunberg?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <t offset="7">creëren</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD">
+          <t offset="67">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="79">als</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.13" class="WORD" space="no">
+          <t offset="83">Couperus</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.14" class="PUNCTUATION">
+          <t offset="91">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.15" class="WORD" space="no">
+          <t offset="94">Haasse</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.16" class="PUNCTUATION">
+          <t offset="100">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.17" class="WORD">
+          <t offset="102">of</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.18" class="WORD" space="no">
+          <t offset="106">Grunberg</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.19" class="PUNCTUATION">
+          <t offset="114">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+        self.assertEqual( doc['example.p.1.s.1.w.19'].textcontent().getreference(), doc['example.p.1.s.1'] ) #testing resolving implicit reference
+
+    def test011b_offset_textmarkup(self):
+        """Validation - Offset validation with text markup (with text modifiers like br)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een <t-style class="emphasis">volwaardig</t-style> literair oeuvre voorbehouden aan schrijvers<br/>\tals <t-str xlink:href="https://nl.wikipedia.org/wiki/Louis_Couperus" xlink:type="simple">Couperus</t-str>, 	Haasse, of
+\tGrunberg?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <t offset="7">creëren</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD">
+          <t offset="67">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="79">als</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.13" class="WORD" space="no">
+          <t offset="83">Couperus</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.14" class="PUNCTUATION">
+          <t offset="91">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.15" class="WORD" space="no">
+          <t offset="94">Haasse</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.16" class="PUNCTUATION">
+          <t offset="100">,</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.17" class="WORD">
+          <t offset="102">of</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.18" class="WORD" space="no">
+          <t offset="106">Grunberg</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.19" class="PUNCTUATION">
+          <t offset="114">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+        self.assertEqual( doc['example.p.1.s.1.w.19'].textcontent().getreference(), doc['example.p.1.s.1'] ) #testing resolving implicit reference
+
+    def test012_string(self):
+        """Validation - Text Validation on String"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een <t-style class="emphasis">volwaardig</t-style> literair oeuvre voorbehouden aan schrijvers<br/>\tals <t-str xlink:href="https://nl.wikipedia.org/wiki/Louis_Couperus" xlink:type="simple">Couperus</t-str>, 	Haasse, of
+\tGrunberg?</t>
+        <str xml:id="example.string">
+            <t offset="7">creëren</t>
+        </str>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+        self.assertEqual( doc['example.string'].textcontent().getreference(), doc['example.p.1.s.1'] ) #testing resolving implicit reference
+
+    def test013a_correction(self):
+        """Validation - Text Validation on Correction (single text layer)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <correction>
+           <new>
+              <t offset="7">creëren</t>
+           </new>
+           <original auth="no">
+              <t offset="7">creeren</t>
+           </original>
+          </correction>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD" space="no">
+          <t offset="67">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="77">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+
+    def test013b_correction(self):
+        """Validation - Text Validation on Correction (Double text layers)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers?</t>
+        <t class="original">Is het creeren van een volwaardig litterair oeuvre voorbehouden aan schrijvers?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+          <t class="original" offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+          <t class="original" offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <correction>
+           <new>
+              <t offset="7">creëren</t>
+           </new>
+           <original auth="no">
+              <t class="original" offset="7">creeren</t>
+           </original>
+          </correction>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+          <t class="original" offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+          <t class="original" offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+          <t class="original" offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+          <t class="original" offset="34">litterair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+          <t class="original" offset="44">oeuvre</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.9" class="WORD">
+          <t offset="50">voorbehouden</t>
+          <t class="original" offset="51">voorbehouden</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+          <t class="original" offset="64">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD" space="no">
+          <t offset="67">schrijvers</t>
+          <t class="original" offset="68">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="77">?</t>
+          <t class="original" offset="78">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+
+    def test013c_correction(self):
+        """Validation - Text Validation on Correction (Double text layers, structural changes)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers?</t>
+        <t class="original">Is het creeren van een volwaardig litterair oeuvre voor behouden aan schrijvers?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+          <t class="original" offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+          <t class="original" offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <correction>
+           <new>
+              <t offset="7">creëren</t>
+           </new>
+           <original auth="no">
+              <t class="original" offset="7">creeren</t>
+           </original>
+          </correction>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+          <t class="original" offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+          <t class="original" offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+          <t class="original" offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+          <t class="original" offset="34">litterair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+          <t class="original" offset="44">oeuvre</t>
+        </w>
+        <correction>
+         <new>
+            <w xml:id="example.p.1.s.1.w.9" class="WORD">
+              <t offset="50">voorbehouden</t>
+            </w>
+         </new>
+         <original>
+            <w xml:id="example.p.1.s.1.w.9a" class="WORD">
+              <t class="original" offset="51">voor</t>
+            </w>
+            <w xml:id="example.p.1.s.1.w.9b" class="WORD">
+              <t class="original" offset="56">behouden</t>
+            </w>
+         </original>
+        </correction>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+          <t class="original" offset="65">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD" space="no">
+          <t offset="67">schrijvers</t>
+          <t class="original" offset="69">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="77">?</t>
+          <t class="original" offset="79">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+
+    def test013d_correction(self):
+        """Validation - Text Validation on Correction (Double text layers, structural changes, custom class)"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers?</t>
+        <t class="old">Is het creeren van een volwaardig litterair oeuvre voor behouden aan schrijvers?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+          <t class="old" offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+          <t class="old" offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <correction>
+           <new>
+              <t offset="7">creëren</t>
+           </new>
+           <original auth="no">
+              <t class="old" offset="7">creeren</t>
+           </original>
+          </correction>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+          <t class="old" offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+          <t class="old" offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+          <t class="old" offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+          <t class="old" offset="34">litterair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+          <t class="old" offset="44">oeuvre</t>
+        </w>
+        <correction>
+         <new>
+            <w xml:id="example.p.1.s.1.w.9" class="WORD">
+              <t offset="50">voorbehouden</t>
+            </w>
+         </new>
+         <original>
+            <w xml:id="example.p.1.s.1.w.9a" class="WORD">
+              <t class="old" offset="51">voor</t>
+            </w>
+            <w xml:id="example.p.1.s.1.w.9b" class="WORD">
+              <t class="old" offset="56">behouden</t>
+            </w>
+         </original>
+        </correction>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+          <t class="old" offset="65">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD" space="no">
+          <t offset="67">schrijvers</t>
+          <t class="old" offset="69">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="77">?</t>
+          <t class="old" offset="79">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+
+    def test013e_correction(self):
+        """Validation - Text Validation on complex nested correction (Double text layers, structural changes, custom class)"""
+        #NOTE: Current library implementation won't be able to validate nested layers and will just skip those!
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+      <style-annotation />
+    </annotations>
+  </metadata>
+  <text xml:id="example.text">
+    <p xml:id="example.p.1">
+      <s xml:id="example.p.1.s.1">
+        <t>Is het creëren van een volwaardig literair oeuvre voorbehouden aan schrijvers?</t>
+        <t class="old">Is het creeren van een volwaardig litterair oeuvre voor behouden aan schrijvers?</t>
+        <t class="older">Is het CREEREN van een volwaardig litterair oeuvre voor behouden aan schrijvers?</t>
+        <w xml:id="example.p.1.s.1.w.1" class="WORD">
+          <t offset="0">Is</t>
+          <t class="old" offset="0">Is</t>
+          <t class="older" offset="0">Is</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.2" class="WORD">
+          <t offset="3">het</t>
+          <t class="old" offset="3">het</t>
+          <t class="older" offset="3">het</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.3" class="WORD">
+          <correction>
+           <new>
+              <t offset="7">creëren</t>
+           </new>
+           <original auth="no">
+              <correction>
+                  <new>
+                      <t class="old" offset="7">creeren</t>
+                  </new>
+                  <original auth="no">
+                      <t class="older" offset="7">CREEREN</t>
+                  </original>
+              </correction>
+           </original>
+          </correction>
+        </w>
+        <w xml:id="example.p.1.s.1.w.4" class="WORD">
+          <t offset="15">van</t>
+          <t class="old" offset="15">van</t>
+          <t class="older" offset="15">van</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.5" class="WORD">
+          <t offset="19">een</t>
+          <t class="old" offset="19">een</t>
+          <t class="older" offset="19">een</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.6" class="WORD">
+          <t offset="23">volwaardig</t>
+          <t class="old" offset="23">volwaardig</t>
+          <t class="older" offset="23">volwaardig</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.7" class="WORD">
+          <t offset="34">literair</t>
+          <t class="old" offset="34">litterair</t>
+          <t class="older" offset="34">litterair</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.8" class="WORD">
+          <t offset="43">oeuvre</t>
+          <t class="old" offset="44">oeuvre</t>
+          <t class="older" offset="44">oeuvre</t>
+        </w>
+        <correction>
+         <new>
+            <w xml:id="example.p.1.s.1.w.9" class="WORD">
+              <t offset="50">voorbehouden</t>
+            </w>
+         </new>
+         <original>
+            <correction>
+                <new>
+                    <w xml:id="example.p.1.s.1.w.9a" class="WORD">
+                      <t class="old" offset="51">voor</t>
+                    </w>
+                    <w xml:id="example.p.1.s.1.w.9b" class="WORD">
+                      <t class="old" offset="56">behouden</t>
+                    </w>
+                </new>
+                <original>
+                    <w xml:id="example.p.1.s.1.w.9c" class="WORD">
+                      <t class="older" offset="51">voor</t>
+                    </w>
+                    <w xml:id="example.p.1.s.1.w.9d" class="WORD">
+                      <t class="older" offset="56">behouden</t>
+                    </w>
+                </original>
+            </correction>
+         </original>
+        </correction>
+        <w xml:id="example.p.1.s.1.w.10" class="WORD">
+          <t offset="63">aan</t>
+          <t class="old" offset="65">aan</t>
+          <t class="older" offset="65">aan</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.11" class="WORD" space="no">
+          <t offset="67">schrijvers</t>
+          <t class="old" offset="69">schrijvers</t>
+          <t class="older" offset="69">schrijvers</t>
+        </w>
+        <w xml:id="example.p.1.s.1.w.12" class="WORD">
+          <t offset="77">?</t>
+          <t class="old" offset="79">?</t>
+          <t class="older" offset="79">?</t>
+        </w>
+      </s>
+    </p>
+  </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+
+    def test013f_correction(self):
+        """Validation - Text Validation with redundancy on construction"""
+        #NOTE: Current library implementation won't be able to validate nested layers and will just skip those!
+        doc = folia.Document(id='example',textvalidation=True)
+
+        text = folia.Text(doc, id=doc.id + '.text.1')
+
+        text.append(
+            folia.Sentence(doc,id=doc.id + '.s.1', text="De site staat online . ", contents=[
+                folia.Word(doc,id=doc.id + '.s.1.w.1', text="De"),
+                folia.Word(doc,id=doc.id + '.s.1.w.2', text="site"),
+                folia.Word(doc,id=doc.id + '.s.1.w.3', text="staat"),
+                folia.Word(doc,id=doc.id + '.s.1.w.4', text="online"),
+                folia.Word(doc,id=doc.id + '.s.1.w.5', text=".")
+            ])
+        )
+        doc.xmlstring() #serialisation forces validation
+
+    def test013g_correction(self):
+        """Validation - Text Validation with redundancy on partial construction"""
+        #NOTE: Current library implementation won't be able to validate nested layers and will just skip those!
+        doc = folia.Document(id='example',textvalidation=True)
+
+        text = folia.Text(doc, id=doc.id + '.text.1')
+
+        raised = False
+        try:
+            text.append(
+                folia.Sentence(doc,id=doc.id + '.s.1', text="De site staat online . ", contents=[
+                    folia.Word(doc,id=doc.id + '.s.1.w.1', text="De"),
+                    folia.Word(doc,id=doc.id + '.s.1.w.2', text="site"),
+                    folia.Word(doc,id=doc.id + '.s.1.w.3', text="staat"),
+                ])
+            )
+        except folia.InconsistentText:
+            raised = True
+        self.assertTrue(raised)
+
+    def test014_fullparagraph(self):
+        """Validation - Text Validation with sentence text delimiter inheritance"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+      <paragraph-annotation set="undefined" />
+      <token-annotation annotator="ucto" annotatortype="auto" datetime="2017-09-25T10:29:52" set="tokconfig-nld"/>
+    </annotations>
+  </metadata>
+  <text xml:id="test.text">
+        <p xml:id="TEI.1.text.1.front.1.div1.1.p.13" class="p">
+          <t class="default">Versoek van het Zuyd-Hollandse Synode aan Haar Ho. Mo., dat bij het inwilligen van een nieuw octroy de Compagnie een goede somme gelds soude contribueeren tot onderhoud van een Seminarium. Het getal der predikanten in Indiën a°. 1647 gebragt op ’t getal van 28. Verdeelinge van deselve (blz. 12).</t>
+          <s xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1">
+            <t class="default">Versoek van het Zuyd-Hollandse Synode aan Haar Ho.</t>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.1" class="WORD" set="tokconfig-nld">
+              <t class="default">Versoek</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.2" class="WORD" set="tokconfig-nld">
+              <t class="default">van</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.3" class="WORD" set="tokconfig-nld">
+              <t class="default">het</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.4" class="WORD-COMPOUND" set="tokconfig-nld">
+              <t class="default">Zuyd-Hollandse</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.5" class="WORD" set="tokconfig-nld">
+              <t class="default">Synode</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.6" class="WORD" set="tokconfig-nld">
+              <t class="default">aan</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.7" class="WORD" set="tokconfig-nld">
+              <t class="default">Haar</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.8" class="WORD" set="tokconfig-nld" space="no">
+              <t class="default">Ho</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.1.w.9" class="PUNCTUATION" set="tokconfig-nld">
+              <t class="default">.</t>
+            </w>
+          </s>
+          <s xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.2">
+            <t class="default">Mo.</t>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.2.w.1" class="WORD" set="tokconfig-nld" space="no">
+              <t class="default">Mo</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.2.w.2" class="PUNCTUATION" set="tokconfig-nld" space="no">
+              <t class="default">.</t>
+            </w>
+          </s>
+          <s xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3">
+            <t class="default">, dat bij het inwilligen van een nieuw octroy de Compagnie een goede somme gelds soude contribueeren tot onderhoud van een Seminarium.</t>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.1" class="PUNCTUATION" set="tokconfig-nld">
+              <t class="default">,</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.2" class="WORD" set="tokconfig-nld">
+              <t class="default">dat</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.3" class="WORD" set="tokconfig-nld">
+              <t class="default">bij</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.4" class="WORD" set="tokconfig-nld">
+              <t class="default">het</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.5" class="WORD" set="tokconfig-nld">
+              <t class="default">inwilligen</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.6" class="WORD" set="tokconfig-nld">
+              <t class="default">van</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.7" class="WORD" set="tokconfig-nld">
+              <t class="default">een</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.8" class="WORD" set="tokconfig-nld">
+              <t class="default">nieuw</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.9" class="WORD" set="tokconfig-nld">
+              <t class="default">octroy</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.10" class="WORD" set="tokconfig-nld">
+              <t class="default">de</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.11" class="WORD" set="tokconfig-nld">
+              <t class="default">Compagnie</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.12" class="WORD" set="tokconfig-nld">
+              <t class="default">een</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.13" class="WORD" set="tokconfig-nld">
+              <t class="default">goede</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.14" class="WORD" set="tokconfig-nld">
+              <t class="default">somme</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.15" class="WORD" set="tokconfig-nld">
+              <t class="default">gelds</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.16" class="WORD" set="tokconfig-nld">
+              <t class="default">soude</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.17" class="WORD" set="tokconfig-nld">
+              <t class="default">contribueeren</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.18" class="WORD" set="tokconfig-nld">
+              <t class="default">tot</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.19" class="WORD" set="tokconfig-nld">
+              <t class="default">onderhoud</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.20" class="WORD" set="tokconfig-nld">
+              <t class="default">van</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.21" class="WORD" set="tokconfig-nld">
+              <t class="default">een</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.22" class="WORD" set="tokconfig-nld" space="no">
+              <t class="default">Seminarium</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.3.w.23" class="PUNCTUATION" set="tokconfig-nld">
+              <t class="default">.</t>
+            </w>
+          </s>
+          <s xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4">
+            <t class="default">Het getal der predikanten in Indiën a°.</t>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.1" class="WORD" set="tokconfig-nld">
+              <t class="default">Het</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.2" class="WORD" set="tokconfig-nld">
+              <t class="default">getal</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.3" class="WORD" set="tokconfig-nld">
+              <t class="default">der</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.4" class="WORD" set="tokconfig-nld">
+              <t class="default">predikanten</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.5" class="WORD" set="tokconfig-nld">
+              <t class="default">in</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.6" class="WORD" set="tokconfig-nld">
+              <t class="default">Indiën</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.7" class="WORD" set="tokconfig-nld" space="no">
+              <t class="default">a</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.8" class="SYMBOL" set="tokconfig-nld" space="no">
+              <t class="default">°</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.4.w.9" class="PUNCTUATION" set="tokconfig-nld">
+              <t class="default">.</t>
+            </w>
+          </s>
+          <s xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5">
+            <t class="default">1647 gebragt op ’t getal van 28.</t>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.1" class="NUMBER" set="tokconfig-nld">
+              <t class="default">1647</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.2" class="WORD" set="tokconfig-nld">
+              <t class="default">gebragt</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.3" class="WORD" set="tokconfig-nld">
+              <t class="default">op</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.4" class="WORD-TOKEN" set="tokconfig-nld">
+              <t class="default">’t</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.5" class="WORD" set="tokconfig-nld">
+              <t class="default">getal</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.6" class="WORD" set="tokconfig-nld">
+              <t class="default">van</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.7" class="NUMBER" set="tokconfig-nld" space="no">
+              <t class="default">28</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.5.w.8" class="PUNCTUATION" set="tokconfig-nld">
+              <t class="default">.</t>
+            </w>
+          </s>
+          <s xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6">
+            <t class="default">Verdeelinge van deselve (blz. 12).</t>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.1" class="WORD" set="tokconfig-nld">
+              <t class="default">Verdeelinge</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.2" class="WORD" set="tokconfig-nld">
+              <t class="default">van</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.3" class="WORD" set="tokconfig-nld">
+              <t class="default">deselve</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.4" class="PUNCTUATION" set="tokconfig-nld" space="no">
+              <t class="default">(</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.5" class="ABBREVIATION-KNOWN" set="tokconfig-nld">
+              <t class="default">blz.</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.6" class="NUMBER" set="tokconfig-nld" space="no">
+              <t class="default">12</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.7" class="PUNCTUATION" set="tokconfig-nld" space="no">
+              <t class="default">)</t>
+            </w>
+            <w xml:id="TEI.1.text.1.front.1.div1.1.p.13.s.6.w.8" class="PUNCTUATION" set="tokconfig-nld">
+              <t class="default">.</t>
+            </w>
+          </s>
+        </p>
+    </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+
+
+    def test015_textwhitespace(self):
+        """Validation - Whitespace in text content sanity check"""
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="test" version="{version}" generator="{generator}">
+  <metadata type="native">
+    <annotations>
+    </annotations>
+  </metadata>
+  <text xml:id="test.text">
+      <s xml:id="test.s"><t>Dit
+         is een rare test.
+         </t>
+      </s>
+    </text>
+</FoLiA>""".format(version=folia.FOLIAVERSION, generator='pynlpl.formats.folia-v' + folia.LIBVERSION)
+        doc = folia.Document(string=xml, textvalidation=True)
+        self.assertEqual( doc['test.s'].text(), "Dit\n         is een rare test.\n         ")
 
 
-f = io.open(FOLIAPATH + '/test/example.xml', 'r',encoding='utf-8')
-FOLIAEXAMPLE = f.read()
-f.close()
+with io.open(FOLIAPATH + '/test/example.xml', 'r',encoding='utf-8') as foliaexample_f:
+    FOLIAEXAMPLE = foliaexample_f.read()
 
 #We cheat, by setting the generator and version attributes to match the library, so xmldiff doesn't complain when we compare against this reference
 FOLIAEXAMPLE = re.sub(r' version="[^"]*" generator="[^"]*"', ' version="' + folia.FOLIAVERSION + '" generator="pynlpl.formats.folia-v' + folia.LIBVERSION + '"', FOLIAEXAMPLE, re.MULTILINE)
diff --git a/pynlpl/tests/fql.py b/pynlpl/tests/fql.py
index 7bad0cd..47f2990 100755
--- a/pynlpl/tests/fql.py
+++ b/pynlpl/tests/fql.py
@@ -121,6 +121,7 @@ Qcorrect_split = "SUBSTITUTE w WITH text \"weer\" SUBSTITUTE w WITH text \"gegev
 
 Qsuggest_split = "SUBSTITUTE (AS CORRECTION OF \"http://raw.github.com/proycon/folia/master/setdefinitions/spellingcorrection.foliaset.xml\" WITH class \"runonerror\" SUGGESTION (SUBSTITUTE w WITH text \"weer\" SUBSTITUTE w WITH text \"gegeven\")) FOR SPAN ID \"WR-P-E-J-0000000001.p.1.s.6.w.20\""
 
+
 Qprepend = "PREPEND w WITH text \"heel\" FOR ID \"WR-P-E-J-0000000001.p.1.s.1.w.4\""
 Qcorrect_prepend = "PREPEND w WITH text \"heel\" (AS CORRECTION OF \"http://raw.github.com/proycon/folia/master/setdefinitions/spellingcorrection.foliaset.xml\" WITH class \"insertion\") FOR ID \"WR-P-E-J-0000000001.p.1.s.1.w.4\""
 
@@ -156,6 +157,10 @@ Qfeat2 = "EDIT feat WHERE subset = \"wvorm\" WITH class \"inf\" FOR pos WHERE cl
 Qfeat3 = "ADD feat WITH subset \"wvorm\" class \"inf\" FOR pos WHERE class = \"WW(inf,vrij,zonder)\" FOR ID \"WR-P-E-J-0000000001.p.1.s.2.w.28\""
 Qfeat4 = "EDIT feat WHERE subset = \"strength\" AND class = \"strong\"  WITH class \"verystrong\"  FOR ID \"WR-P-E-J-0000000001.text.sentiment.1\""
 
+Qdelete_correction = "DELETE correction ID \"correctionexample.s.1.w.2.correction.1\" RESTORE ORIGINAL RETURN ancestor-focus"
+Qdelete_structural_correction = "DELETE correction ID \"correctionexample.s.3.correction.1\" RESTORE ORIGINAL RETURN ancestor-focus"
+Qdelete_structural_correction2 = "DELETE correction ID \"correctionexample.s.3.correction.2\" RESTORE ORIGINAL RETURN ancestor-focus"
+
 
 class Test1UnparsedQuery(unittest.TestCase):
 
@@ -354,7 +359,7 @@ class Test3Evaluation(unittest.TestCase):
         q = fql.Query(Qedittext4)
         results = q(self.doc)
         self.assertEqual(results[0].text(), "ter\nwijl")
-        self.assertEqual(results[0].xmlstring(), "<w xmlns=\"http://ilk.uvt.nl/folia\" xml:id=\"WR-P-E-J-0000000001.p.1.s.8.w.9\"><t>ter\nwijl</t><errordetection class=\"spelling\"/><pos class=\"VG(onder)\"/><lemma class=\"terweil\"/></w>")
+        self.assertEqual(results[0].xmlstring(), "<w xmlns=\"http://ilk.uvt.nl/folia\" xml:id=\"WR-P-E-J-0000000001.p.1.s.8.w.9\"><t>ter\nwijl</t><errordetection class=\"spelling\"/><pos class=\"VG(onder)\" set=\"https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/frog-mbpos-cgn\"/><lemma class=\"terweil\"/></w>")
 
     def test13_subfilter(self):
         q = fql.Query(Qhas)
@@ -826,6 +831,8 @@ class Test3Evaluation(unittest.TestCase):
         self.assertEqual(list(results[0].annotation(folia.Headspan).wrefs()), [ results[0].doc['WR-P-E-J-0000000001.p.1.s.1.w.3'], results[0].doc['WR-P-E-J-0000000001.p.1.s.1.w.4'], results[0].doc['WR-P-E-J-0000000001.p.1.s.1.w.5'] ] )
         self.assertEqual(results[0].ancestor(folia.AbstractStructureElement).id,  'WR-P-E-J-0000000001.p.1.s.1')
 
+
+
 class Test4CQL(unittest.TestCase):
     def setUp(self):
         self.doc = folia.Document(string=FOLIAEXAMPLE)
@@ -961,6 +968,31 @@ class Test4Evaluation(unittest.TestCase):
         self.assertEqual(results[0].text(), '.')
         self.assertIsInstance(results[0].original()[0], folia.Correction)
 
+    def test4_delete_correction(self):
+        """Deleting a correction and restoring the original"""
+        q = fql.Query(Qdelete_correction)
+        results = q(self.doc)
+        self.assertIsInstance(results[0], folia.Word)
+        self.assertEqual(results[0].text(), "word")
+
+    def test4b_delete_structural_correction(self):
+        """Deleting a structural correction and restoring the original (runon error)"""
+        q = fql.Query(Qdelete_structural_correction)
+        results = q(self.doc)
+        self.assertIsInstance(results[0], folia.Sentence)
+        self.assertIsInstance(results[0][0], folia.Word)
+        self.assertEqual(results[0][0].text(), "Ikhoor")
+
+    def test4c_delete_structural_correction(self):
+        """Deleting a structural correction and restoring the original (split error)"""
+        q = fql.Query(Qdelete_structural_correction2)
+        results = q(self.doc)
+        self.assertIsInstance(results[0], folia.Sentence)
+        self.assertIsInstance(results[0][1], folia.Word)
+        self.assertIsInstance(results[0][2], folia.Word)
+        self.assertEqual(results[0][1].text(), "on")
+        self.assertEqual(results[0][2].text(), "weer")
+
 if os.path.exists('../../FoLiA'):
     FOLIAPATH = '../../FoLiA/'
 elif os.path.exists('../FoLiA'):
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4d3e20d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+lxml>=2.2
+httplib2>=0.6
+numpy
diff --git a/setup.cfg b/setup.cfg
index 8146594..7132698 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -11,5 +11,4 @@ upload-dir = ../docs/build/html
 [egg_info]
 tag_build = 
 tag_date = 0
-tag_svn_revision = 0
 
diff --git a/setup.py b/setup.py
index 5074c91..63eea0b 100755
--- a/setup.py
+++ b/setup.py
@@ -8,24 +8,6 @@ import os
 import sys
 from setuptools import setup, find_packages
 
-if os.path.dirname(__file__) != "":
-    os.chdir(os.path.dirname(__file__))
-if not os.path.exists('pynlpl'):
-    print("Preparing build",file=sys.stderr)
-    if os.path.exists('build'):
-        os.system('rm -Rf build')
-    os.mkdir('build')
-    os.chdir('build')
-    if not os.path.exists('pynlpl'): os.mkdir('pynlpl')
-    os.system('cp -Rpf ../* pynlpl/ 2> /dev/null')
-    os.system('mv -f pynlpl/setup.py pynlpl/setup.cfg .')
-    os.system('cp -f pynlpl/README.rst .')
-    os.system('cp -f pynlpl/LICENSE .')
-    os.system('cp -f pynlpl/MANIFEST.in .')
-
-    #Do not include unfininished WIP modules:
-    os.system('rm -f pynlpl/formats/colibri.py pynlpl/formats/alpino.py pynlpl/foliaprocessing.py pynlpl/grammar.py')
-
 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
 
@@ -41,7 +23,7 @@ if sys.version > '3':
 
 setup(
     name = "PyNLPl",
-    version = "1.1.2", #edit version in __init__.py as well and ensure tests/folia.py FOLIARELEASE points to the right version!
+    version = "1.2.5", #edit version in __init__.py as well and ensure tests/folia.py FOLIARELEASE points to the right version and is not set to None!
     author = "Maarten van Gompel",
     author_email = "proycon at anaproy.nl",
     description = ("PyNLPl, pronounced as 'pineapple', is a Python library for Natural Language Processing. It contains various modules useful for common, and less common, NLP tasks. PyNLPl contains modules for basic tasks, clients for interfacting with server, and modules for parsing several file formats common in NLP, most notably FoLiA."),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-pynlpl.git



More information about the debian-science-commits mailing list