[med-svn] [kineticstools] 01/10: Imported Upstream version 0.5.2+dfsg

Afif Elghraoui afif at moszumanska.debian.org
Sun Jul 3 03:37:05 UTC 2016


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository kineticstools.

commit 5ef124ee9a0246d656717c290b105f1cde5a8701
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Sat Jul 2 15:21:19 2016 -0700

    Imported Upstream version 0.5.2+dfsg
---
 .circleci/installHDF5.sh                          | 14 +++++
 circle.yml                                        | 18 ++++++
 kineticsTools/ipdSummary.py                       | 68 +++++++++++------------
 kineticsTools/summarizeModifications.py           | 18 +++---
 requirements-ci.txt                               | 12 ++++
 requirements-dev.txt                              |  3 +
 setup.py                                          |  6 +-
 test/cram/case-ctrl.t                             |  2 +-
 test/cram/detection.t                             |  2 +-
 test/cram/detection_bam.t                         |  8 +--
 test/cram/detection_bam_dataset.t                 |  8 +--
 test/cram/detection_bam_lossless.t                |  6 +-
 test/cram/identify.t                              |  2 +-
 test/cram/long_running/README.txt                 |  6 +-
 test/cram/long_running/detect_and_identify_Bsub.t |  4 +-
 test/cram/long_running/detect_and_identify_Cagg.t |  2 +-
 test/cram/long_running/detect_and_identify_Hpyl.t |  4 +-
 test/cram/long_running/detect_and_identify_Mjan.t |  2 +-
 test/cram/long_running/run_jenkins.sh             | 19 +++++++
 test/cram/methyl-fraction-case-ctrl.t             |  2 +-
 test/cram/version.t                               |  9 +--
 test/test_ReferenceUtils.py                       |  4 +-
 test/test_inputs.py                               | 60 ++++++++++++++++++--
 test/test_tool_contract.py                        |  6 +-
 24 files changed, 199 insertions(+), 86 deletions(-)

diff --git a/.circleci/installHDF5.sh b/.circleci/installHDF5.sh
new file mode 100644
index 0000000..3e7ab39
--- /dev/null
+++ b/.circleci/installHDF5.sh
@@ -0,0 +1,14 @@
+# Build HDF5 1.8.12 from source into ./prefix unless libhdf5.so is already there.
+set -ex
+if [ ! -e prefix/lib/libhdf5.so ]; then
+  wget https://www.hdfgroup.org/ftp/HDF5//releases/hdf5-1.8.12/src/hdf5-1.8.12.tar.gz
+  tar xzf hdf5-1.8.12.tar.gz
+  mkdir -p prefix
+  PREFIX="$PWD/prefix"
+  cd hdf5-1.8.12
+  ./configure --prefix="$PREFIX"
+  make
+  make install
+else
+    echo "HDF5 build/install already completed!"
+fi
diff --git a/circle.yml b/circle.yml
new file mode 100644
index 0000000..a1e6411
--- /dev/null
+++ b/circle.yml
@@ -0,0 +1,18 @@
+machine:
+  python:
+    version: 2.7.6
+
+dependencies:
+  # We need to manually build (and cache) a more modern libhdf5 than
+  # ubuntu precise makes available via apt---there are bugs that
+  # affect us, in libhdf5 1.8.4
+  cache_directories:
+    - .circleci/prefix
+  pre:
+    - (cd .circleci && bash installHDF5.sh)
+    - HDF5_DIR=$PWD/.circleci/prefix pip install -r requirements-ci.txt
+    - HDF5_DIR=$PWD/.circleci/prefix pip install -r requirements-dev.txt
+    
+test:
+  override:
+    - make test # Run doctests in addition to the usual unit tests
diff --git a/kineticsTools/ipdSummary.py b/kineticsTools/ipdSummary.py
index 783d251..5c52c54 100755
--- a/kineticsTools/ipdSummary.py
+++ b/kineticsTools/ipdSummary.py
@@ -50,6 +50,7 @@ import Queue
 import traceback
 from pkg_resources import Requirement, resource_filename
 
+from pbcommand.common_options import add_debug_option
 from pbcommand.models import FileTypes, SymbolTypes, get_pbparser
 from pbcommand.cli import pbparser_runner
 from pbcommand.utils import setup_log
@@ -114,30 +115,33 @@ def get_parser():
         description=__doc__,
         driver_exe=Constants.DRIVER_EXE,
         is_distributed=True,
-        nproc=SymbolTypes.MAX_NPROC)
+        nproc=SymbolTypes.MAX_NPROC,
+        default_level="WARN")
     p.add_input_file_type(FileTypes.DS_ALIGN, "alignment_set",
         "Alignment DataSet", "BAM or Alignment DataSet")
+    tcp = p.tool_contract_parser
     # FIXME just use a positional argument...
-    p.tool_contract_parser.add_input_file_type(FileTypes.DS_REF, "reference",
+    tcp.add_input_file_type(FileTypes.DS_REF, "reference",
         "Reference DataSet", "Fasta or Reference DataSet")
-    p.arg_parser.parser.add_argument("--reference", action="store",
+    argp = p.arg_parser.parser
+    argp.add_argument("--reference", action="store",
         required=True,
         type=validateFile, help="Fasta or Reference DataSet")
     # XXX GFF and CSV are "option" for arg parser, not tool contract
-    p.tool_contract_parser.add_output_file_type(FileTypes.GFF, "gff",
+    tcp.add_output_file_type(FileTypes.GFF, "gff",
         name="GFF file",
         description="GFF file of modified bases",
-        default_name="basemods.gff")
-    p.tool_contract_parser.add_output_file_type(FileTypes.CSV, "csv",
+        default_name="basemods")
+    tcp.add_output_file_type(FileTypes.CSV, "csv",
         name="CSV file",
         description="CSV file of per-nucleotide information",
-        default_name="basemods.csv")
-    p.arg_parser.parser.add_argument("--gff", action="store", default=None,
+        default_name="basemods")
+    argp.add_argument("--gff", action="store", default=None,
         help="Output GFF file of modified bases")
-    p.arg_parser.parser.add_argument("--csv", action="store", default=None,
+    argp.add_argument("--csv", action="store", default=None,
         help="Output CSV file out per-nucleotide information")
     # FIXME use central --nproc option
-    p.arg_parser.parser.add_argument('--numWorkers', '-j',
+    argp.add_argument('--numWorkers', '-j',
         dest='numWorkers',
         default=1,
         type=int,
@@ -152,25 +156,33 @@ def get_parser():
         default=Constants.MAX_LENGTH_DEFAULT,
         name="Max sequence length",
         description="Maximum number of bases to process per contig")
-    p.add_str(Constants.IDENTIFY_ID,
+    tcp.add_str(Constants.IDENTIFY_ID,
         option_str="identify",
         default="",
         name="Identify basemods",
         description="Specific modifications to identify (comma-separated "+\
+            "list).  Currrent options are m6A and/or m4C.")
+    argp.add_argument(
+        "--identify",
+        action="store",
+        default="",
+        help="Specific modifications to identify (comma-separated "+\
             "list).  Currrent options are m6A, m4C, m5C_TET.  Cannot be "+\
             "used with --control.")
     _DESC = "In the --identify mode, add --methylFraction to "+\
             "command line to estimate the methylated fraction, along with "+\
             "95%% confidence interval bounds."
     # FIXME tool contract parser and argparser conflict
-    p.tool_contract_parser.add_boolean(Constants.METHYL_FRACTION_ID,
+    tcp.add_boolean(Constants.METHYL_FRACTION_ID,
         option_str="methylFraction",
         default=False,
         name="Compute methyl fraction",
-        description=_DESC)
-    p.arg_parser.parser.add_argument("--methylFraction", action="store_true",
+        description="When identifying specific modifications (m4C and/or "+
+                    "m6A), enabling this option will estimate the methylated "+
+                    "fraction, along with 95% confidence interval bounds.")
+    argp.add_argument("--methylFraction", action="store_true",
         help=_DESC)
-    _get_more_options(p.arg_parser.parser)
+    _get_more_options(argp)
     return p
 
 def _get_more_options(parser):
@@ -351,11 +363,7 @@ def _get_more_options(parser):
                         default=False,
                         help="Enable Python-level profiling (using cProfile).")
 
-    parser.add_argument('--usePdb',
-                        action='store_true',
-                        dest="usePdb",
-                        default=False,
-                        help="Enable dropping down into pdb debugger if an Exception is raised.")
+    add_debug_option(parser)
 
     parser.add_argument("--seed",
                         action="store",
@@ -364,10 +372,6 @@ def _get_more_options(parser):
                         default=None,
                         help="Random seed (for development and debugging purposes only)")
 
-    # Verbosity
-    parser.add_argument("--verbose",
-                        action="store_true",
-                        default=False)
     return parser
 
 
@@ -694,11 +698,6 @@ def monitorChildProcesses(children):
         time.sleep(1)
 
 def args_runner(args):
-    log = logging.getLogger()
-    if args.verbose:
-        log.setLevel(logging.INFO)
-    else:
-        log.setLevel(logging.WARN)
     kt = KineticsToolsRunner(args)
     return kt.start()
 
@@ -742,19 +741,16 @@ def resolved_tool_contract_runner(resolved_contract):
     return args_runner(args_)
 
 def main(argv=sys.argv, out=sys.stdout):
-    # Log generously
-    logFormat = '%(asctime)s [%(levelname)s] %(message)s'
-    logging.basicConfig(format=logFormat, level=logging.WARN)
-    stdOutHandler = logging.StreamHandler(sys.stdout)
-    log = logging.getLogger()
+    setup_log_ = functools.partial(setup_log,
+        str_formatter='%(asctime)s [%(levelname)s] %(message)s')
     try:
         return pbparser_runner(
             argv=argv[1:],
             parser=get_parser(),
             args_runner_func=args_runner,
             contract_runner_func=resolved_tool_contract_runner,
-            alog=log,
-            setup_log_func=setup_log)
+            alog=logging.getLogger(__name__),
+            setup_log_func=setup_log_)
     # FIXME is there a more central place to deal with this?
     except Exception as e:
         type, value, tb = sys.exc_info()
diff --git a/kineticsTools/summarizeModifications.py b/kineticsTools/summarizeModifications.py
index 59c6257..deb2768 100755
--- a/kineticsTools/summarizeModifications.py
+++ b/kineticsTools/summarizeModifications.py
@@ -35,13 +35,13 @@ Summarizes kinetic modifications in the alignment_summary.gff file.
 
 import cProfile
 from itertools import groupby
+import functools
 import os
 import logging
 import sys
 
 from pbcommand.models import FileTypes, get_pbparser
 from pbcommand.cli import pbparser_runner
-from pbcommand.common_options import add_debug_option
 from pbcommand.utils import setup_log
 from pbcore.io import GffReader, Gff3Record
 
@@ -169,7 +169,8 @@ def get_parser():
         version=__version__,
         name=Constants.TOOL_ID,
         description=__doc__,
-        driver_exe=Constants.DRIVER_EXE)
+        driver_exe=Constants.DRIVER_EXE,
+        default_level="INFO")
     p.add_input_file_type(FileTypes.GFF, "modifications",
         name="GFF file",
         description="Base modification GFF file")
@@ -179,23 +180,20 @@ def get_parser():
     p.add_output_file_type(FileTypes.GFF, "gff_out",
         name="GFF file",
         description="Modified alignment summary file",
-        default_name="alignment_summary_with_basemods.gff")
+        default_name="alignment_summary_with_basemods")
     return p
 
 def main(argv=sys.argv):
     mp = get_parser()
-    logFormat = '%(asctime)s [%(levelname)s] %(message)s'
-    logging.basicConfig(level=logging.INFO, format=logFormat)
-    stdOutHandler = logging.StreamHandler(sys.stdout)
-    logging.Logger.root.addHandler(stdOutHandler)
-    log = logging.getLogger()
+    setup_log_ = functools.partial(setup_log,
+        str_formatter='%(asctime)s [%(levelname)s] %(message)s')
     return pbparser_runner(
         argv=argv[1:],
         parser=mp,
         args_runner_func=args_runner,
         contract_runner_func=resolved_tool_contract_runner,
-        alog=log,
-        setup_log_func=setup_log)
+        alog=logging.getLogger(__name__),
+        setup_log_func=setup_log_)
 
 if __name__ == "__main__":
     main()
diff --git a/requirements-ci.txt b/requirements-ci.txt
new file mode 100644
index 0000000..a43e0c4
--- /dev/null
+++ b/requirements-ci.txt
@@ -0,0 +1,12 @@
+cython
+numpy
+h5py
+jinja2
+networkx
+jsonschema
+xmlbuilder
+functools32
+pyxb
+# Install from github
+-e git://github.com/PacificBiosciences/pbcore.git@master#egg=pbcore
+-e git://github.com/PacificBiosciences/pbcommand.git#egg=pbcommand
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..80e9144
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,3 @@
+-r requirements.txt
+sphinx
+nose
diff --git a/setup.py b/setup.py
index 91c87bd..bc1f56b 100755
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ import sys
 
 setup(
     name='kineticsTools',
-    version='0.5.1',
+    version='0.5.2',
     author='Pacific Biosciences',
     author_email='devnet at pacificbiosciences.com',
     license=open('LICENSES.txt').read(),
@@ -16,11 +16,11 @@ setup(
                            export_symbols=["innerPredict", "innerPredictCtx", "init_native"])],
     zip_safe=False,
     install_requires=[
-        'pbcore >= 1.2.2',
+        'pbcore >= 1.2.8',
         'numpy >= 1.6.0',
         'h5py >= 1.3.0',
         'scipy >= 0.9.0',
-        'pbcommand >= 0.2.0',
+        'pbcommand >= 0.3.22',
     ],
     entry_points={'console_scripts': [
         "ipdSummary = kineticsTools.ipdSummary:main",
diff --git a/test/cram/case-ctrl.t b/test/cram/case-ctrl.t
index 7442a3b..6e707a6 100644
--- a/test/cram/case-ctrl.t
+++ b/test/cram/case-ctrl.t
@@ -10,7 +10,7 @@ Load in data:
 
 Run basic ipdSummary:
 
-  $ ipdSummary --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
+  $ ipdSummary --log-level=WARNING --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
 
 Look at output csv file:
 
diff --git a/test/cram/detection.t b/test/cram/detection.t
index b8d2fe6..822ca0b 100644
--- a/test/cram/detection.t
+++ b/test/cram/detection.t
@@ -10,7 +10,7 @@ Load in data:
 
 Run basic ipdSummary:
 
-  $ ipdSummary --pvalue 0.001 --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
+  $ ipdSummary --log-level=WARNING --pvalue 0.001 --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
 
 Look at output csv file:
 
diff --git a/test/cram/detection_bam.t b/test/cram/detection_bam.t
index 0724a2d..77de087 100644
--- a/test/cram/detection_bam.t
+++ b/test/cram/detection_bam.t
@@ -4,13 +4,13 @@ Test detection and identification modes of ipdSummary using .bam file as input.
 
 Load in data:
 
-  $ DATA=/mnt/secondary-siv/testdata/kineticsTools
+  $ DATA=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ INPUT=$DATA/Hpyl_1_5000.bam
-  $ REFERENCE=/mnt/secondary-siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
+  $ REFERENCE=/pbi/dept/secondary/siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
 
 Run basic ipdSummary:
 
-  $ ipdSummary --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+  $ ipdSummary --log-level=WARNING --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
 
 Look at output csv file:
 
@@ -50,7 +50,7 @@ Look at output gff file:
 
 Now try limiting the number of alignments:
 
-  $ ipdSummary --gff tmp2.gff --csv tmp2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --maxAlignments 100 --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+  $ ipdSummary --log-level=WARNING --gff tmp2.gff --csv tmp2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --maxAlignments 100 --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
 
   $ N_DIFF=`diff tmp1.gff tmp2.gff | wc --lines`
   $ python -c "assert 100 < ${N_DIFF}, ${N_DIFF}"
diff --git a/test/cram/detection_bam_dataset.t b/test/cram/detection_bam_dataset.t
index 11c070b..95ec106 100644
--- a/test/cram/detection_bam_dataset.t
+++ b/test/cram/detection_bam_dataset.t
@@ -4,13 +4,13 @@ Test detection and identification modes of ipdSummary using .xml dataset file as
 
 Load in data:
 
-  $ DATA=/mnt/secondary-siv/testdata/kineticsTools
+  $ DATA=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ INPUT=$DATA/Hpyl_1_5000.xml
-  $ REFERENCE=/mnt/secondary-siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
+  $ REFERENCE=/pbi/dept/secondary/siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
 
 Run basic ipdSummary:
 
-  $ ipdSummary --outfile tmp_xml1 --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+  $ ipdSummary --log-level=WARNING --outfile tmp_xml1 --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
 
 Look at output csv file:
 
@@ -51,6 +51,6 @@ Look at output gff file:
 Now try with a split dataset:
 
   $ INPUT=$DATA/Hpyl_1_5000_split.xml
-  $ ipdSummary --gff tmp_xml2.gff --csv tmp_xml2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+  $ ipdSummary --log-level=WARNING --gff tmp_xml2.gff --csv tmp_xml2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
   $ linecount tmp_xml2.gff
   274
diff --git a/test/cram/detection_bam_lossless.t b/test/cram/detection_bam_lossless.t
index 3f0c610..7beecd1 100644
--- a/test/cram/detection_bam_lossless.t
+++ b/test/cram/detection_bam_lossless.t
@@ -4,14 +4,14 @@ Test detection and identification modes of ipdSummary using .bam file as input,
 
 Load in data:
 
-  $ DATA=/mnt/secondary-siv/testdata/kineticsTools
+  $ DATA=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ INPUT=$DATA/Mjan_1_5000_lossless.bam
-  $ export REF_DIR=/mnt/secondary-siv/references
+  $ export REF_DIR=/pbi/dept/secondary/siv/references
   $ export REF_SEQ=${REF_DIR}/Methanocaldococcus_jannaschii_DSM2661/sequence/Methanocaldococcus_jannaschii_DSM2661.fasta
 
 Run basic ipdSummary:
 
-  $ ipdSummary --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REF_SEQ $INPUT
+  $ ipdSummary --log-level=WARNING --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REF_SEQ $INPUT
 
 Look at output csv file:
 
diff --git a/test/cram/identify.t b/test/cram/identify.t
index da19d43..442d130 100644
--- a/test/cram/identify.t
+++ b/test/cram/identify.t
@@ -10,7 +10,7 @@ Load in data:
 
 Run basic ipdSummary:
 
-  $ ipdSummary --numWorkers 1 --pvalue 0.001 --identify m6A,m4C --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
+  $ ipdSummary --log-level=WARNING --numWorkers 1 --pvalue 0.001 --identify m6A,m4C --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
 
 Look at output csv file:
 
diff --git a/test/cram/long_running/README.txt b/test/cram/long_running/README.txt
index 6a19abc..1e489e6 100644
--- a/test/cram/long_running/README.txt
+++ b/test/cram/long_running/README.txt
@@ -1,6 +1,6 @@
-====================================================
-README for /mnt/secondary-siv/testdata/kineticsTools
-====================================================
+=========================================================
+README for /pbi/dept/secondary/siv/testdata/kineticsTools
+=========================================================
 
 Most of these files are derived from Tyson Clark's P6 chemistry validation
 experiments.  Bsub is an amplified control.
diff --git a/test/cram/long_running/detect_and_identify_Bsub.t b/test/cram/long_running/detect_and_identify_Bsub.t
index 7b7b439..3e4c882 100644
--- a/test/cram/long_running/detect_and_identify_Bsub.t
+++ b/test/cram/long_running/detect_and_identify_Bsub.t
@@ -4,9 +4,9 @@ Run base modification detection on B. subtilis P6 chemistry validation data
 
   $ . $TESTDIR/../portability.sh
 
-  $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+  $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ export BAMFILE=${DATA_DIR}/Bsub_aligned.subreads.bam
-  $ export REF_DIR=/mnt/secondary-siv/references
+  $ export REF_DIR=/pbi/dept/secondary/siv/references
   $ export REF_SEQ=${REF_DIR}/B_subtilis_strW23/sequence/B_subtilis_strW23.fasta
 
   $ ipdSummary ${BAMFILE} --reference ${REF_SEQ} --gff tst_Bsub.gff --csv tst_Bsub.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C
diff --git a/test/cram/long_running/detect_and_identify_Cagg.t b/test/cram/long_running/detect_and_identify_Cagg.t
index aac1d6c..a509ed8 100644
--- a/test/cram/long_running/detect_and_identify_Cagg.t
+++ b/test/cram/long_running/detect_and_identify_Cagg.t
@@ -4,7 +4,7 @@ Run base modification detection on C. aggregans P6 chemistry validation data
 
   $ . $TESTDIR/../portability.sh
 
-  $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+  $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ export BAMFILE=${DATA_DIR}/Cagg_aligned.subreads.bam
   $ export REF_DIR=/mnt/secondary/Smrtanalysis/current/common/references
   $ export REF_SEQ=${REF_DIR}/Chloroflexus_aggregans_DSM9485/sequence/Chloroflexus_aggregans_DSM9485.fasta
diff --git a/test/cram/long_running/detect_and_identify_Hpyl.t b/test/cram/long_running/detect_and_identify_Hpyl.t
index 8067142..97c409f 100644
--- a/test/cram/long_running/detect_and_identify_Hpyl.t
+++ b/test/cram/long_running/detect_and_identify_Hpyl.t
@@ -3,9 +3,9 @@ Run base modification detection on H. pylori P6 chemistry validation data.
 
   $ . $TESTDIR/../portability.sh
 
-  $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+  $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ export BAMFILE=${DATA_DIR}/Hpyl_aligned.subreads.bam
-  $ export REF_DIR=/mnt/secondary-siv/references
+  $ export REF_DIR=/pbi/dept/secondary/siv/references
   $ export REF_SEQ=${REF_DIR}/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
 
   $ ipdSummary ${BAMFILE} --reference ${REF_SEQ} --gff tst_Hpyl.gff --csv tst_Hpyl.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C
diff --git a/test/cram/long_running/detect_and_identify_Mjan.t b/test/cram/long_running/detect_and_identify_Mjan.t
index 3c5f9e1..3a44f42 100644
--- a/test/cram/long_running/detect_and_identify_Mjan.t
+++ b/test/cram/long_running/detect_and_identify_Mjan.t
@@ -3,7 +3,7 @@ Run base modification detection on M. jannaschii P6 chemistry validation data.
 
   $ . $TESTDIR/../portability.sh
 
-  $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+  $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
   $ export BAMFILE=${DATA_DIR}/Mjan_aligned.subreads.bam
   $ export REF_DIR=/mnt/secondary/Smrtanalysis/current/common/references
   $ export REF_SEQ=${REF_DIR}/Methanocaldococcus_jannaschii_DSM2661/sequence/Methanocaldococcus_jannaschii_DSM2661.fasta
diff --git a/test/cram/long_running/run_jenkins.sh b/test/cram/long_running/run_jenkins.sh
new file mode 100755
index 0000000..07356fa
--- /dev/null
+++ b/test/cram/long_running/run_jenkins.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Usage: run_jenkins.sh BASE_PATH XML_DEST -- run the long-running cram tests.
+BASE_PATH=$1
+XML_DEST=$2
+# BASE_PATH must be an existing directory; XML_DEST must be given and not be a directory.
+if [ -z "${BASE_PATH}" ] || [ ! -d "${BASE_PATH}" ]; then
+  echo "Base path required as first argument"
+  exit 1
+fi
+if [ -z "${XML_DEST}" ] || [ -d "${XML_DEST}" ]; then
+  echo "XML output file required as second argument"
+  exit 1
+fi
+rm -f "${XML_DEST}"
+# Install CramUnit into a virtualenv under BASE_PATH, then run the tests in test/cram/long_running.
+cd "${BASE_PATH}" || exit 1
+virtualenv "${BASE_PATH}/venv"
+"${BASE_PATH}/venv/bin/pip" install CramUnit
+"${BASE_PATH}/venv/bin/python" "${BASE_PATH}/venv/bin/run_cram_unit.py" -x "${XML_DEST}" "${BASE_PATH}/test/cram/long_running"
diff --git a/test/cram/methyl-fraction-case-ctrl.t b/test/cram/methyl-fraction-case-ctrl.t
index 25e9fd1..c63e719 100644
--- a/test/cram/methyl-fraction-case-ctrl.t
+++ b/test/cram/methyl-fraction-case-ctrl.t
@@ -10,7 +10,7 @@ Load in data:
 
 Run basic ipdSummary:
 
-  $ ipdSummary --numWorkers 1 --methylFraction --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
+  $ ipdSummary --log-level=WARNING --numWorkers 1 --methylFraction --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
 
 Look at output csv file:
 
diff --git a/test/cram/version.t b/test/cram/version.t
index 0dd4ff3..ef00278 100644
--- a/test/cram/version.t
+++ b/test/cram/version.t
@@ -4,9 +4,10 @@ A simple test of the version and help options:
   2.2
 
   $ ipdSummary
-  usage: ipdSummary [-h] [-v] [--emit-tool-contract]
+  usage: ipdSummary [-h] [--version] [--emit-tool-contract]
                     [--resolved-tool-contract RESOLVED_TOOL_CONTRACT]
-                    [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [--debug]
+                    [--log-file LOG_FILE]
+                    [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL} | --debug | --quiet | -v]
                     --reference REFERENCE [--gff GFF] [--csv CSV]
                     [--numWorkers NUMWORKERS] [--pvalue PVALUE]
                     [--maxLength MAXLENGTH] [--identify IDENTIFY]
@@ -25,8 +26,8 @@ A simple test of the version and help options:
                     [--refContigIndex REFCONTIGINDEX]
                     [-W REFERENCEWINDOWSASSTRING]
                     [--skipUnrecognizedContigs SKIPUNRECOGNIZEDCONTIGS]
-                    [--alignmentSetRefWindows] [--threaded] [--profile]
-                    [--usePdb] [--seed RANDOMSEED] [--verbose]
+                    [--alignmentSetRefWindows] [--threaded] [--profile] [--pdb]
+                    [--seed RANDOMSEED]
                     alignment_set
   ipdSummary: error: too few arguments
   [2]
diff --git a/test/test_ReferenceUtils.py b/test/test_ReferenceUtils.py
index 1c204b4..1d6d3cb 100644
--- a/test/test_ReferenceUtils.py
+++ b/test/test_ReferenceUtils.py
@@ -6,8 +6,8 @@ import os.path
 from kineticsTools.ReferenceUtils import ReferenceUtils
 from pbcore.io import AlignmentSet
 
-big_data_dir = "/mnt/secondary-siv/testdata/kineticsTools"
-ref_dir = "/mnt/secondary-siv/references"
+big_data_dir = "/pbi/dept/secondary/siv/testdata/kineticsTools"
+ref_dir = "/pbi/dept/secondary/siv/references"
 
 logging.basicConfig()
 log = logging.getLogger()
diff --git a/test/test_inputs.py b/test/test_inputs.py
index 6c2ca5a..ea699bf 100644
--- a/test/test_inputs.py
+++ b/test/test_inputs.py
@@ -16,9 +16,11 @@ logging.basicConfig()
 log = logging.getLogger()
 
 # FIXME
-data_dir = "/mnt/secondary-siv/testdata/kineticsTools"
+data_dir = "/pbi/dept/secondary/siv/testdata/kineticsTools"
 
 class _TestBase(object):
+    MAX_ALIGNMENTS = 1500
+
     """
     Common test functionality.  All input type tests should inherit from this,
     and yield identical results.
@@ -30,6 +32,7 @@ class _TestBase(object):
 
     def basicOpts(self):
         """Mock up some options for the kinetic worker"""
+        self_ = self
         class opts:
             def __init__(self):
                 self.mapQvThreshold = -1
@@ -45,7 +48,7 @@ class _TestBase(object):
                 self.identifyMinCov = 5
                 self.methylMinCov = 10
                 self.useLDA = False
-                self.maxAlignments = 1500
+                self.maxAlignments = self_.MAX_ALIGNMENTS
                 self.randomSeed = None
         return opts()
 
@@ -53,7 +56,7 @@ class _TestBase(object):
         raise NotImplementedError()
 
     def getReference (self):
-        refDir = "/mnt/secondary-siv/references"
+        refDir = "/pbi/dept/secondary/siv/references"
         return os.path.join(refDir, "Helicobacter_pylori_J99", "sequence",
             "Helicobacter_pylori_J99.fasta")
 
@@ -100,7 +103,7 @@ class _TestBase(object):
         chunks = self.kw._chunkRawIpds(rawIpds)
         #log.critical(chunks)
 
-    def testSmallDecode (self):
+    def test_small_decode (self):
         """Test for known modifications near the start of H. pylori genome"""
         # XXX should have mods on 60- (m4C), 89+ (m6A), 91- (m6A)
         start = 50
@@ -143,5 +146,54 @@ class TestSplitDataset(_TestBase, unittest.TestCase):
         return os.path.join(data_dir, "Hpyl_1_5000_split.xml")
 
 
+@unittest.skipUnless(os.path.isdir(data_dir), "Missing test data directory")
+class TestChunkedDataset(_TestBase, unittest.TestCase):
+    # Runs the inherited tests against the chunked dataset XML in data_dir.
+    def getAlignments(self):
+        return os.path.join(data_dir, "Hpyl_1_5000_chunk.xml")
+
+    @unittest.skip("test_private_api is skipped for the chunked dataset")
+    def test_private_api(self):
+        pass
+
+    def test_small_decode(self):
+        start = 985
+        end = 1065
+        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
+        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
+        bounds = (start, end)
+        # Summarize and decode the window; four modifications are expected.
+        self.kw._prepForReferenceWindow(referenceWindow)
+        kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
+        mods = self.kw._decodePositiveControl(kinetics, bounds)
+        self.assertEqual(len(mods), 4)
+
+
+@unittest.skipUnless(os.path.isdir(data_dir), "Missing test data directory")
+class TestNonStochastic(TestBam):
+    # XXX force this down (_TestBase uses 1500) to trigger RNG
+    MAX_ALIGNMENTS = 150
+
+    @unittest.skip("test_private_api is skipped for this test case")
+    def test_private_api(self):
+        pass
+
+    def test_small_decode(self):
+        start = 50
+        end = 100
+        REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
+        referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
+        bounds = (start, end)
+        self.kw._prepForReferenceWindow(referenceWindow)
+        kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
+        # XXX note that this is very dependent on the exact order of reads
+        # found by readsInRange(), which may be altered by changes to the
+        # implementation of the dataset API.  It should be immune to stochastic
+        # effects, however.
+        self.assertEqual("%.5f" % kinetics[0]['ipdRatio'], "1.06460")
+        mods = self.kw._decodePositiveControl(kinetics, bounds)
+        self.assertEqual(len(mods), 3)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_tool_contract.py b/test/test_tool_contract.py
index f7dc385..5056b24 100755
--- a/test/test_tool_contract.py
+++ b/test/test_tool_contract.py
@@ -13,8 +13,8 @@ import pbcommand.testkit
 
 os.environ["PACBIO_TEST_ENV"] = "1" # turns off --verbose
 
-DATA_DIR = "/mnt/secondary-siv/testdata/kineticsTools"
-REF_DIR = "/mnt/secondary-siv/references/Helicobacter_pylori_J99"
+DATA_DIR = "/pbi/dept/secondary/siv/testdata/kineticsTools"
+REF_DIR = "/pbi/dept/secondary/siv/references/Helicobacter_pylori_J99"
 
 
 class Constants(object):
@@ -86,7 +86,7 @@ class TestIpdSummaryChunk(TestIpdSummary):
         gff_file = os.path.join(output_dir, rtc.task.output_files[0])
         csv_file = os.path.join(output_dir, rtc.task.output_files[1])
         logging.critical(gff_file)
-        logging.critical(csv_file)
+        logging.critical("%s %s" % (csv_file, os.path.getsize(csv_file)))
         with open(csv_file) as f:
             records = [ r for r in csv.DictReader(f) ]
             logging.critical("start=%s end=%s" % (records[0]['tpl'],

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/kineticstools.git



More information about the debian-med-commit mailing list