[med-svn] [fastaq] 01/01: new upstream

Jorge Soares jssoares-guest at moszumanska.debian.org
Tue Nov 18 16:17:28 UTC 2014


This is an automated email from the git hooks/post-receive script.

jssoares-guest pushed a commit to branch master
in repository fastaq.

commit 35b6b76c8a48c9e8702763abee5fef92c9dbc718
Author: Jorge Soares <j.s.soares at gmail.com>
Date:   Tue Nov 18 16:16:56 2014 +0000

    new upstream
---
 fastaq/tasks.py                                    | 31 +++++++++++-----------
 ...sequences_test_fastaq_to_quasr_primers.expected |  2 --
 .../data/sequences_test_fastaq_to_quasr_primers.fa |  4 ---
 fastaq/tests/data/tasks_test_sequence_trim_1.fa    | 24 ++++++++++++-----
 .../data/tasks_test_sequence_trim_1.trimmed.fa     | 14 ++++++----
 fastaq/tests/data/tasks_test_sequence_trim_2.fa    | 24 ++++++++++++-----
 .../data/tasks_test_sequence_trim_2.trimmed.fa     | 14 ++++++----
 fastaq/tests/data/tasks_test_sequences_to_trim.fa  |  8 ++----
 fastaq/tests/tasks_test.py                         | 11 +-------
 scripts/fastaq_sequence_trim                       |  4 ++-
 scripts/fastaq_to_quasr_primers_file               | 12 ---------
 setup.py                                           |  2 +-
 12 files changed, 76 insertions(+), 74 deletions(-)

diff --git a/fastaq/tasks.py b/fastaq/tasks.py
index 068a640..1a7d378 100644
--- a/fastaq/tasks.py
+++ b/fastaq/tasks.py
@@ -467,10 +467,16 @@ def search_for_seq(infile, outfile, search_string):
     utils.close(fout)
 
 
-def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50):
-    trim_seqs = {}
-    file_to_dict(to_trim_file, trim_seqs)
-    trim_seqs = [x.seq for x in trim_seqs.values()]
+def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_length=50, check_revcomp=False):
+    to_trim_seqs = {}
+    file_to_dict(to_trim_file, to_trim_seqs)
+    trim_seqs = [x.seq for x in to_trim_seqs.values()]
+    if check_revcomp:
+        for seq in to_trim_seqs.values():
+            seq.revcomp()
+        trim_seqs_revcomp = [x.seq for x in to_trim_seqs.values()]
+    else:
+        trim_seqs_revcomp = []
 
     seq_reader_1 = sequences.file_reader(infile_1)
     seq_reader_2 = sequences.file_reader(infile_2)
@@ -490,6 +496,11 @@ def sequence_trim(infile_1, infile_2, outfile_1, outfile_2, to_trim_file, min_le
                     seq.trim(len(trim_seq),0)
                     break
 
+            for trim_seq in trim_seqs_revcomp:
+                if seq.seq.endswith(trim_seq):
+                    seq.trim(0,len(trim_seq))
+                    break
+
         if len(seq_1) >= min_length and len(seq_2) >= min_length:
             print(seq_1, file=f_out_1)
             print(seq_2, file=f_out_2)
@@ -679,18 +690,6 @@ def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False
     sequences.Fasta.line_length = original_line_length
 
 
-def to_quasr_primers(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq2 = copy.copy(seq)
-        seq2.revcomp()
-        print(seq.seq, seq2.seq, sep='\t', file=f_out)
-
-    utils.close(f_out)
-
-
 def to_fasta_union(infile, outfile, seqname='union'):
     seq_reader = sequences.file_reader(infile)
     new_seq = []
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
deleted file mode 100644
index 88ce837..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-ACGT	ACGT
-AG	CT
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
deleted file mode 100644
index be7c130..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-ACGT
->2
-AG
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.fa
index 28f665b..ac2ff83 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_1.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_1.fa
@@ -1,12 +1,24 @@
 >1/1
-TRIM1GCTCGAGCT
+1234567890
 >2/1
-TRIM1AGCTAGCTAG
+AACG123456789
 >3/1
-CGCTAGCTAG
+1234567890
 >4/1
-TRIM2AGCTAGCTAG
+AACG1234567890
 >5/1
-AGCTAGCTAG
+1234567890
 >6/1
-TRIM4AGCTAGCTAG
+AACG1234567890
+>7/1
+123456789AGGC
+>8/1
+123456789
+>9/1
+1234567890AGGC
+>10/1
+AACG123456789CGTT
+>11/1
+AACG1234567890CGTT
+>12/1
+AACG1234567890CGTT
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
index 0bebad8..0512244 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_1.trimmed.fa
@@ -1,8 +1,12 @@
->3/1
-CGCTAGCTAG
+>1/1
+1234567890
 >4/1
-AGCTAGCTAG
+1234567890
 >5/1
-AGCTAGCTAG
+1234567890
 >6/1
-AGCTAGCTAG
+1234567890
+>9/1
+1234567890
+>12/1
+1234567890
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.fa
index 7514250..cf3e872 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_2.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_2.fa
@@ -1,12 +1,24 @@
 >1/2
-TRIM1ACGTACGTAC
+1234567890
 >2/2
-TRIM2ACGTAGTGA
+1234567890
 >3/2
-ACGCTGCAGTCAGTCAGTAT
+AACG123456789
 >4/2
-TRIM3CGATCGATCG
+1234567890
 >5/2
-TRIM3CGATCGATCG
+AACG1234567890
 >6/2
-CGATCGATCG
+GCCT1234567890
+>7/2
+1234567890
+>8/2
+123456789AGGC
+>9/2
+1234567890CGTT
+>10/2
+AACG1234567890CGTT
+>11/2
+AACG123456789CGTT
+>12/2
+AACG1234567890CGTT
diff --git a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
index ec80f40..432f60a 100644
--- a/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
+++ b/fastaq/tests/data/tasks_test_sequence_trim_2.trimmed.fa
@@ -1,8 +1,12 @@
->3/2
-ACGCTGCAGTCAGTCAGTAT
+>1/2
+1234567890
 >4/2
-CGATCGATCG
+1234567890
 >5/2
-CGATCGATCG
+1234567890
 >6/2
-CGATCGATCG
+1234567890
+>9/2
+1234567890
+>12/2
+1234567890
diff --git a/fastaq/tests/data/tasks_test_sequences_to_trim.fa b/fastaq/tests/data/tasks_test_sequences_to_trim.fa
index 395eaaa..cd2aa28 100644
--- a/fastaq/tests/data/tasks_test_sequences_to_trim.fa
+++ b/fastaq/tests/data/tasks_test_sequences_to_trim.fa
@@ -1,8 +1,4 @@
 >1
-TRIM1
+AACG
 >2
-TRIM2
->3
-TRIM3
->4
-TRIM4
+GCCT
diff --git a/fastaq/tests/tasks_test.py b/fastaq/tests/tasks_test.py
index 36ebfba..7528815 100644
--- a/fastaq/tests/tasks_test.py
+++ b/fastaq/tests/tasks_test.py
@@ -291,7 +291,7 @@ class TestSequenceTrim(unittest.TestCase):
         to_trim = os.path.join(data_dir, 'tasks_test_sequences_to_trim.fa')
         expected1 = os.path.join(data_dir, 'tasks_test_sequence_trim_1.trimmed.fa')
         expected2 = os.path.join(data_dir, 'tasks_test_sequence_trim_2.trimmed.fa')
-        tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10)
+        tasks.sequence_trim(in1, in2, tmp1, tmp2, to_trim, min_length=10, check_revcomp=True)
         self.assertTrue(filecmp.cmp(expected1, tmp1))
         self.assertTrue(filecmp.cmp(expected2, tmp2))
         os.unlink(tmp1)
@@ -478,15 +478,6 @@ class TestStripIlluminaSuffix(unittest.TestCase):
         os.unlink(tmpfile)
 
 
-class TestToQuasrPrimers(unittest.TestCase):
-    def test_to_quasr_primers(self):
-        '''Check that fasta file gets converted to QUASR sequence file'''
-        tmpfile = 'tmp.primers'
-        tasks.to_quasr_primers(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.fa'), tmpfile)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.expected'), tmpfile))
-        os.unlink(tmpfile)
-
-
 class TestToFasta(unittest.TestCase):
     def test_to_fasta(self):
         '''Test to_fasta'''
diff --git a/scripts/fastaq_sequence_trim b/scripts/fastaq_sequence_trim
index 50a4f34..7021c6c 100755
--- a/scripts/fastaq_sequence_trim
+++ b/scripts/fastaq_sequence_trim
@@ -7,6 +7,7 @@ parser = argparse.ArgumentParser(
     description = 'Trims sequences off the start of all sequences in a pair of fasta/q files, whenever there is a perfect match. Only keeps a read pair if both reads of the pair are at least a minimum length after any trimming',
     usage = '%(prog)s [options] <fasta/q 1 in> <fastaq/2 in> <out 1> <out 2> <trim_seqs>')
 parser.add_argument('--min_length', type=int, help='Minimum length of output sequences [%(default)s]', default=50, metavar='INT')
+parser.add_argument('--revcomp', action='store_true', help='Trim the end of each sequence if it matches the reverse complement. This option is intended for PCR primer trimming') 
 parser.add_argument('infile_1', help='Name of forward fasta/q file to be trimmed', metavar='fasta/q 1 in')
 parser.add_argument('infile_2', help='Name of reverse fasta/q file to be trimmed', metavar='fasta/q 2 in')
 parser.add_argument('outfile_1', help='Name of output forward fasta/q file', metavar='out_1')
@@ -19,5 +20,6 @@ tasks.sequence_trim(
     options.outfile_1,
     options.outfile_2,
     options.trim_seqs,
-    min_length=options.min_length
+    min_length=options.min_length,
+    check_revcomp=options.revcomp
 )
diff --git a/scripts/fastaq_to_quasr_primers_file b/scripts/fastaq_to_quasr_primers_file
deleted file mode 100755
index 8e5bf7c..0000000
--- a/scripts/fastaq_to_quasr_primers_file
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-from fastaq import tasks
-
-parser = argparse.ArgumentParser(
-    description = 'Converts a fasta/q file to QUASR primers format: just the sequence on each line and its reverse complement, tab separated',
-    usage = '%(prog)s <fasta/q in> <outfile>')
-parser.add_argument('infile', help='Name of input fasta/q file')
-parser.add_argument('outfile', help='Name of output file')
-options = parser.parse_args()
-tasks.to_quasr_primers(options.infile, options.outfile)
diff --git a/setup.py b/setup.py
index 3064862..5506ba9 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ def read(fname):
 
 setup(
     name='Fastaq',
-    version='1.5.0',
+    version='1.6.0',
     description='Scripts to manipulate FASTA and FASTQ files, plus API for developers',
     long_description=read('README.md'),
     packages = find_packages(),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git



More information about the debian-med-commit mailing list