[med-svn] r23666 - in trunk/packages/metastudent/trunk/debian: . patches

Tatiana Malygina latticetower-guest at moszumanska.debian.org
Sun Feb 5 11:21:34 UTC 2017


Author: latticetower-guest
Date: 2017-02-05 11:21:33 +0000 (Sun, 05 Feb 2017)
New Revision: 23666

Added:
   trunk/packages/metastudent/trunk/debian/patches/
   trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch
   trunk/packages/metastudent/trunk/debian/patches/series
Modified:
   trunk/packages/metastudent/trunk/debian/changelog
Log:
add patch to parse blast+ output correctly (closes: #848903)

Modified: trunk/packages/metastudent/trunk/debian/changelog
===================================================================
--- trunk/packages/metastudent/trunk/debian/changelog	2017-02-05 07:36:49 UTC (rev 23665)
+++ trunk/packages/metastudent/trunk/debian/changelog	2017-02-05 11:21:33 UTC (rev 23666)
@@ -5,6 +5,7 @@
   * add details on package testsuite failure to REAMDE.Debian,
     add to README.test how to test manually (currently metastudent supports
     legacy blast only and fails on blast+ output parsing).
+  * add patch to parse blast+ output correctly (closes: #848903).
 
  -- Tatiana Malygina <merlettaia at gmail.com>  Sat, 16 Jul 2016 09:24:57 +0300
 

Added: trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch
===================================================================
--- trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch	                        (rev 0)
+++ trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch	2017-02-05 11:21:33 UTC (rev 23666)
@@ -0,0 +1,229 @@
+Author: Tatiana Malygina <merlettaia at gmail.com>
+Last-Update: 2017-02-05
+Description: metastudent supports only the legacy version of BLAST.
+ This patch makes it work with blast+, which is now the main tool associated
+ with the blast2 package.
+ Metastudent's temporary files were inspected and compared for blast+ and
+ legacy blast. Because the different stages of the metastudent pipeline each
+ parse blast output separately, any of them could produce incorrect output.
+ This patch fixes the parsing in each of those stages.
+Bug-Debian: https://bugs.debian.org/848903
+
+--- a/metastudentPkg/lib/groupA/GOSSIP/src/GOSSIPSTarter.java
++++ b/metastudentPkg/lib/groupA/GOSSIP/src/GOSSIPSTarter.java
+@@ -62,9 +62,23 @@
+ 		try 
+ 		{
+ 			scanner = new Scanner(new FileInputStream(path));
++			boolean usePSIBlast = false;
++			boolean prefixSkipped = false;
+ 			while (scanner.hasNextLine())
+ 			{
+ 				String currLine = scanner.nextLine() + NL;
++				if (currLine.startsWith("PSIBLAST"))
++					usePSIBlast = true;
++				if (usePSIBlast && currLine.startsWith("Results from round 1"))
++				{
++					if (prefixSkipped &&
++						text.toString().replaceAll("\\s", "").length() > 0)
++					{
++						indivdResults.add(text.toString());
++						text = new StringBuilder();
++					}
++					prefixSkipped = true;
++				}
+ 				if(currLine.startsWith("BLASTP") && text.toString().replaceAll("\\s","").length() > 0)
+ 				{
+ //					System.out.println(text.toString());
+@@ -75,7 +89,8 @@
+ 				}
+ 				text.append(currLine);
+ 			}
+-			indivdResults.add(text.toString());
++			if (!(usePSIBlast) || prefixSkipped)
++				indivdResults.add(text.toString());
+ 		} 
+ 		catch (FileNotFoundException e) 
+ 		{
+--- a/metastudentPkg/lib/groupA/GOSSIP/src/Predictor.java
++++ b/metastudentPkg/lib/groupA/GOSSIP/src/Predictor.java
+@@ -128,7 +128,7 @@
+ 		String actGo = "";
+ 		for(int i = 0; i < lines.length; i++)
+ 		{	
+-			if(   (Constants.NUM_BLAST_ITERATIONS==1 && lines[i].startsWith("Searching")) || (Constants.NUM_BLAST_ITERATIONS>1 && lines[i].startsWith("Results from round "))   )
++			if(   (Constants.NUM_BLAST_ITERATIONS==1 && (lines[i].startsWith("Searching") || lines[i].replaceAll("\\s", "").startsWith("Query="))) || (lines[i].startsWith("Results from round "))   )
+ 			{	
+ 				selector.gos.clear();
+ 				selector.length.clear();
+@@ -156,7 +156,7 @@
+ 						go = false;
+ 						actGo =  actGo.replaceAll("\\s+", "");
+ 						selector.gos.add(actGo);
+-						selector.length.add(Integer.parseInt(lines[i].split(" = ")[1]));
++						selector.length.add(Integer.parseInt(lines[i].replaceAll("\\s", "").split("=")[1]));
+ 						i++;
+ 						//System.out.println(line);
+ 						i++;
+@@ -187,4 +187,3 @@
+ 		
+ 	}
+ }
+-
+
+
+--- a/metastudentPkg/lib/groupB/java/BlastFileParser.java
++++ b/metastudentPkg/lib/groupB/java/BlastFileParser.java
+@@ -36,7 +36,18 @@
+ 		File file = new File(dir);
+ 		result_per_blast = new ArrayList<String>();
+ 		BufferedReader in = new BufferedReader(new FileReader(file));
++		boolean prefixSkipped = false;
+ 		while((line = in.readLine()) != null) {
++			if (line.startsWith("Results from round 1")) {
++				if (prefixSkipped) {
++						if (!result_per_blast.isEmpty()) {
++							parse(result_per_blast);
++						}
++				}
++				result_per_blast.clear();
++				result_per_blast.add(line);
++				prefixSkipped = true;
++			}
+ 			if(Pattern.matches("BLASTP.*",line)) {
+ 				if(!result_per_blast.isEmpty()) {
+ 					parse(result_per_blast);
+@@ -60,12 +71,12 @@
+ 		String target = "";
+ 		int round = 0;
+ 		ArrayList<TabulatorFormat> result = new ArrayList<TabulatorFormat>();
+-
+ 		for(int i = 0; i < result_per_blast.size();i++) {
+ 			line = result_per_blast.get(i);
+ 			result.add(new TabulatorFormat());
+ 			if(Pattern.matches("Results\\sfrom\\sround\\s\\d+",line)) {
+-				moreThanOneRound = true;
++				if (Pattern.matches("Results\\sfrom\\sround\\s2", line))
++					moreThanOneRound = true;
+ 				result.add(new TabulatorFormat());
+ 				round++;
+ 				result.get(round).addRound(round);
+@@ -75,15 +86,27 @@
+ 				matcher = pattern.matcher(line);
+ 				matcher.find();
+ 				target = matcher.group(1);
++				i++;
++				line = result_per_blast.get(i);
++				while (Pattern.matches("\\S+", line) &&
++						line.length() > 0 &&
++						!line.replaceAll("\\s", "").startsWith("Length") &&
++						!line.startsWith(">") &&
++						i < result_per_blast.size() - 1) {
++					target += line;
++					i++;
++					line = result_per_blast.get(i);
++				}
++				System.out.println(target);
+ 			}
+-			if(Pattern.matches(">\\S*GO\\S+\\s*",line)) {
++			if(Pattern.matches(">\\s*\\S*GO\\S+\\s*",line)) {
+ 				QueryResult queryResult = new QueryResult();
+-				if(Pattern.matches(">GO.*", line)) {
+-					pattern = Pattern.compile(">(GO\\S+)\\s*");
++				if(Pattern.matches(">\\s*GO.*", line)) {
++					pattern = Pattern.compile(">\\s*(GO\\S+)\\s*");
+ 					matcher = pattern.matcher(line);
+ 					matcher.find();
+ 					goTerms = matcher.group(1);
+-					while(!Pattern.matches("\\s*Length\\s=\\s\\d+.*", line) && i < result_per_blast.size()-1) {
++					while(!Pattern.matches("\\s*Length\\s*=\\s*\\d+.*", line) && i < result_per_blast.size()-1) {
+ 						if(Pattern.matches("\\s*GO\\S*\\s*", line)) {
+ 							pattern = Pattern.compile("\\s*(GO\\S+)\\s*");
+ 							matcher = pattern.matcher(line);
+@@ -94,13 +117,13 @@
+ 						line = result_per_blast.get(i);
+ 					}
+ 					queryResult.setName(goTerms);
+-				} else if (Pattern.matches(">\\S+\\|\\S+.*",line)) {
+-					pattern = Pattern.compile(">(\\S+)\\|(\\S+)\\s*");
++				} else if (Pattern.matches(">\\s*\\S+\\|\\S+.*",line)) {
++					pattern = Pattern.compile(">\\s*(\\S+)\\|(\\S+)\\s*");
+ 					matcher = pattern.matcher(line);
+ 					matcher.find();
+ 					name = matcher.group(1);
+ 					goTerms = matcher.group(2);
+-					while(!Pattern.matches("\\s*Length\\s=\\s\\d+.*", line) && i < result_per_blast.size()-1) {
++					while(!Pattern.matches("\\s*Length\\s*=\\s*\\d+.*", line) && i < result_per_blast.size()-1) {
+ 						if(Pattern.matches("\\s*GO\\S*\\s*", line)) {
+ 							pattern = Pattern.compile("\\s*(GO\\S+)\\s*");
+ 							matcher = pattern.matcher(line);
+@@ -113,15 +136,15 @@
+ 					queryResult.setName(name);
+ 				}
+ 				queryResult.addGoTerms(goTerms);
+-				pattern = Pattern.compile("\\s+Length\\s*=\\s*(\\d+)\\s*");
++				pattern = Pattern.compile("\\s*Length\\s*=\\s*(\\d+)\\s*");
+ 				matcher = pattern.matcher(line);
+ 				matcher.find();
+ 				queryResult.addLength("" + matcher.group(1));
+-				while(!Pattern.matches("\\s*Score\\s=\\s*\\S+\\sbits\\s\\(\\d+\\),\\sExpect\\s=\\s\\S+.*", line) && i < result_per_blast.size()-1) {
++				while(!Pattern.matches("\\s*Score\\s=\\s*\\S+\\sbits\\s\\(\\d+\\),\\s*Expect\\s=\\s\\S+.*", line) && i < result_per_blast.size()-1) {
+ 					i++;
+ 					line = result_per_blast.get(i);
+ 				}
+-				pattern = Pattern.compile("\\s*Score\\s=\\s*(\\S+)\\sbits\\s\\((\\d+)\\),\\sExpect\\s=\\s(\\S+),.*");
++				pattern = Pattern.compile("\\s*Score\\s=\\s*(\\S+)\\sbits\\s\\((\\d+)\\),\\s*Expect\\s=\\s(\\S+),.*");
+ 				matcher = pattern.matcher(line);
+ 				matcher.find();
+ 				queryResult.addScore(Double.parseDouble(matcher.group(1)));
+@@ -185,4 +208,3 @@
+ 	}
+ 
+ }
+-
+
+
+--- a/metastudentPkg/lib/groupC/exercise3.pl
++++ b/metastudentPkg/lib/groupC/exercise3.pl
+@@ -58,11 +58,25 @@
+ #		}
+ 	}
+ 
+-    if(!/^>.*?:(.+)$|^\s+(Score)|(^Searching)|^Query=\s(.*)|^\s(Identities)|^(Database)/)
++    if(!/^>.*?:(.+)$|^\s+(Score)|(^Searching|Results\sfrom\sround\s1)|^Query=\s(.*)|^\s(Identities)|^(Database)/)
+     {
+    		if($parseId)
+ 		{
+-			$target .= $_;
++      if (!/\S+/) {
++        $target =~ s:\r?\n::g;
++      	$target =~ s:\s*::g;
++        if ($target ne "") {
++      	push (@out,"Target: $target\n");
++          $targetid=$target;
++  		$targets{$targetid}="no prediction";
++        }
++          undef $target;
++        $true=1;
++      	$parseId = 0;
++      } else {
++        $target .= $_;
++      }
++			#$target .= $_;
+ 		}
+ 		next;
+     }
+@@ -124,10 +138,12 @@
+     	$target.= $_;
+     	$target =~ s:\r?\n::g;
+     	$target =~ s:\s*::g;
+-    	$target =~ s/\(.*letters\)Database:.*//g;
++    	$target =~ s/(\(.*letters\))?Database:.*//g;
++      if ($target != "") {
+     	push (@out,"Target: $target\n");
+         $targetid=$target;
+ 		$targets{$targetid}="no prediction";
++      }
+         $true=0;
+         undef $target;
+ 

Added: trunk/packages/metastudent/trunk/debian/patches/series
===================================================================
--- trunk/packages/metastudent/trunk/debian/patches/series	                        (rev 0)
+++ trunk/packages/metastudent/trunk/debian/patches/series	2017-02-05 11:21:33 UTC (rev 23666)
@@ -0,0 +1 @@
+01_fix_blastp.patch




More information about the debian-med-commit mailing list