[med-svn] [hinge] 02/12: New upstream version 0.5

Afif Elghraoui afif at moszumanska.debian.org
Fri Oct 20 03:34:36 UTC 2017


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository hinge.

commit e288b49d26c4fac11409e30769a77c4ec2875153
Author: Afif Elghraoui <afif at debian.org>
Date:   Tue Oct 17 23:01:54 2017 -0400

    New upstream version 0.5
---
 demo/NCTC9657_demo/run.sh                |   4 ++
 demo/ecoli_P4_demo/run.sh                |   5 +-
 demo/ecoli_demo/run.sh                   |   4 +-
 demo/ecoli_nanopore/run.sh               |   4 +-
 scripts/draw_pileup_region.py            |  21 ++++--
 scripts/pruning_and_clipping_nanopore.py |   0
 src/consensus/consensus.cpp              |  22 ++++--
 src/consensus/draft.cpp                  | 113 +++++++++++++++++++++++--------
 src/filter/filter.cpp                    | 113 +++++++++++++++++++++++--------
 src/include/LAInterface.h                |   3 +
 src/layout/hinging.cpp                   | 107 +++++++++++++++++++++++------
 src/maximal/maximal.cpp                  | 107 ++++++++++++++++++++---------
 utils/update.sh                          |   2 +-
 13 files changed, 374 insertions(+), 131 deletions(-)

diff --git a/demo/NCTC9657_demo/run.sh b/demo/NCTC9657_demo/run.sh
index 9bdbf72..6f7dda0 100644
--- a/demo/NCTC9657_demo/run.sh
+++ b/demo/NCTC9657_demo/run.sh
@@ -14,8 +14,12 @@ mkdir log
 
 
 hinge filter --db NCTC9657 --las NCTC9657 --mlas -x NCTC9657 --config ../../utils/nominal.ini
+
+hinge maximal --db NCTC9657 --las NCTC9657 --mlas -x NCTC9657 --config ../../utils/nominal.ini
+
 hinge layout --db NCTC9657 --las NCTC9657.las -x NCTC9657 --config ../../utils/nominal.ini -o NCTC9657
 
+
 hinge clip NCTC9657.edges.hinges NCTC9657.hinge.list demo
 
 hinge draft-path $PWD NCTC9657 NCTC9657demo.G2.graphml
diff --git a/demo/ecoli_P4_demo/run.sh b/demo/ecoli_P4_demo/run.sh
index 5b9e9b3..40f0ec0 100644
--- a/demo/ecoli_P4_demo/run.sh
+++ b/demo/ecoli_P4_demo/run.sh
@@ -17,7 +17,10 @@ mkdir log
 
 
 
-hinge filter --db ecoli --las "ecoli.*.las" -x ecoli --config ../../utils/nominal.ini
+hinge filter --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
+
+hinge maximal --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
+
 hinge layout --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
 
 hinge clip ecoli.edges.hinges ecoli.hinge.list demo
diff --git a/demo/ecoli_demo/run.sh b/demo/ecoli_demo/run.sh
index 9640f26..30e9deb 100644
--- a/demo/ecoli_demo/run.sh
+++ b/demo/ecoli_demo/run.sh
@@ -22,12 +22,12 @@ hinge filter --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal
 
 hinge maximal --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
 
-hinge layout --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
+hinge layout --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini -o ecoli
 
 hinge clip ecoli.edges.hinges ecoli.hinge.list demo
 
 hinge draft-path $PWD ecoli ecolidemo.G2.graphml
-hinge draft --db ecoli --las ecoli.las --prefix ecoli --config ../../utils/nominal.ini --out ecoli.draft
+hinge draft --db ecoli --las ecoli --mlas --prefix ecoli --config ../../utils/nominal.ini --out ecoli.draft
 
 
 
diff --git a/demo/ecoli_nanopore/run.sh b/demo/ecoli_nanopore/run.sh
index 9953ae4..db66ea0 100644
--- a/demo/ecoli_nanopore/run.sh
+++ b/demo/ecoli_nanopore/run.sh
@@ -17,8 +17,10 @@ DASqv -c100 ecoli ecoli.las
 mkdir log
 
 
-
 hinge filter --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
+
+hinge maximal --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
+
 hinge layout --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
 
 hinge clip-nanopore ecoli.edges.hinges ecoli.hinge.list demo
diff --git a/scripts/draw_pileup_region.py b/scripts/draw_pileup_region.py
index 31dfd11..5e7d75b 100755
--- a/scripts/draw_pileup_region.py
+++ b/scripts/draw_pileup_region.py
@@ -43,13 +43,13 @@ for i,item in enumerate(util.get_alignments_mapping3(ref, read, las, contig)):
 
     if item[3] >= left and item[4] <= right and item[4] - item[3] > length_th:
         aln.append(item)
-        
-        
-        
+
+
+
 covy = np.zeros((right - left, ))
 for item in aln:
     covy[item[3] - left : item[4] - left] += 1
-    
+
 covx = np.arange(left, right)
 
 
@@ -69,7 +69,8 @@ for item in aln:
     else:
         aln_group.append(item)
 
-num = len(alns)
+#num = len(alns)
+num = len(aln)
 
 print len(aln), len(alns)
 
@@ -102,7 +103,15 @@ ax1.add_line(dotted_line)
 dotted_line2 = plt.Line2D((right, right), (0, num*grid_size ),ls='-.')
 ax1.add_line(dotted_line2)
 
-for i,aln_group in enumerate(alns):
+alns_all = []
+for item in alns:
+    for aln in item:
+        alns_all.append([aln])
+
+alns_all.sort(key = lambda x:min([item[3] for item in x]))
+
+
+for i,aln_group in enumerate(alns_all):
     for item in aln_group:
         abpos = item[3]
         aepos = item[4]
diff --git a/scripts/pruning_and_clipping_nanopore.py b/scripts/pruning_and_clipping_nanopore.py
old mode 100644
new mode 100755
diff --git a/src/consensus/consensus.cpp b/src/consensus/consensus.cpp
index 578d855..b6df26f 100644
--- a/src/consensus/consensus.cpp
+++ b/src/consensus/consensus.cpp
@@ -59,6 +59,20 @@ char toLower(char c) {
 
 }
 
+int remove_multialign(std::vector<LAlignment *> idx, int idx_size, int LENGTH_THRESHOLD)
+{
+    int i, j, r=0; 
+     
+    for(i = 0; i < idx_size; i ++) {
+        if (idx[i]->aepos - idx[i]->abpos >= LENGTH_THRESHOLD) {
+            for(j = 0; j < r; j ++)
+                if(idx[j]->read_B_id_ == idx[i]->read_B_id_) break;
+            if(j == r) 
+                idx[r++] = idx[i];
+        }
+    }     
+    return r;
+}
 
 int main(int argc, char *argv[]) {
 
@@ -135,13 +149,7 @@ int main(int argc, char *argv[]) {
     for (int i = 0; i < n_contigs; i++) {
 
 
-
-        int k = 0;
-        for (k = 0; k < idx[i].size(); k++)
-            if (idx[i][k]->aepos - idx[i][k]->abpos < LENGTH_THRESHOLD)
-                break;
-
-        int seq_count = k;
+        int seq_count = remove_multialign(idx[i], idx[i].size(),LENGTH_THRESHOLD);
 
         std::cout << "Contig " << i << ": " << seq_count << " reads" << std::endl;
 
diff --git a/src/consensus/draft.cpp b/src/consensus/draft.cpp
index 702a7a7..6da5344 100644
--- a/src/consensus/draft.cpp
+++ b/src/consensus/draft.cpp
@@ -40,6 +40,34 @@ typedef adjacency_list <vecS, vecS, undirectedS> Graph;
 typedef std::tuple<Node, Node, int> Edge_w;
 
 
+inline std::vector<std::string> glob(const std::string& pat){
+    using namespace std;
+    glob_t glob_result;
+    int i = 1;
+    std::string search_name;
+    search_name = pat + "."+std::to_string(i)+".las";
+    std::cout << search_name << endl;
+    glob(search_name.c_str(),GLOB_TILDE,NULL,&glob_result);
+
+    vector<string> ret;
+
+    while (glob_result.gl_pathc != 0){
+        ret.push_back(string(glob_result.gl_pathv[0]));
+        i ++;
+        search_name = pat + "."+std::to_string(i)+".las";
+        glob(search_name.c_str(),GLOB_TILDE,NULL,&glob_result);
+//        std::cout << "Number of files " << glob_result.gl_pathc << std::endl;
+    }
+
+    std::cout << "-------------------------"<< std::endl;
+    std::cout << "Number of files " << i-1 << std::endl;
+    std::cout << "Input string " << pat.c_str() << std::endl;
+    std::cout << "-------------------------"<< std::endl;
+
+    globfree(&glob_result);
+    return ret;
+}
+
 std::vector<int> get_mapping(std::string aln_tag1, std::string aln_tag2) {
     int pos = 0;
     int count = 0;
@@ -686,18 +714,6 @@ int draft_assembly_ctg(std::vector<Edge_w> & edgelist, LAInterface & la, std::ve
 
 
 
-inline std::vector<std::string> glob(const std::string& pat){
-    using namespace std;
-    glob_t glob_result;
-    glob(pat.c_str(),GLOB_TILDE,NULL,&glob_result);
-    vector<string> ret;
-    for(unsigned int i=0;i<glob_result.gl_pathc;++i){
-        ret.push_back(string(glob_result.gl_pathv[i]));
-    }
-    globfree(&glob_result);
-    return ret;
-};
-
 
 int main(int argc, char *argv[]) {
 
@@ -712,6 +728,7 @@ int main(int argc, char *argv[]) {
     cmdp.add<std::string>("log", 'g', "log folder name", false, "log");
     cmdp.add<std::string>("path", 0, "path file name", false, "path");
     cmdp.add("debug", '\0', "debug mode");
+    cmdp.add("mlas", '\0', "multiple las files");
 
 //    cmdp.add<std::string>("restrictreads",'r',"restrict to reads in the file",false,"");
 
@@ -786,22 +803,24 @@ int main(int argc, char *argv[]) {
     std::vector<std::string> name_las_list;
     std::string name_las_str(name_las);
 
-    if (name_las_str.find('*') != -1)
+    if (cmdp.exist("mlas")) {
         name_las_list = glob(name_las_str);
+        console->info("calling glob.");
+    }
     else
         name_las_list.push_back(name_las_str);
 
 
-    if (strlen(name_las) > 0)
-        la.openAlignmentFile(name_las);
+    //if (strlen(name_las) > 0)
+    //    la.openAlignmentFile(name_las);
 
     int64 n_aln = 0;
 
-    if (strlen(name_las) > 0) {
-        n_aln = la.getAlignmentNumber();
-        console->info("Load alignments from {}", name_las);
-        console->info("# Alignments: {}", n_aln);
-    }
+    //if (strlen(name_las) > 0) {
+    //    n_aln = la.getAlignmentNumber();
+    //    console->info("Load alignments from {}", name_las);
+    //    console->info("# Alignments: {}", n_aln);
+    //}
 
     int n_read;
     if (strlen(name_db) > 0) {
@@ -816,8 +835,6 @@ int main(int argc, char *argv[]) {
 
     console->info("# Reads: {}", n_read); // output some statistics
 
-
-
     if (strlen(name_db) > 0) {
         la.getRead(reads, 0, n_read);
     }
@@ -841,26 +858,65 @@ int main(int argc, char *argv[]) {
         reads[i]->active = maximal_read[i];
     }
 
+    // start loading and cleaning
+    int number_of_parts;
+    number_of_parts = name_las_list.size();
 
     std::vector<int> range;
 
     for (int i = 0; i < n_read; i++) {
         if (reads[i]->active) range.push_back(i);
     }
-
     std::sort(range.begin(), range.end());
 
     std::vector<LOverlap *> aln;//Vector of pointers to all alignments
     std::vector<LAlignment *> full_aln;//Vector of pointers to all alignments
-
-
-    if (strlen(name_las) > 0) {
+    std::vector<LOverlap *> aln_to_remove;//Vector of pointers to all alignments
+    std::vector<LAlignment *> full_aln_to_remove;//Vector of pointers to all alignments
+
+
+    for (int part = 0; part < number_of_parts; part++) {
+        console->info("part:{}", part);
+        console->info("name of las {}", name_las_list[part]);
+        la.openAlignmentFile(name_las_list[part]);
+        int64 n_aln_part = 0;
+        n_aln_part = la.getAlignmentNumber();
+        n_aln += n_aln_part;
+        console->info("Load alignment from {}", name_las_list[part]);
+        console->info("# Alignments: {}", n_aln_part);
         la.resetAlignment();
-        la.getOverlap(aln, range);
+        la.getOverlap(aln_to_remove, range);
         la.resetAlignment();
-		la.getAlignment(full_aln, range);
+        la.getAlignment(full_aln_to_remove, range);
+
+        for (int j = 0; j < aln_to_remove.size(); j++) {
+            if ((reads[aln_to_remove[j]->read_A_id_]->active) && (reads[aln_to_remove[j]->read_B_id_]->active)) {
+                aln.push_back(aln_to_remove[j]);
+            }
+            else delete aln_to_remove[j];
+        }
+
+        for (int j = 0; j < full_aln_to_remove.size(); j++) {
+            if ((reads[full_aln_to_remove[j]->read_A_id_]->active) && (reads[full_aln_to_remove[j]->read_B_id_]->active)) {
+                full_aln.push_back(full_aln_to_remove[j]);
+            }
+            else delete full_aln_to_remove[j];
+        }
+
+        //need to do some cleaning here
+        aln_to_remove.clear();
+        full_aln_to_remove.clear();
+
+
     }
 
+    //if (strlen(name_las) > 0) {
+    //    la.resetAlignment();
+    //    la.getOverlap(aln, range);
+    //    la.resetAlignment();
+	//	la.getAlignment(full_aln, range);
+    //}
+
     if (strlen(name_paf) > 0) {
         n_aln = la.loadPAF(std::string(name_paf), aln);
         console->info("Load alignments from {}", name_paf);
@@ -872,7 +928,6 @@ int main(int argc, char *argv[]) {
         return 1;
     }
 
-
     console->info("Input data finished");
 
     INIReader reader(name_config);
diff --git a/src/filter/filter.cpp b/src/filter/filter.cpp
index eecc1e7..f3981a5 100644
--- a/src/filter/filter.cpp
+++ b/src/filter/filter.cpp
@@ -180,6 +180,8 @@ int main(int argc, char *argv[]) {
     cmdp.add("debug", '\0', "debug mode");
     cmdp.parse_check(argc, argv);
 
+
+
     LAInterface la;
     const char * name_db = cmdp.get<std::string>("db").c_str(); //.db file of reads to load
     const char * name_las_base = cmdp.get<std::string>("las").c_str();//.las file of alignments
@@ -190,10 +192,46 @@ int main(int argc, char *argv[]) {
     bool has_qv = true;
     const char * name_restrict = cmdp.get<std::string>("restrictreads").c_str();
 
+    namespace spd = spdlog;
+
+    //auto console = spd::stdout_logger_mt("console",true);
+
+    std::vector<spdlog::sink_ptr> sinks;
+    sinks.push_back(std::make_shared<spdlog::sinks::stdout_sink_st>());
+    sinks.push_back(std::make_shared<spdlog::sinks::daily_file_sink_st>(cmdp.get<std::string>("log") + "/log", "txt", 23, 59));
+    auto console = std::make_shared<spdlog::logger>("log", begin(sinks), end(sinks));
+    spdlog::register_logger(console);
+    //auto console = std::make_shared<spdlog::logger>("name", begin(sinks), end(sinks));
+
+
+    console->info("Reads filtering");
+
+
+    bool db_and_las, db_or_las, fa_and_paf, fa_or_paf;
+    db_and_las = (strlen(name_db) > 0) and (strlen(name_las_base) > 0);
+    db_or_las = (strlen(name_db) > 0) or (strlen(name_las_base) > 0);
+    fa_and_paf = (strlen(name_fasta) > 0) and (strlen(name_paf) > 0);
+    fa_or_paf = (strlen(name_fasta) > 0) or (strlen(name_paf) > 0);
+
+    if (db_or_las and fa_or_paf){
+        console->error("Pass in either a db and a las or a fasta and a paf");
+        return 1;
+    }
+
+    if (( not fa_and_paf) and (not db_and_las)){
+        console->error("Pass in at least one of the following two combinations: a db and a las or a fasta and a paf");
+        return 1;
+    }
+
     std::string name_las_string;
-    if (cmdp.exist("mlas"))
-        name_las_string =  std::string(name_las_base);
-    else {
+    if (cmdp.exist("mlas")) {
+        if (not db_and_las){
+            console->error("--mlas works only with db and las");
+            return 1;
+        }
+        name_las_string = std::string(name_las_base);
+    }
+    else if (strlen(name_las_base) > 0) {
         if (lastN(std::string(name_las_base), 4) == ".las")
             name_las_string = std::string(name_las_base);
         else
@@ -208,25 +246,15 @@ int main(int argc, char *argv[]) {
      * the other is fasta + paf, which corresponds to minimap as an overlapper.
      */
 
-    namespace spd = spdlog;
 
-    //auto console = spd::stdout_logger_mt("console",true);
-
-    std::vector<spdlog::sink_ptr> sinks;
-    sinks.push_back(std::make_shared<spdlog::sinks::stdout_sink_st>());
-    sinks.push_back(std::make_shared<spdlog::sinks::daily_file_sink_st>(cmdp.get<std::string>("log") + "/log", "txt", 23, 59));
-    auto console = std::make_shared<spdlog::logger>("log", begin(sinks), end(sinks));
-    spdlog::register_logger(console);
-    //auto console = std::make_shared<spdlog::logger>("name", begin(sinks), end(sinks));
-
-
-    console->info("Reads filtering");
 
+//    std::cout << "here now " <<  std::endl;
 
     console->info("name of db: {}, name of .las file {}", name_db, name_las);
     console->info("name of fasta: {}, name of .paf file {}", name_fasta, name_paf);
 
 
+
     std::ifstream ini_file(name_config);
     std::string str((std::istreambuf_iterator<char>(ini_file)),
                     std::istreambuf_iterator<char>());
@@ -426,14 +454,31 @@ int main(int argc, char *argv[]) {
     std::ofstream covflag(out + ".cov.flag");
     std::ofstream selfflag(out + ".self.flag");
 
-    for (int part = 0; part < name_las_list.size(); part++) {
+//    std::cout << "LAS list length "<<  name_las_list.size() << std::endl;
+
+//    for (int ind = 0; ind < name_las_list.size() ; ind ++)
+//        std::cout << "name of las: "<< name_las_list[ind] << std::endl;
 
+    int number_of_parts;
+    if (strlen(name_las) > 0)
+        number_of_parts = name_las_list.size();
+    else if(strlen(name_paf) > 0)
+        number_of_parts = 1;
+    else {
+        console->error("Need to provide either las and db or paf and fasta");
+        return 1;
+    }
 
-        console->info("name of las: {}", name_las_list[part]);
+    for (int part = 0; part < number_of_parts; part++)
+    {
+        console->info("part: {}", part);
 
 
-        if (strlen(name_las_list[part].c_str()) > 0)
-            la.openAlignmentFile(name_las_list[part]);
+        if (strlen(name_las) > 0) {
+            console->info("name of las: {}", name_las_list[part]);
+            if (strlen(name_las_list[part].c_str()) > 0)
+                la.openAlignmentFile(name_las_list[part]);
+        }
 
         int64 n_aln = 0;
 
@@ -460,7 +505,7 @@ int main(int argc, char *argv[]) {
             return 1;
         }
 
-        console->info("Input data finished, part {}/{}", part + 1, name_las_list.size());
+        console->info("Input data finished, part {}/{}", part + 1, number_of_parts);
 
 
         console->info("length of alignments {}", aln.size());
@@ -500,13 +545,15 @@ int main(int argc, char *argv[]) {
             }
         }
 
+
+
         std::set<int> self_match_reads;
         for (auto it : self_aln_list) {
             float cov = 0.0;
             for (int i = 0; i < it.second.size(); i++)
                 cov += it.second[i].second - it.second[i].first;
             cov /= float(reads[it.first]->len);
-            std::cout << "selfcov: " <<  it.first << " " << cov << " " << reads[it.first]->len << std::endl;
+//            std::cout << "selfcov: " <<  it.first << " " << cov << " " << reads[it.first]->len << std::endl;
             if ((cov > 4.5) and (reads[it.first]->len > 10000))
                 self_match_reads.insert(it.first);
         }
@@ -564,16 +611,21 @@ int main(int argc, char *argv[]) {
             cgs[i] = (cg);
         }
 
-        console->info("profile coverage done part {}/{}", part + 1, name_las_list.size());
+        console->info("profile coverage done part {}/{}", part + 1, number_of_parts);
 
 
         std::set<int> rand_reads;
         srand(time(NULL));
         rand_reads.insert(0);
+        int temp_index(0);
         while (rand_reads.size() < (r_end - r_begin)/500){
+            temp_index ++;
             int rd_id=rand()%(r_end - r_begin) + r_begin;
             if (reads[rd_id]->len > 5000)
                 rand_reads.insert(rd_id);
+
+            if (temp_index > 20000)
+                break;
         }
 
         int num_slot = 0;
@@ -775,7 +827,6 @@ int main(int argc, char *argv[]) {
         }
 
 
-
         // need a better hinge detection
 
         // get hinges from repeat annotation information
@@ -1016,6 +1067,8 @@ int main(int argc, char *argv[]) {
             }
         }
 
+        console->info("reached end of loop");
+
         //output hinges
 
         int ra_cnt = 0;
@@ -1042,22 +1095,22 @@ int main(int argc, char *argv[]) {
             hg << std::endl;
         }
 
-
         console->info("Number of hinges: {}", hg_cnt);
 
 
-
-        for (int i = 0; i < aln.size(); i++) {
-            free(aln[i]);
+        if (strlen(name_las) > 0) {
+            for (int i = 0; i < aln.size(); i++) {
+                delete aln[i];
+            }
+            aln.clear();
         }
-        aln.clear();
+        console->info("part: {}", part);
+//        console->info("going through: {}", part+1 < name_las_list.size());
     }
 
 
-
     hg.close();
 
-
     if (strlen(name_db)>0)
         la.closeDB(); //close database
     return 0;
diff --git a/src/include/LAInterface.h b/src/include/LAInterface.h
index 51b6884..4f1e845 100644
--- a/src/include/LAInterface.h
+++ b/src/include/LAInterface.h
@@ -76,6 +76,9 @@ public:
 class LOverlap { // LOverlap is a simplified version of LAlignment, no trace
 public:
     LOverlap() { };
+	
+	~LOverlap() {free(trace_pts); };
+	
 	void show() {printf("%d %d %d [%d...%d]/%d x [%d...%d]/%d %d diffs, %d type\n", read_A_id_, read_B_id_,
 						reverse_complement_match_,
 						read_A_match_start_, read_A_match_end_, alen, read_B_match_start_, read_B_match_end_, blen, diffs,
diff --git a/src/layout/hinging.cpp b/src/layout/hinging.cpp
index a85d18b..f4ab31a 100644
--- a/src/layout/hinging.cpp
+++ b/src/layout/hinging.cpp
@@ -344,7 +344,8 @@ void PrintOverlapToFile2(FILE * file_pointer, LOverlap * match, int hinge_pos) {
 
 void GetAlignment ( LAInterface &la, std::vector<Read *> & reads, std::vector<std::unordered_map<int, std::vector<LOverlap *> > > & idx_ab,
                     std::vector<std::vector<LOverlap *>> & matches_forward, std::vector<std::vector<LOverlap *>>& matches_backward,
-                    int n_read, const char *name_db, const char *name_las_base, bool mult_las,
+                    int n_read, const char *name_db, const char *name_las_base,
+                    const char *name_paf, bool mult_las,
                     int ALN_THRESHOLD, int THETA, int THETA2, bool USE_TWO_MATCHES, int64 n_aln_full,
                     const std::shared_ptr<spdlog::logger> console,
                     std::string name_maximal_reads, bool KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS ){
@@ -357,14 +358,18 @@ void GetAlignment ( LAInterface &la, std::vector<Read *> & reads, std::vector<st
     int64 n_rev_aln_kept_full(0);
     std::string name_las_string;
     console->info("Multiple las files: {}", mult_las);
+    if (strlen(name_paf) > 0)
+        console->info("Loading from paf: {}", name_paf);
 
-    if (mult_las)
-        name_las_string =  std::string(name_las_base);
-    else {
-        if (lastN(std::string(name_las_base), 4) == ".las")
+    if (strlen(name_las_base) > 0) {
+        if (mult_las)
             name_las_string = std::string(name_las_base);
-        else
-            name_las_string = std::string(name_las_base) + ".las";
+        else {
+            if (lastN(std::string(name_las_base), 4) == ".las")
+                name_las_string = std::string(name_las_base);
+            else
+                name_las_string = std::string(name_las_base) + ".las";
+        }
     }
 
     n_aln_full = 0;
@@ -374,12 +379,17 @@ void GetAlignment ( LAInterface &la, std::vector<Read *> & reads, std::vector<st
     std::string name_las_str(name_las);
     console->info("Las files: {}", name_las_str);
 
-    if (mult_las) {
+    if (mult_las and strlen(name_las_base) > 0) {
         console->info("Calling glob.");
         name_las_list = glob(name_las_str);
     }
-    else
+    else if (strlen(name_las_base) > 0)
         name_las_list.push_back(name_las_str);
+    else{
+        name_las_str = std::string(name_paf);
+        name_las_list.push_back(name_las_str);
+    }
+
 
     console->info("number of las files: {}", name_las_list.size());
 
@@ -399,31 +409,50 @@ void GetAlignment ( LAInterface &la, std::vector<Read *> & reads, std::vector<st
         reads[i]->active = (reads[i]->active) and (maximal_read[i]);
     }
 
+    int number_of_parts;
+    if (strlen(name_las) > 0)
+        number_of_parts = name_las_list.size();
+    else if(strlen(name_paf) > 0)
+        number_of_parts = 1;
+    else {
+        console->error("Need to provide either las and db or paf and fasta");
+    }
 
-    for (int part = 0; part < name_las_list.size(); part++) {
+    for (int part = 0; part < number_of_parts; part++) {
 
-        console->info("name of las: {}", name_las_list[part]);
+        if (strlen(name_las_base) > 0) {
+            console->info("name of las: {}", name_las_list[part]);
 
-        if (strlen(name_las_list[part].c_str()) > 0)
-            la.openAlignmentFile(name_las_list[part]);
+            if (strlen(name_las_list[part].c_str()) > 0)
+                la.openAlignmentFile(name_las_list[part]);
+        }
 
         int64 n_aln = 0;
         int64 n_aln_accept = 0;
         int64 n_aln_rcomp_accept = 0;
+        std::vector<LOverlap *> aln;//Vector of pointers to all alignments
+
+        if (strlen(name_las_base) > 0) {
+            if (strlen(name_las_list[part].c_str()) > 0) {
+                n_aln = la.getAlignmentNumber();
+                console->info("Load alignments from {}", name_las_list[part]);
+                console->info("# Alignments: {}", n_aln);
+            }
+            if (strlen(name_las_list[part].c_str()) > 0) {
+                la.resetAlignment();
+                la.getOverlap(aln, 0, n_read);
+            }
+        }
 
-        if (strlen(name_las_list[part].c_str()) > 0) {
-            n_aln = la.getAlignmentNumber();
-            console->info("Load alignments from {}", name_las_list[part]);
+        if (strlen(name_paf) > 0){
+            n_aln = la.loadPAF(std::string(name_paf), aln);
+            console->info("Load alignments from {}", name_paf);
             console->info("# Alignments: {}", n_aln);
         }
 
-        std::vector<LOverlap *> aln;//Vector of pointers to all alignments
 
 
-        if (strlen(name_las_list[part].c_str()) > 0) {
-            la.resetAlignment();
-            la.getOverlap(aln, 0, n_read);
-        }
+
 
         int r_begin = aln.front()->read_A_id_;
         int r_end = aln.back()->read_A_id_;
@@ -466,6 +495,14 @@ void GetAlignment ( LAInterface &la, std::vector<Read *> & reads, std::vector<st
             n_rev_overlaps += aln[i]->reverse_complement_match_;
         }
 
+
+        for (int i = 0; i < aln.size(); i++) {
+            if ( not ((reads[aln[i]->read_A_id_]->active) and
+                ((reads[aln[i]->read_B_id_]->active) and KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS)))
+                if (strlen(name_las_base) > 0)
+                    delete aln[i];
+        }
+
         console->info("kept {}/{} overlaps,  {}/{} rev_overlaps in part {}/{}",n_aln_accept,
                       n_overlaps, n_aln_rcomp_accept,
                       n_rev_overlaps,
@@ -646,6 +683,7 @@ int main(int argc, char *argv[]) {
 
     console->info("Hinging layout");
 
+
     bool mult_las;
     mult_las = cmdp.exist("mlas");
     console->info("name of db: {}, name of .las file {}", name_db, name_las);
@@ -655,6 +693,30 @@ int main(int argc, char *argv[]) {
     console->info("Multiple las files: {}", mult_las);
     console->info("Multiple las files: {}", cmdp.exist("mlas"));
 
+    bool db_and_las, db_or_las, fa_and_paf, fa_or_paf;
+    db_and_las = (strlen(name_db) > 0) and (strlen(name_las) > 0);
+    db_or_las = (strlen(name_db) > 0) or (strlen(name_las) > 0);
+    fa_and_paf = (strlen(name_fasta) > 0) and (strlen(name_paf) > 0);
+    fa_or_paf = (strlen(name_fasta) > 0) or (strlen(name_paf) > 0);
+
+    if (db_or_las and fa_or_paf){
+        console->error("Pass in either a db and a las or a fasta and a paf");
+        return 1;
+    }
+
+    if (( not fa_and_paf) and (not db_and_las)){
+        console->error("Pass in at least one of the following two combinations: a db and a las or a fasta and a paf");
+        return 1;
+    }
+
+    if (cmdp.exist("mlas")) {
+        if (not db_and_las) {
+            console->error("--mlas works only with db and las");
+            return 1;
+        }
+    }
+
+
     std::ifstream ini_file(name_config);
     std::string str((std::istreambuf_iterator<char>(ini_file)),
                     std::istreambuf_iterator<char>());
@@ -919,7 +981,8 @@ int main(int argc, char *argv[]) {
 
 
     GetAlignment ( la, reads,  idx_ab, matches_forward, matches_backward,
-            n_read,  name_db, name_las, mult_las, ALN_THRESHOLD,  THETA,  THETA2,
+            n_read,  name_db, name_las, name_paf,
+                   mult_las, ALN_THRESHOLD,  THETA,  THETA2,
                    USE_TWO_MATCHES, n_aln, console, name_max,
                    KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS);
 
diff --git a/src/maximal/maximal.cpp b/src/maximal/maximal.cpp
index 7850fc5..38a0f53 100644
--- a/src/maximal/maximal.cpp
+++ b/src/maximal/maximal.cpp
@@ -261,10 +261,45 @@ int main(int argc, char *argv[]) {
     const char * name_restrict = cmdp.get<std::string>("restrictreads").c_str();
     std::string name_mask = out + ".mas";
 
+    namespace spd = spdlog;
+
+    //auto console = spd::stdout_logger_mt("console",true);
+
+    std::vector<spdlog::sink_ptr> sinks;
+    sinks.push_back(std::make_shared<spdlog::sinks::stdout_sink_st>());
+    sinks.push_back(std::make_shared<spdlog::sinks::daily_file_sink_st>(cmdp.get<std::string>("log") + "/log", "txt", 23, 59));
+    auto console = std::make_shared<spdlog::logger>("log", begin(sinks), end(sinks));
+    spdlog::register_logger(console);
+    //auto console = std::make_shared<spdlog::logger>("name", begin(sinks), end(sinks));
+
+
+    console->info("Getting maximal reads");
+
+    bool db_and_las, db_or_las, fa_and_paf, fa_or_paf;
+    db_and_las = (strlen(name_db) > 0) and (strlen(name_las_base) > 0);
+    db_or_las = (strlen(name_db) > 0) or (strlen(name_las_base) > 0);
+    fa_and_paf = (strlen(name_fasta) > 0) and (strlen(name_paf) > 0);
+    fa_or_paf = (strlen(name_fasta) > 0) or (strlen(name_paf) > 0);
+
+    if (db_or_las and fa_or_paf){
+        console->error("Pass in either a db and a las or a fasta and a paf");
+        return 1;
+    }
+
+    if (( not fa_and_paf) and (not db_and_las)){
+        console->error("Pass in at least one of the following two combinations: a db and a las or a fasta and a paf");
+        return 1;
+    }
+
     std::string name_las_string;
-    if (cmdp.exist("mlas"))
-        name_las_string =  std::string(name_las_base);
-    else {
+    if (cmdp.exist("mlas")) {
+        if (not db_and_las){
+            console->error("--mlas works only with db and las");
+            return 1;
+        }
+        name_las_string = std::string(name_las_base);
+    }
+    else if (strlen(name_las_base) > 0) {
         if (lastN(std::string(name_las_base), 4) == ".las")
             name_las_string = std::string(name_las_base);
         else
@@ -278,19 +313,7 @@ int main(int argc, char *argv[]) {
      * the other is fasta + paf, which corresponds to minimap as an overlapper.
      */
 
-    namespace spd = spdlog;
 
-    //auto console = spd::stdout_logger_mt("console",true);
-
-    std::vector<spdlog::sink_ptr> sinks;
-    sinks.push_back(std::make_shared<spdlog::sinks::stdout_sink_st>());
-    sinks.push_back(std::make_shared<spdlog::sinks::daily_file_sink_st>(cmdp.get<std::string>("log") + "/log", "txt", 23, 59));
-    auto console = std::make_shared<spdlog::logger>("log", begin(sinks), end(sinks));
-    spdlog::register_logger(console);
-    //auto console = std::make_shared<spdlog::logger>("name", begin(sinks), end(sinks));
-
-
-    console->info("Getting maximal reads");
 
 
     console->info("name of db: {}, name of .las file {}", name_db, name_las);
@@ -522,7 +545,17 @@ int main(int argc, char *argv[]) {
     }
     console->info("active reads after correcting for read lengths: {}", num_active_read);
 
-    console->info("number of las files: {}", name_las_list.size());
+    int number_of_parts;
+    if (strlen(name_las) > 0)
+        number_of_parts = name_las_list.size();
+    else if(strlen(name_paf) > 0)
+        number_of_parts = 1;
+    else {
+        console->error("Need to provide either las and db or paf and fasta");
+        return 1;
+    }
+
+    console->info("number of las files: {}", number_of_parts);
 
     for (int part = 0; part < name_las_list.size(); part++) {
 
@@ -530,22 +563,27 @@ int main(int argc, char *argv[]) {
         console->info("name of las: {}", name_las_list[part]);
 
 
-        if (strlen(name_las_list[part].c_str()) > 0)
-            la.openAlignmentFile(name_las_list[part]);
+
 
         int64 n_aln = 0;
 
-        if (strlen(name_las_list[part].c_str()) > 0) {
-            n_aln = la.getAlignmentNumber();
-            console->info("Load alignments from {}", name_las_list[part]);
-            console->info("# Alignments: {}", n_aln);
+        if(strlen(name_las_base)> 0) {
+
+            if (strlen(name_las_list[part].c_str()) > 0)
+                la.openAlignmentFile(name_las_list[part]);
+            if (strlen(name_las_list[part].c_str()) > 0) {
+                n_aln = la.getAlignmentNumber();
+                console->info("Load alignments from {}", name_las_list[part]);
+                console->info("# Alignments: {}", n_aln);
+            }
+            if (strlen(name_las_list[part].c_str()) > 0) {
+                la.resetAlignment();
+                la.getOverlap(aln, 0, n_read);
+            }
         }
 
 
-        if (strlen(name_las_list[part].c_str()) > 0) {
-            la.resetAlignment();
-            la.getOverlap(aln, 0, n_read);
-        }
+
 
         if (strlen(name_paf) > 0) {
             n_aln = la.loadPAF(std::string(name_paf), aln);
@@ -558,7 +596,7 @@ int main(int argc, char *argv[]) {
             return 1;
         }
 
-        console->info("Input data finished, part {}/{}", part + 1, name_las_list.size());
+        console->info("Input data finished, part {}/{}", part + 1, number_of_parts);
 
 
 
@@ -644,16 +682,20 @@ int main(int argc, char *argv[]) {
             cgs[i] = (cg);
         }
 
-        console->info("profile coverage done part {}/{}", part + 1, name_las_list.size());
+        console->info("profile coverage done part {}/{}", part + 1, number_of_parts);
 
 
         std::set<int> rand_reads;
         srand(time(NULL));
         rand_reads.insert(0);
+        int temp_index(0);
         while (rand_reads.size() < (r_end - r_begin)/500){
+            temp_index ++;
             int rd_id=rand()%(r_end - r_begin) + r_begin;
             if (reads[rd_id]->len > 5000)
                 rand_reads.insert(rd_id);
+            if (temp_index > 20000)
+                break;
         }
 
         int num_slot = 0;
@@ -841,11 +883,12 @@ int main(int argc, char *argv[]) {
         console->info("active reads: {}", num_active_read);
         console->info("total reads: {}", r_end-r_begin+1);
 
-
-        for (int i = 0; i < aln.size(); i++) {
-            free(aln[i]);
+        if (strlen(name_las) > 0) {
+            for (int i = 0; i < aln.size(); i++) {
+                delete aln[i];
+            }
+            aln.clear();
         }
-        aln.clear();
     }
 
 
diff --git a/utils/update.sh b/utils/update.sh
index 1fa5e68..a68a399 100755
--- a/utils/update.sh
+++ b/utils/update.sh
@@ -17,7 +17,7 @@ then rsync -rizP --delete --exclude '.*' --exclude 'build' $1 at shannon.stanford.e
 fi
 
 if [ "$2" == "update" ];
-then ssh -t $1 at shannon.stanford.edu "cd /home/$1/AwesomeAssembler && ./utils/build.sh"
+then ssh -t $1 at shannon.stanford.edu "export TEMP=/home/$1/tmp && cd /home/$1/AwesomeAssembler && ./utils/build.sh"
 fi
 
 if [ "$2" == "all" ];

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/hinge.git



More information about the debian-med-commit mailing list