[med-svn] [python-lefse] 01/02: Imported Upstream version 1.0+20151228

Andreas Tille tille at debian.org
Tue Jul 26 14:00:44 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository python-lefse.

commit d3d5d157680b3655d2c0b4e25b985269f4b594db
Author: Andreas Tille <tille at debian.org>
Date:   Tue Jul 26 15:58:57 2016 +0200

    Imported Upstream version 1.0+20151228
---
 __init__.py                       |    0
 example/run.sh                    |   51 +
 format_input.py                   |  453 +++++++
 lefse.py                          |  241 ++++
 lefse2circlader.py                |   43 +
 lefsebiom/AbundanceTable.py       | 2456 +++++++++++++++++++++++++++++++++++++
 lefsebiom/CClade.py               |  181 +++
 lefsebiom/ConstantsBreadCrumbs.py |  155 +++
 lefsebiom/ValidateData.py         |  624 ++++++++++
 lefsebiom/__init__.py             |    0
 plot_cladogram.py                 |  342 ++++++
 plot_features.py                  |  153 +++
 plot_res.py                       |  179 +++
 qiime2lefse.py                    |   88 ++
 requirements.txt                  |    5 +
 run_lefse.py                      |  103 ++
 16 files changed, 5074 insertions(+)

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/example/run.sh b/example/run.sh
new file mode 100755
index 0000000..7aa0648
--- /dev/null
+++ b/example/run.sh
@@ -0,0 +1,51 @@
+
+# Download a 3-class example (with subclasses and subjects) from huttenhower.sph.harvard.edu
+# It is a small subset of the HMP 16S dataset for finding biomarkers characterizing
+# different levels of oxygen availability in different body sites
+wget http://huttenhower.sph.harvard.edu/webfm_send/129 -O hmp_aerobiosis_small.txt
+
+# Running the LEfSe commands with -h gives the list of available options
+
+# format_input.py converts the input data matrix to the LEfSe input format.
+#
+# In this example we use the class information in the first line (-c 1)
+# the subclass in the second line (-s 2) and the subject in the third (-u 3).
+# If the subclass or the subject is not present in the data, you need to set
+# its value to -1.
+# -o 1000000 scales the features so that the sum (within each taxonomic level)
+# is 1M: this is done only to obtain more meaningful values for the LDA score
+../format_input.py hmp_aerobiosis_small.txt hmp_aerobiosis_small.in -c 1 -s 2 -u 3 -o 1000000
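+
+# A hypothetical variant of the call above for data without subclass and
+# subject rows (file names invented for illustration):
+#   ../format_input.py mydata.txt mydata.in -c 1 -s -1 -u -1 -o 1000000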
+
+# run_lefse.py performs the actual statistical analysis
+#
+# Apply LEfSe on the formatted data producing the results (to be further processed
+# for visualization with the other modules). The available options
+# can be listed with -h
+../run_lefse.py hmp_aerobiosis_small.in hmp_aerobiosis_small.res
+
+# plot_res.py visualizes the output
+#
+# Plot the list of biomarkers with their effect size
+# Several graphical options are available for customizing the output
+../plot_res.py hmp_aerobiosis_small.res hmp_aerobiosis_small.png
+
+# plot_cladogram.py visualizes the output on a hierarchical tree
+#
+# Plot the representation of the biomarkers on the hierarchical tree
+# specified in the input data (using | in the name of the features)
+# In this case we will obtain the RDP taxonomy.
+# This is an early implementation of the module. I'm working on an improved version
+# that will be released independently of LEfSe
+../plot_cladogram.py hmp_aerobiosis_small.res hmp_aerobiosis_small.cladogram.png --format png
+
+# Create a directory for storing the raw-data representation of the discovered biomarkers
+mkdir biomarkers_raw_images
+
+# plot_features.py visualizes the raw-data features
+#
+# The module for exporting the raw-data representation of the features.
+# With the default options we will obtain the images for all the features that are
+# detected as biomarkers
+../plot_features.py hmp_aerobiosis_small.in hmp_aerobiosis_small.res biomarkers_raw_images/
+
+
diff --git a/format_input.py b/format_input.py
new file mode 100755
index 0000000..dd3d2e0
--- /dev/null
+++ b/format_input.py
@@ -0,0 +1,453 @@
+#!/usr/bin/env python
+
+import sys,os,argparse,pickle,re,numpy
+
+
+
+
+#*************************************************************************************************************** 
+#*   Change log                                                                                                *
+#*   January 16, 2014  - George Weingart - george.weingart at gmail.com                                           *
+#*                                                                                                             *
+#*   biom Support                                                                                              *
+#*   Modified the program to enable it to accept biom files as input                                           *
+#*                                                                                                             *
+#*   Added two optional input parameters:	                                                                   *
+#*   1. biom_c is the name of the biom metadata to be used as class                                            *
+#*   2. biom_s is the name of the biom metadata to be used as subclass                                         *
+#*   class and subclass are used in the same context as the original                                           *
+#*   parameters class and subclass                                                                             *
+#*   These parameters are totally optional, the default is the program                                         *
+#*   chooses as class the first metadata received from the conversion                                          *
+#*   of the biom file into a sequential (pcl) file as generated by                                             *
+#*   breadcrumbs, and similarly, the second metadata is selected as                                            *
+#*   subclass.                                                                                                 *
+#*   The syntax or logic for the original non-biom case was NOT changed.                                       *
+#*                                                                                                             *
+#*   <*******************  IMPORTANT NOTE   *************************>                                         *
+#*   The biom case requires breadcrumbs and therefore there is a                                               *
+#*      a conditional import of the breadcrumbs modules                                                        *
+#*   If the User uses a biom input and breadcrumbs is not detected,                                            *
+#*       the run is abnormally ended                                                                           *
+#*   breadcrumbs itself needs a biom environment, so if the import                                             *
+#*       of biom in breadcrumbs fails, the run is also abnormally ended                                        *
+#*       (only if the input file was biom)                                                                     *
+#*                                                                                                             *
+#*   USAGE EXAMPLES                                                                                            *
+#*   --------------                                                                                            *
+#*   Case #1: Using a sequential file as input (Old version - did not change)                                  *
+#*  ./format_input.py hmp_aerobiosis_small.txt hmp_aerobiosis_small.in -c 1 -s 2 -u 3 -o 1000000               * 
+#*   Case #2: Using a biom file as input                                                                       *
+#*  ./format_input.py hmp_aerobiosis_small.biom hmp_aerobiosis_small.in  -o 1000000                            *
+#*   Case #3: Using a biom file as input and override the class and subclass                                   *
+#*   ./format_input.py lefse.biom hmp_aerobiosis_small.in -biom_c oxygen_availability -biom_s body_site -o 1000000
+#*                                                                                                             *
+#***************************************************************************************************************         
+
+def read_input_file(inp_file, CommonArea):
+
+	if inp_file.endswith('.biom'):   			#*  If the file format is biom: 
+		CommonArea = biom_processing(inp_file)  #*  Process in biom format 
+		return CommonArea 						#*  And return the CommonArea
+
+	with open(inp_file) as inp:
+		CommonArea['ReturnedData'] = [[v.strip() for v in line.strip().split("\t")] for line in inp.readlines()]
+		return CommonArea
+
+def transpose(data):
+	return zip(*data)
+
+def read_params(args):
+	parser = argparse.ArgumentParser(description='LEfSe formatting modules')
+	parser.add_argument('input_file', metavar='INPUT_FILE', type=str, help="the input file; hierarchical feature levels can be specified with | or ., and those symbols must not appear in the input file for any other reason.")
+	parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str,
+		help="the output file containing the data for LEfSe")
+	parser.add_argument('--output_table', type=str, required=False, default="",
+		help="the formatted table in txt format")
+	parser.add_argument('-f',dest="feats_dir", choices=["c","r"], type=str, default="r",
+		help="set whether the features are on rows (default) or on columns")
+	parser.add_argument('-c',dest="class", metavar="[1..n_feats]", type=int, default=1,
+		help="set which feature to use as class (default 1)")
+	parser.add_argument('-s',dest="subclass", metavar="[1..n_feats]", type=int, default=None,
+		help="set which feature to use as subclass (default -1 meaning no subclass)")
+	parser.add_argument('-o',dest="norm_v", metavar="float", type=float, default=-1.0,
+		help="set the normalization value (default -1.0 meaning no normalization)")
+	parser.add_argument('-u',dest="subject", metavar="[1..n_feats]", type=int, default=None,
+		help="set which feature to use as subject (default -1 meaning no subject)")
+	parser.add_argument('-m',dest="missing_p", choices=["f","s"], type=str, default="f",
+		help="set the policy to adopt with missing values: f removes the features with missing values, s removes samples with missing values (default f)")
+	parser.add_argument('-n',dest="subcl_min_card", metavar="int", type=int, default=10,
+		help="set the minimum cardinality of each subclass (subclasses with low cardinalities will be grouped together, if the cardinality is still low, no pairwise comparison will be performed with them)")
+
+	parser.add_argument('-biom_c',dest="biom_class", type=str,
+		help="for biom input files: set which metadata to use as class")
+	parser.add_argument('-biom_s',dest="biom_subclass", type=str,
+		help="for biom input files: set which metadata to use as subclass")
+	
+	args = parser.parse_args()
+	
+	return vars(args)	
+
+def remove_missing(data,roc):
+	if roc == "c": data = transpose(data)
+	max_len = max([len(r) for r in data])
+	to_rem = []
+	for i,r in enumerate(data):
+		if len([v for v in r if not( v == "" or v.isspace())]) < max_len: to_rem.append(i)
+	if len(to_rem):
+		for i in reversed(to_rem):
+			data.pop(i)
+	if roc == "c": return transpose(data)
+	return data
+
+def sort_by_cl(data,n,c,s,u):
+	def sort_lines1(a,b):
+		return int(a[c] > b[c])*2-1
+	def sort_lines2u(a,b):
+		if a[c] != b[c]: return int(a[c] > b[c])*2-1
+		return int(a[u] > b[u])*2-1
+	def sort_lines2s(a,b):
+		if a[c] != b[c]: return int(a[c] > b[c])*2-1
+		return int(a[s] > b[s])*2-1
+	def sort_lines3(a,b):
+		if a[c] != b[c]: return int(a[c] > b[c])*2-1
+		if a[s] != b[s]: return int(a[s] > b[s])*2-1
+		return int(a[u] > b[u])*2-1
+	if n == 3: data.sort(sort_lines3)
+	if n == 2:
+		if s is None:
+			data.sort(sort_lines2u)
+		else:
+			data.sort(sort_lines2s)
+	if n == 1: data.sort(sort_lines1)
+	return data
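+
+# Illustrative sketch of the ordering above (made-up tuples, with c=0, s=1,
+# u=2 and n=3): rows are sorted by class first, then subclass, then subject:
+#   [("b","s1","u1"), ("a","s2","u1"), ("a","s1","u2")]
+#   -> [("a","s1","u2"), ("a","s2","u1"), ("b","s1","u1")]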
+
+def group_small_subclasses(cls,min_subcl):
+	last = ""
+	n = 0
+	repl = []
+	dd = [list(cls['class']),list(cls['subclass'])]
+	for d in dd:
+		if d[1] != last:
+			if n < min_subcl and last != "":
+				repl.append(d[1])
+			last = d[1]
+		n = 1	
+	for i,d in enumerate(dd):
+		if d[1] in repl: dd[i][1] = "other"
+		dd[i][1] = str(dd[i][0])+"_"+str(dd[i][1])
+	cls['class'] = dd[0]
+	cls['subclass'] = dd[1]
+	return cls		
+
+def get_class_slices(data):
+	previous_class = data[0][0]
+	previous_subclass = data[0][1]
+	subclass_slices = []
+	class_slices = []
+	last_cl = 0
+	last_subcl = 0
+	class_hierarchy = []
+	subcls = []
+	for i,d in enumerate(data):
+		if d[1] != previous_subclass:
+			subclass_slices.append((previous_subclass,(last_subcl,i)))
+			last_subcl = i
+			subcls.append(previous_subclass)
+		if d[0] != previous_class:
+			class_slices.append((previous_class,(last_cl,i)))
+			class_hierarchy.append((previous_class,subcls))
+			subcls = []
+			last_cl = i
+		previous_subclass = d[1]
+		previous_class = d[0]
+	subclass_slices.append((previous_subclass,(last_subcl,i+1)))
+	subcls.append(previous_subclass)
+	class_slices.append((previous_class,(last_cl,i+1)))
+	class_hierarchy.append((previous_class,subcls))
+	return dict(class_slices), dict(subclass_slices), dict(class_hierarchy)
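+
+# Hedged example of the slice computation (invented labels), for rows already
+# sorted by class and subclass as produced by sort_by_cl:
+#   get_class_slices([("a","a1"),("a","a1"),("a","a2"),("b","b1")])
+# returns
+#   class_slices    = {"a": (0, 3), "b": (3, 4)}
+#   subclass_slices = {"a1": (0, 2), "a2": (2, 3), "b1": (3, 4)}
+#   class_hierarchy = {"a": ["a1", "a2"], "b": ["b1"]}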
+
+def numerical_values(feats,norm):
+	mm = []
+	for k,v in feats.items():
+		feats[k] = [float(val) for val in v]
+	if norm < 0.0: return feats
+	tr = zip(*(feats.values()))
+	mul = []
+	fk = feats.keys()
+	hie = True if sum([k.count(".") for k in fk]) > len(fk) else False
+	for i in range(len(feats.values()[0])):
+		if hie: mul.append(sum([t for j,t in enumerate(tr[i]) if fk[j].count(".") < 1 ]))
+		else: mul.append(sum(tr[i]))
+	if hie and sum(mul) == 0:
+		mul = []
+		for i in range(len(feats.values()[0])):
+			mul.append(sum(tr[i]))
+	for i,m in enumerate(mul):
+		if m == 0: mul[i] = 0.0
+		else: mul[i] = float(norm) / m
+	for k,v in feats.items():
+		feats[k] = [val*mul[i] for i,val in enumerate(v)]
+		if numpy.mean(feats[k]) and (numpy.std(feats[k])/numpy.mean(feats[k])) < 1e-10:
+			feats[k] = [ float(round(kv*1e6)/1e6) for kv in feats[k]]
+	return feats
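+
+# Sketch of the per-sample scaling above (numbers invented): with norm=100 and
+# a sample whose root-level features sum to 25, every feature value of that
+# sample is multiplied by 100/25 = 4, so the sample sums to the -o target.
+# For hierarchical features only the root clades contribute to the sum.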
+
+def add_missing_levels2(ff):
+	
+	if sum( [f.count(".") for f in ff] ) < 1: return ff
+	
+	dn = {}
+
+	added = True
+	while added:
+		added = False
+		for f in ff:
+			lev = f.count(".")
+			if lev == 0: continue
+			if lev not in dn: dn[lev] = [f]
+			else: dn[lev].append(f)	
+		for fn in sorted(dn,reverse=True):
+			for f in dn[fn]:
+				fc = ".".join(f.split('.')[:-1])
+				if fc not in ff:
+					ab_all = [ff[fg] for fg in ff if (fg.count(".") == 0 and fg == fc) or (fg.count(".") > 0 and fc == ".".join(fg.split('.')[:-1]))]
+					ab =[]
+					for l in [f for f in zip(*ab_all)]:
+						ab.append(sum([float(ll) for ll in l]))
+					ff[fc] = ab
+					added = True
+			if added:
+				break
+		
+	return ff
+				
+				
+def add_missing_levels(ff):
+	if sum( [f.count(".") for f in ff] ) < 1: return ff
+	
+	clades2leaves = {}
+	for f in ff:
+		fs = f.split(".")
+		if len(fs) < 2:
+			continue
+		for l in range(len(fs)):
+			n = ".".join( fs[:l] )
+			if n in clades2leaves:
+				clades2leaves[n].append( f )
+			else:
+				clades2leaves[n] = [f]
+	for k,v in clades2leaves.items():
+		if k and k not in ff:
+			ff[k] = [sum(a) for a in zip(*[[float(fn) for fn in ff[vv]] for vv in v])]
+	return ff
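+
+# Minimal illustration (hypothetical features): given only the leaves
+#   {"A.B": [1.0, 2.0], "A.C": [3.0, 4.0]}
+# the missing parent clade "A" is added as the element-wise sum [4.0, 6.0].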
+
+			
+
+def modify_feature_names(fn):
+	ret = fn
+
+	for v in [' ',r'\$',r'\@',r'#',r'%',r'\^',r'\&',r'\*',r'\"',r'\'']:
+		ret = [re.sub(v,"",f) for f in ret]
+	for v in ["/",r'\(',r'\)',r'-',r'\+',r'=',r'{',r'}',r'\[',r'\]',
+		r',',r'\.',r';',r':',r'\?',r'\<',r'\>',r'\.',r'\,']:
+		ret = [re.sub(v,"_",f) for f in ret]
+	for v in ["\|"]:
+		ret = [re.sub(v,".",f) for f in ret]
+
+	ret2 = []
+	for r in ret:
+		if r[0] in ['0','1','2','3','4','5','6','7','8','9']:
+			ret2.append("f_"+r)
+		else: ret2.append(r)
+
+	return ret2
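+
+# Example of the renaming performed above (hypothetical input):
+#   modify_feature_names(["Bacteria|Firmicutes (xyz)"])
+# returns ["Bacteria.Firmicutes_xyz_"]: blanks and special characters are
+# dropped or replaced by "_", and "|" becomes the "." hierarchy separator.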
+		
+
+def rename_same_subcl(cl,subcl):
+	toc = []
+	for sc in set(subcl):
+		if len(set([cl[i] for i in range(len(subcl)) if sc == subcl[i]])) > 1:
+			toc.append(sc)
+	new_subcl = []
+	for i,sc in enumerate(subcl):
+		if sc in toc: new_subcl.append(cl[i]+"_"+sc)
+		else: new_subcl.append(sc)
+	return new_subcl
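+
+# Sketch (invented labels): a subclass name shared by two different classes is
+# disambiguated by prefixing its class:
+#   rename_same_subcl(["a","a","b"], ["x","y","x"]) -> ["a_x", "y", "b_x"]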
+	
+	
+#*************************************************************************************
+#*  Modifications by George Weingart,  Jan 15, 2014                                  *
+#*  If the input file is biom:                                                       *
+#*  a. Load an AbundanceTable (Using breadcrumbs)                                    *
+#*  b. Create a sequential file from the AbundanceTable (de-facto - pcl)             *
+#*  c. Use that file as input to the rest of the program                             *
+#*  d. Calculate the c, s, and u parameters, either from the values the User        *
+#*     entered, from the metadata values in the biom file, or from defaults         *
+#*  <<<-------------  I M P O R T A N T     N O T E ------------------->>            *
+#*  breadcrumbs src directory must be included in the PYTHONPATH                     *
+#*  <<<-------------  I M P O R T A N T     N O T E ------------------->>            *	
+#*************************************************************************************	
+def biom_processing(inp_file):
+	CommonArea = dict()			#* Set up a dictionary to return
+	CommonArea['abndData']   = AbundanceTable.funcMakeFromFile(inp_file,	#* Create AbundanceTable from input biom file
+		cDelimiter = None,
+		sMetadataID = None,
+		sLastMetadataRow = None,
+		sLastMetadata = None,
+		strFormat = None)
+
+	#****************************************************************
+	#*  Building the data element here                              *
+	#****************************************************************
+	ResolvedData = list()		#This is the Resolved data that will be returned
+	IDMetadataName  = CommonArea['abndData'].funcGetIDMetadataName()   #* ID Metadataname 
+	IDMetadata = [CommonArea['abndData'].funcGetIDMetadataName()]  #* The first Row
+	for IDMetadataEntry in CommonArea['abndData'].funcGetMetadataCopy()[IDMetadataName]:  #* Loop on all the metadata values
+		IDMetadata.append(IDMetadataEntry)
+	ResolvedData.append(IDMetadata)					#Add the IDMetadata with all its values to the resolved area
+	for key, value in  CommonArea['abndData'].funcGetMetadataCopy().iteritems(): 
+		if  key  != IDMetadataName:
+			MetadataEntry = list()		#*  Set it up
+			MetadataEntry.append(key)	#*	And post it to the area
+			for x in value:
+				MetadataEntry.append(x)		#* Append the metadata value name
+			ResolvedData.append(MetadataEntry)
+	for AbundanceDataEntry in    CommonArea['abndData'].funcGetAbundanceCopy(): 		#* The Abundance Data
+		lstAbundanceDataEntry = list(AbundanceDataEntry)	#Convert tuple to list
+		ResolvedData.append(lstAbundanceDataEntry)			#Append the list to the metadata list
+	CommonArea['ReturnedData'] = 	ResolvedData			#Post the results 
+	return CommonArea   
+
+	
+#*******************************************************************************
+#*    Check the params and override in the case of biom                        *
+#*******************************************************************************
+def  check_params_for_biom_case(params, CommonArea):
+	CommonArea['MetadataNames'] = list()			#Metadata  names
+	params['original_class'] = params['class']			#Save the original class
+	params['original_subclass'] = params['subclass']	#Save the original subclass	
+	params['original_subject'] = params['subject']	#Save the original subject
+
+
+	TotalMetadataEntriesAndIDInBiomFile = len(CommonArea['abndData'].funcGetMetadataCopy())  # The number of metadata entries
+	for i in range(0,TotalMetadataEntriesAndIDInBiomFile): 	#* Populate the meta data names table
+		CommonArea['MetadataNames'].append(CommonArea['ReturnedData'][i][0])	#Add the metadata name
+	
+
+	#****************************************************
+	#* Setting the params here                          *
+	#****************************************************
+	
+	if TotalMetadataEntriesAndIDInBiomFile > 0:		#If there is at least one entry - has to be the subject
+		params['subject'] =  1
+	if TotalMetadataEntriesAndIDInBiomFile == 2:		#If there are 2 - The first is the subject and the second has to be the metadata, and that is the class
+		params['class'] =  2
+	if TotalMetadataEntriesAndIDInBiomFile == 3:		#If there are 3:  Set up default that the second entry is the class and the third is the subclass
+		params['class'] =  2
+		params['subclass'] =  3
+		FlagError = False								#Set up error flag
+
+		if not params['biom_class'] is None and not params['biom_subclass'] is None:				#Check if the User passed a valid class and subclass
+			if  params['biom_class'] in CommonArea['MetadataNames']:
+				params['class'] =  CommonArea['MetadataNames'].index(params['biom_class']) +1	#* Set up the index for that metadata
+			else:
+				FlagError = True
+			if  params['biom_subclass'] in  CommonArea['MetadataNames']:
+				params['subclass'] =  CommonArea['MetadataNames'].index(params['biom_subclass']) +1 #* Set up the index for that metadata
+			else:
+				FlagError = True
+		if FlagError == True:		#* If the User passed an invalid class
+			print "**Invalid biom class or subclass passed - Using defaults: First metadata=class, Second Metadata=subclass\n"
+			params['class'] =  2
+			params['subclass'] =  3
+	return params
+ 	
+	
+
+if  __name__ == '__main__':
+	CommonArea = dict()			#Build a Common Area to pass variables in the biom case
+	params = read_params(sys.argv)
+
+	#*************************************************************
+	#* Conditionally import breadcrumbs if file is a biom file   *
+	#* If it is and no breadcrumbs found - abnormally exit       *
+	#*************************************************************
+	if  params['input_file'].endswith('.biom'):
+		try:
+			from lefsebiom.ConstantsBreadCrumbs import *	 
+			from lefsebiom.AbundanceTable import *
+		except ImportError:
+			sys.stderr.write("************************************************************************************************************ \n")
+			sys.stderr.write("* Error:   Breadcrumbs libraries not detected - required to process biom files - run abnormally terminated * \n")
+			sys.stderr.write("************************************************************************************************************ \n")
+			exit(1)
+
+	
+	if type(params['subclass']) is int and int(params['subclass']) < 1:
+		params['subclass'] = None
+	if type(params['subject']) is int and int(params['subject']) < 1:
+		params['subject'] = None
+
+
+	CommonArea = read_input_file(sys.argv[1], CommonArea)		#Pass The CommonArea to the Read
+	data = CommonArea['ReturnedData']					#Select the data
+
+	if sys.argv[1].endswith('biom'):	#*	Check if biom:
+		params = check_params_for_biom_case(params, CommonArea)	#Check the params for the biom case
+
+	if params['feats_dir'] == "c":
+		data = transpose(data)
+
+	ncl = 1
+	if not params['subclass'] is None: ncl += 1	
+	if not params['subject'] is None: ncl += 1	
+
+	first_line = zip(*data)[0]
+	
+	first_line = modify_feature_names(list(first_line))
+
+	data = zip(	first_line,
+			*sort_by_cl(zip(*data)[1:],
+			  ncl,
+			  params['class']-1,
+			  params['subclass']-1 if not params['subclass'] is None else None,
+			  params['subject']-1 if not params['subject'] is None else None))
+#	data.insert(0,first_line)
+#	data = remove_missing(data,params['missing_p'])
+	cls = {}
+
+	cls_i = [('class',params['class']-1)]
+	if params['subclass'] > 0: cls_i.append(('subclass',params['subclass']-1))
+	if params['subject'] > 0: cls_i.append(('subject',params['subject']-1))
+	cls_i.sort(lambda x, y: -cmp(x[1],y[1]))
+	for v in cls_i: cls[v[0]] = data.pop(v[1])[1:]
+	if not params['subclass'] > 0: cls['subclass'] = [str(cl)+"_subcl" for cl in cls['class']]
+	
+	cls['subclass'] = rename_same_subcl(cls['class'],cls['subclass'])
+#	if 'subclass' in cls.keys(): cls = group_small_subclasses(cls,params['subcl_min_card'])
+	class_sl,subclass_sl,class_hierarchy = get_class_slices(zip(*cls.values()))
+    
+	feats = dict([(d[0],d[1:]) for d in data])
+    
+	feats = add_missing_levels(feats)
+    
+	feats = numerical_values(feats,params['norm_v'])
+	out = {}
+	out['feats'] = feats
+	out['norm'] = params['norm_v'] 
+	out['cls'] = cls
+	out['class_sl'] = class_sl
+	out['subclass_sl'] = subclass_sl
+	out['class_hierarchy'] = class_hierarchy
+
+	if params['output_table']:
+		with open( params['output_table'], "w") as outf: 
+			if 'class' in cls: outf.write( "\t".join(list(["class"])+list(cls['class'])) + "\n" )
+			if 'subclass' in cls: outf.write( "\t".join(list(["subclass"])+list(cls['subclass'])) + "\n" )
+			if 'subject' in cls: outf.write( "\t".join(list(["subject"])+list(cls['subject']))  + "\n" )
+			for k,v in out['feats'].items(): outf.write( "\t".join([k]+[str(vv) for vv in v]) + "\n" )
+
+	with open(params['output_file'], 'wb') as back_file:
+		pickle.dump(out,back_file)    	
+
diff --git a/lefse.py b/lefse.py
new file mode 100755
index 0000000..0a29be0
--- /dev/null
+++ b/lefse.py
@@ -0,0 +1,241 @@
+import os,sys,math,pickle
+import random as lrand
+import rpy2.robjects as robjects
+import argparse
+import numpy
+#import svmutil
+
+def init(): 
+	lrand.seed(1982)
+	robjects.r('library(splines)')
+	robjects.r('library(stats4)')
+	robjects.r('library(survival)')
+	robjects.r('library(mvtnorm)')
+	robjects.r('library(modeltools)')
+	robjects.r('library(coin)')
+	robjects.r('library(MASS)')
+
+def get_class_means(class_sl,feats):
+	means = {}
+	clk = class_sl.keys()
+	for fk,f in feats.items():
+		means[fk] = [numpy.mean((f[class_sl[k][0]:class_sl[k][1]])) for k in clk] 
+	return clk,means 
+	
+def save_res(res,filename): 
+	with open(filename, 'w') as out:
+		for k,v in res['cls_means'].items():
+			out.write(k+"\t"+str(math.log(max(max(v),1.0),10.0))+"\t")
+			if k in res['lda_res_th']:
+				for i,vv in enumerate(v):
+					if vv == max(v):
+						out.write(str(res['cls_means_kord'][i])+"\t")
+						break
+				out.write(str(res['lda_res'][k])) 
+			else: out.write("\t")
+			out.write( "\t" + (res['wilcox_res'][k] if 'wilcox_res' in res and k in res['wilcox_res'] else "-")+"\n")
+
+def load_data(input_file, nnorm = False):
+	with open(input_file, 'rb') as inputf:
+		inp = pickle.load(inputf)
+	if nnorm: return inp['feats'],inp['cls'],inp['class_sl'],inp['subclass_sl'],inp['class_hierarchy'],inp['norm']  
+	else: return inp['feats'],inp['cls'],inp['class_sl'],inp['subclass_sl'],inp['class_hierarchy']
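+
+# Minimal usage sketch (file name hypothetical): unpickle the structures
+# written by format_input.py:
+#   feats, cls, class_sl, subclass_sl, class_hierarchy = load_data("data.in")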
+
+def load_res(input_file):
+	with open(input_file, 'rb') as inputf:	
+		inp = pickle.load(inputf)
+	return inp['res'],inp['params'],inp['class_sl'],inp['subclass_sl']		
+
+
+def test_kw_r(cls,feats,p,factors):
+	robjects.globalenv["y"] = robjects.FloatVector(feats)
+	for i,f in enumerate(factors):
+		robjects.globalenv['x'+str(i+1)] = robjects.FactorVector(robjects.StrVector(cls[f]))
+	fo = "y~x1"
+    #for i,f in enumerate(factors[1:]):
+    #	if f == "subclass" and len(set(cls[f])) <= len(set(cls["class"])): continue
+    #	if len(set(cls[f])) == len(cls[f]): continue
+    #	fo += "+x"+str(i+2)
+	kw_res = robjects.r('kruskal.test('+fo+',)$p.value')
+	return float(tuple(kw_res)[0]) < p, float(tuple(kw_res)[0])
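+
+# The rpy2 call above amounts to the following R expression (sketch):
+#   kruskal.test(y ~ x1)$p.value
+# with y the feature abundances and x1 the class factor; the first returned
+# value is True when the p-value falls below the threshold p.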
+
+def test_rep_wilcoxon_r(sl,cl_hie,feats,th,multiclass_strat,mul_cor,fn,min_c,comp_only_same_subcl,curv=False):
+	comp_all_sub = not comp_only_same_subcl
+	tot_ok =  0
+	alpha_mtc = th
+	all_diff = []
+	for pair in [(x,y) for x in cl_hie.keys() for y in cl_hie.keys() if x < y]:
+		dir_cmp = "not_set" #
+		l_subcl1, l_subcl2 = (len(cl_hie[pair[0]]), len(cl_hie[pair[1]]))
+		if mul_cor != 0: alpha_mtc = th*l_subcl1*l_subcl2 if mul_cor == 2 else 1.0-math.pow(1.0-th,l_subcl1*l_subcl2)
+		ok = 0
+		curv_sign = 0
+		first = True
+		for i,k1 in enumerate(cl_hie[pair[0]]):
+			br = False
+			for j,k2 in enumerate(cl_hie[pair[1]]):
+				if not comp_all_sub and k1[len(pair[0]):] != k2[len(pair[1]):]: 
+					ok += 1	
+					continue 
+				cl1 = feats[sl[k1][0]:sl[k1][1]]
+				cl2 = feats[sl[k2][0]:sl[k2][1]]
+				med_comp = False
+				if len(cl1) < min_c or len(cl2) < min_c: 
+					med_comp = True
+				sx,sy = numpy.median(cl1),numpy.median(cl2)
+				if cl1[0] == cl2[0] and len(set(cl1)) == 1 and  len(set(cl2)) == 1: 
+					tres, first = False, False
+				elif not med_comp:
+					robjects.globalenv["x"] = robjects.FloatVector(cl1+cl2)
+					robjects.globalenv["y"] = robjects.FactorVector(robjects.StrVector(["a" for a in cl1]+["b" for b in cl2]))	
+					pv = float(robjects.r('pvalue(wilcox_test(x~y,data=data.frame(x,y)))')[0])
+					tres = pv < alpha_mtc*2.0
+				if first:
+					first = False
+					if not curv and ( med_comp or tres ): 
+						dir_cmp = sx < sy
+                        #if sx == sy: br = True
+					elif curv: 
+						dir_cmp = None
+						if med_comp or tres: 
+							curv_sign += 1
+							dir_cmp = sx < sy
+					else: br = True
+				elif not curv and med_comp:
+					if ((sx < sy) != dir_cmp or sx == sy): br = True
+				elif curv:
+					if tres and dir_cmp == None:
+						curv_sign += 1
+						dir_cmp = sx < sy
+					if tres and dir_cmp != (sx < sy):
+						br = True
+						curv_sign = -1
+				elif not tres or (sx < sy) != dir_cmp or sx == sy: br = True
+				if br: break
+				ok += 1
+			if br: break
+		if curv: diff = curv_sign > 0
+		else: diff = (ok == len(cl_hie[pair[1]])*len(cl_hie[pair[0]])) # or (not comp_all_sub and dir_cmp != "not_set") 
+		if diff: tot_ok += 1
+		if not diff and multiclass_strat: return False
+		if diff and not multiclass_strat: all_diff.append(pair)
+	if not multiclass_strat:
+		tot_k = len(cl_hie.keys())
+		for k in cl_hie.keys():
+			nk = 0
+			for a in all_diff:
+				if k in a: nk += 1
+			if nk == tot_k-1: return True
+		return False
+	return True 
+
+
+
+def contast_within_classes_or_few_per_class(feats,inds,min_cl,ncl):
+	ff = zip(*[v for n,v in feats.items() if n != 'class'])
+	cols = [ff[i] for i in inds]
+	cls = [feats['class'][i] for i in inds]
+	if len(set(cls)) < ncl:
+		return True
+	for c in set(cls):
+		if cls.count(c) < min_cl:
+			return True
+		cols_cl = [x for i,x in enumerate(cols) if cls[i] == c]
+		for i,col in enumerate(zip(*cols_cl)):
+			if (len(set(col)) <= min_cl and min_cl > 1) or (min_cl == 1 and len(set(col)) <= 1):
+				return True
+	return False 
+
+def test_lda_r(cls,feats,cl_sl,boots,fract_sample,lda_th,tol_min,nlogs):
+	fk = feats.keys()
+	means = dict([(k,[]) for k in feats.keys()])
+	feats['class'] = list(cls['class'])
+	clss = list(set(feats['class']))
+	for uu,k in enumerate(fk):
+		if k == 'class': continue
+		ff = [(feats['class'][i],v) for i,v in enumerate(feats[k])]
+		for c in clss:
+			if len(set([float(v[1]) for v in ff if v[0] == c])) > max(float(feats['class'].count(c))*0.5,4): continue
+			for i,v in enumerate(feats[k]):
+				if feats['class'][i] == c:
+					feats[k][i] = math.fabs(feats[k][i] + lrand.normalvariate(0.0,max(feats[k][i]*0.05,0.01)))
+	rdict = {}
+	for a,b in feats.items():
+		if a == 'class' or a == 'subclass' or a == 'subject':
+			rdict[a] = robjects.StrVector(b)
+		else: rdict[a] = robjects.FloatVector(b)
+	robjects.globalenv["d"] = robjects.DataFrame(rdict)
+	lfk = len(feats[fk[0]])
+	rfk = int(float(len(feats[fk[0]]))*fract_sample)
+	f = "class ~ "+fk[0]
+	for k in fk[1:]: f += " + " + k.strip()
+	ncl = len(set(cls['class']))
+	min_cl = int(float(min([cls['class'].count(c) for c in set(cls['class'])]))*fract_sample*fract_sample*0.5)
+	min_cl = max(min_cl,1)
+	pairs = [(a,b) for a in set(cls['class']) for b in set(cls['class']) if a > b]
+
+	for k in fk:
+		for i in range(boots):
+			means[k].append([])
+	for i in range(boots):
+		for rtmp in range(1000):
+			rand_s = [lrand.randint(0,lfk-1) for v in range(rfk)]
+			if not contast_within_classes_or_few_per_class(feats,rand_s,min_cl,ncl): break
+		rand_s = [r+1 for r in rand_s]
+		means[k][i] = []
+		for p in pairs:
+			robjects.globalenv["rand_s"] = robjects.IntVector(rand_s)
+			robjects.globalenv["sub_d"] = robjects.r('d[rand_s,]')
+			z = robjects.r('z <- suppressWarnings(lda(as.formula('+f+'),data=sub_d,tol='+str(tol_min)+'))')
+			robjects.r('w <- z$scaling[,1]')
+			robjects.r('w.unit <- w/sqrt(sum(w^2))')
+			robjects.r('ss <- sub_d[,-match("class",colnames(sub_d))]')
+			if 'subclass' in feats:
+				robjects.r('ss <- ss[,-match("subclass",colnames(ss))]')
+			if 'subject' in feats:
+				robjects.r('ss <- ss[,-match("subject",colnames(ss))]')
+			robjects.r('xy.matrix <- as.matrix(ss)')
+			robjects.r('LD <- xy.matrix%*%w.unit')
+			robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))')
+			scal = robjects.r('wfinal <- w.unit * effect.size')
+			rres = robjects.r('mm <- z$means')
+			rowns = list(rres.rownames)
+			lenc = len(list(rres.colnames))
+			coeff = [abs(float(v)) if not math.isnan(float(v)) else 0.0 for v in scal]
+			res = dict([(pp,[float(ff) for ff in rres.rx(pp,True)] if pp in rowns else [0.0]*lenc ) for pp in [p[0],p[1]]])
+			for j,k in enumerate(fk):
+				gm = abs(res[p[0]][j] - res[p[1]][j])
+				means[k][i].append((gm+coeff[j])*0.5)
+	res = {}
+	for k in fk:
+		m = max([numpy.mean([means[k][kk][p] for kk in range(boots)]) for p in range(len(pairs))])
+		res[k] = math.copysign(1.0,m)*math.log(1.0+math.fabs(m),10)
+	return res,dict([(k,x) for k,x in res.items() if math.fabs(x) > lda_th])
+
+
+def test_svm(cls,feats,cl_sl,boots,fract_sample,lda_th,tol_min,nsvm):
+	return None
+"""
+	fk = feats.keys()
+	clss = list(set(cls['class']))
+	y = [clss.index(c)*2-1 for c in list(cls['class'])]
+	xx = [feats[f] for f in fk]
+	if nsvm: 
+		maxs = [max(v) for v in xx]
+		mins = [min(v) for v in xx]
+		x = [ dict([(i+1,(v-mins[i])/(maxs[i]-mins[i])) for i,v in enumerate(f)]) for f in zip(*xx)]
+	else: x = [ dict([(i+1,v) for i,v in enumerate(f)]) for f in zip(*xx)]
+	
+	lfk = len(feats[fk[0]])
+	rfk = int(float(len(feats[fk[0]]))*fract_sample)
+	mm = []
+
+	best_c = svmutil.svm_ms(y, x, [pow(2.0,i) for i in range(-5,10)],'-t 0 -q')
+	for i in range(boots):
+		rand_s = [lrand.randint(0,lfk-1) for v in range(rfk)]
+		r = svmutil.svm_w([y[yi] for yi in rand_s], [x[xi] for xi in rand_s], best_c,'-t 0 -q')
+		mm.append(r[:len(fk)])
+	m = [numpy.mean(v) for v in zip(*mm)]
+	res = dict([(v,m[i]) for i,v in enumerate(fk)])
+	return res,dict([(k,x) for k,x in res.items() if math.fabs(x) > lda_th])
+"""	
diff --git a/lefse2circlader.py b/lefse2circlader.py
new file mode 100755
index 0000000..79cc2a0
--- /dev/null
+++ b/lefse2circlader.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+from __future__ import with_statement
+
+import sys
+import os
+import argparse
+
+def read_params(args):
+    parser = argparse.ArgumentParser(description='Convert LEfSe output to '
+                        'Circlader input')
+    parser.add_argument(    'inp_f', metavar='INPUT_FILE', nargs='?', 
+                            default=None, type=str, 
+                            help="the input file [stdin if not present]")    
+    parser.add_argument(    'out_f', metavar='OUTPUT_FILE', nargs='?', 
+                            default=None, type=str, 
+                            help="the output file [stdout if not present]")
+    parser.add_argument('-l', metavar='levels with label', default=0, type=int)
+
+    return vars(parser.parse_args()) 
+
+def lefse2circlader(par):
+    finp,fout = bool(par['inp_f']), bool(par['out_f'])
+
+    with open(par['inp_f']) if finp else sys.stdin as inpf:
+        put_bm = (l.strip().split('\t') for l in inpf.readlines()) 
+    biomarkers = [p for p in put_bm if len(p) > 2]
+
+    circ = [    [   b[0],
+                    "" if b[0].count('.') > par['l'] else b[0].split('.')[-1],
+                    b[2],
+                    b[2]+"_col" ] for b in biomarkers]
+
+    with open(par['out_f'],'w') if fout else sys.stdout as out_file:
+        for c in circ:
+            out_file.write( "\t".join( c ) + "\n" )
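+
+# Sketch of one produced row (hypothetical biomarker "A.B" in class "c1",
+# run with -l 1): "A.B\tB\tc1\tc1_col" -- feature name, optional short label,
+# the class it marks, and the derived color key.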
+
+if __name__ == '__main__':
+    params = read_params(sys.argv)
+    lefse2circlader(params)
+
+
+
diff --git a/lefsebiom/AbundanceTable.py b/lefsebiom/AbundanceTable.py
new file mode 100755
index 0000000..77c11aa
--- /dev/null
+++ b/lefsebiom/AbundanceTable.py
@@ -0,0 +1,2456 @@
+"""
+Author: Timothy Tickle
+Description: Class to abstract an abundance table and methods to run on such a table.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle at sph.harvard.edu"
+__status__ = "Development"
+
+import csv
+import sys
+import blist
+from CClade import CClade
+from ConstantsBreadCrumbs import ConstantsBreadCrumbs
+import copy
+from datetime import date
+import numpy as np
+import os
+import re
+import scipy.stats
+import string
+from ValidateData import ValidateData
+
+
+#*************************************************************
+#*  import biom                                              *
+#* If not found - abnormally exit                            *
+#*************************************************************
+try:
+	from biom.parse import *
+	from biom.table import *
+except ImportError:
+	sys.stderr.write("************************************************************************************************************ \n")
+	sys.stderr.write("* Error:   biom environment required to process biom files - Not found - run abnormally terminated         * \n")
+	sys.stderr.write("* See http://biom-format.org/                                                              * \n")
+	sys.stderr.write("************************************************************************************************************ \n")
+	exit(1)
+
+
+c_dTarget	= 1.0
+c_fRound	= False
+c_iSumAllCladeLevels = -1
+c_fOutputLeavesOnly = False
+
+class RowMetadata:
+	"""
+	Holds the row (feature) metadata and associated functions.
+	"""
+
+	def __init__(self, dictRowMetadata, iLongestMetadataEntry=None, lsRowMetadataIDs=None):
+		""" Constructor requires a dictionary of row metadata.
+		:param dictRowMetadata:	The row metadata values with the ids as the keys, must be stable (keep order)
+		:type:			{string feature id: {'metadata': {'taxonomy': [list of metadata values]}}}
+		"""
+
+		self.dictRowMetadata = dictRowMetadata
+		self.iLongestMetadataEntry = iLongestMetadataEntry
+		self.lsRowMetadataIDs = lsRowMetadataIDs
+
+		self.dictMetadataIDs = {}
+		# Get the ids for the metadata
+		if self.dictRowMetadata:
+			for dictMetadata in self.dictRowMetadata.values():
+				dictMetadata = dictMetadata.get(ConstantsBreadCrumbs.c_metadata_lowercase, None)
+
+				if dictMetadata:
+					for key,value in dictMetadata.items():
+						if self.dictMetadataIDs.get(key, None):
+							self.dictMetadataIDs[key] = max(self.dictMetadataIDs[key],len(dictMetadata[key]))
+						else:
+							self.dictMetadataIDs[key] = len(dictMetadata[key])
+
+	def funcMakeIDs(self):
+		""" There should be a one to one mapping of row metadata ids and the values associated here with a feature ID.
+		    If not make ids from the key by appending numbers.
+		"""
+
+		# If there exists a ids list already return (this allows ID order to be given and preserved)
+		# Else make a list of IDs
+		if self.lsRowMetadataIDs:
+			return self.lsRowMetadataIDs
+
+		lsIDs = []
+		lsKeys = []
+
+		for key, value in self.dictMetadataIDs.items():
+			lsKeys.append( key )
+			if value > 1:
+				lsIDs.extend( [ "_".join( [ key, str( iIndex ) ] ) for iIndex in xrange( value ) ] )
+			else:
+				lsIDs.append( key )
+		return [ lsIDs, lsKeys ]
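+
+	# Illustration of the id expansion above (hypothetical metadata): with
+	# dictMetadataIDs == {"taxonomy": 3}, funcMakeIDs returns
+	# [["taxonomy_0", "taxonomy_1", "taxonomy_2"], ["taxonomy"]].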
+
+	def funGetFeatureMetadata(self, sFeature, sMetadata):
+		"""
+		Returns a list of values, in the order of the row metadata ids, for a microbial feature given an id.
+
+		:param sFeature:	Feature id to get metadata
+		:type:			string
+		:param sMetadata:	Metadata to get
+		:type:			string
+		:return:		list of metadata values associated with the feature
+		"""
+		lsMetadata = []
+		if self.dictRowMetadata:
+			dictFeature = self.dictRowMetadata.get( sFeature, None )
+			if dictFeature:
+				dictFeatureMetadata = dictFeature.get(ConstantsBreadCrumbs.c_metadata_lowercase, None)
+				if dictFeatureMetadata:
+					lsMetadata = dictFeatureMetadata.get(sMetadata, None)
+		return lsMetadata
+
+
+class AbundanceTable:
+	"""
+	Represents an abundance table and contains common function to perform on the object.
+
+	This class is made from an abundance data file. What is expected is a text file delimited by
+	a character (which is given to the object). The first column is expected to be the id column
+	for each of the rows. Metadata is expected before measurement data. Columns are samples and
+	rows are features (bugs).
+
+	This object is currently not hashable.
+	"""
+
+	def __init__(self, npaAbundance, dictMetadata, strName, strLastMetadata, rwmtRowMetadata = None, dictFileMetadata = None, lOccurenceFilter = None, cFileDelimiter = ConstantsBreadCrumbs.c_cTab, cFeatureNameDelimiter = "|"):
+		"""
+		Constructor for an abundance table.
+
+		:param	npaAbundance:	Structured Array of abundance data (Row=Features, Columns=Samples)
+		:type:	Numpy Structured Array abundance data (Row=Features, Columns=Samples)
+		:param	dictMetadata:	Dictionary of metadata {"String ID":["strValue","strValue","strValue","strValue","strValue"]}
+		:type:	Dictionary	Dictionary
+		:param	npaRowMetadata:	Structured Array of row (feature) metadata (optional)
+		:type:	Numpy Structured Array abundance data (Row=Features, Columns=Feature metadata)
+	 	:param	strName:	The name of the metadata that serves as the ID for the columns (For example a sample ID)
+		:type:	string
+		:param	strLastMetadata: The string last metadata name
+      		:type:	string
+		:param	lOccurenceFilter: List of integers used in an occurrence filter. [Min abundance, Min sample]
+		:type:	List of integers
+		:param	cFileDelimiter:	Character used as the delimiter of the file that is read in to create the abundance table.
+								Will also be used to write the abundance table file to a file to keep file consistency.
+		:type:	Character delimiter for reading the data in (default = TAB)
+		:param	cFeatureNameDelimiter:	Character used as the delimiter of the feature names (column 1). This is useful if the names are complex, for instance consensus lineages in metagenomics.
+		:type:	Character delimiter for feature names (default = |)
+		"""
+
+		### File Metadata
+
+		#Date
+		self.dateCreationDate = dictFileMetadata.get(ConstantsBreadCrumbs.c_strDateKey,None) if dictFileMetadata else None
+
+		#Indicates if the table has been filtered and how
+		self._strCurrentFilterState = ""
+
+		#The delimiter from the source file
+		self._cDelimiter = cFileDelimiter
+
+		#The feature name delimiter
+		self._cFeatureDelimiter = cFeatureNameDelimiter
+
+		#File type
+		self.strFileFormatType = dictFileMetadata.get(ConstantsBreadCrumbs.c_strFormatKey,None) if dictFileMetadata else None
+
+		#File generation source
+		self.strFileGenerationSource = dictFileMetadata.get(ConstantsBreadCrumbs.c_strSourceKey,None) if dictFileMetadata else None
+
+		#File type
+		self.strFileType = dictFileMetadata.get(ConstantsBreadCrumbs.c_strTypekey,None) if dictFileMetadata else None
+
+		#File url
+		self.strFileURL = dictFileMetadata.get(ConstantsBreadCrumbs.c_strURLKey,None) if dictFileMetadata else None
+
+		#The id of the file
+		self.strId = dictFileMetadata.get(ConstantsBreadCrumbs.c_strIDKey,None) if dictFileMetadata else None
+
+		#The last metadata name (which should be preserved when writing the file)
+		# Can be None if a biom file is read in.
+		self._strLastMetadataName = strLastMetadata
+
+		#The original number of features in the table
+		self._iOriginalFeatureCount = -1
+
+		#The name of the object relating to the file it was read from or would have been read from if it exists
+		#Keeps track of changes to the file through the name
+		#Will be used to write out the object to a file as needed
+		self._strOriginalName = strName
+
+		#The original number of samples in the table
+		self._iOriginalSampleCount = -1
+
+		#Data sparsity type
+		self.fSparseMatrix = dictFileMetadata.get(ConstantsBreadCrumbs.c_strSparsityKey,False) if dictFileMetadata else False
+
+		### Data metadata
+		#The column (sample) metadata
+		self._dictTableMetadata = dictMetadata
+
+		#The row (feature) metadata (Row Metadata object)
+		self.rwmtRowMetadata = rwmtRowMetadata
+
+		### Data
+
+		#The abundance data
+		self._npaFeatureAbundance = npaAbundance
+
+
+		### Logistical
+
+		#Clade prefixes for biological samples
+		self._lsCladePrefixes = ["k__","p__","c__","o__","f__","g__","s__"]
+
+		#This is not a hashable object
+		self.__hash__ = None
+
+
+		### Prep the object
+
+		self._fIsNormalized = self._fIsSummed = None
+		#If contents is not a false then set contents to appropriate objects
+		# Checking to see if the data is normalized, summed and if we need to run a filter on it.
+		if ( self._npaFeatureAbundance != None ) and self._dictTableMetadata:
+			self._iOriginalFeatureCount = self._npaFeatureAbundance.shape[0]
+			self._iOriginalSampleCount = len(self.funcGetSampleNames())
+		
+			self._fIsNormalized = ( max( [max( list(a)[1:] or [0] ) for a in self._npaFeatureAbundance] or [0] ) <= 1 )
+
+			lsLeaves = AbundanceTable.funcGetTerminalNodesFromList( [a[0] for a in self._npaFeatureAbundance], self._cFeatureDelimiter )
+			self._fIsSummed = ( len( lsLeaves ) != len( self._npaFeatureAbundance ) )
+
+			#Occurrence filtering
+			#Removes features that do not have a given level iLowestAbundance in a given number of samples iLowestSampleOccurrence
+			if ( not self._fIsNormalized ) and lOccurenceFilter:
+				iLowestAbundance, iLowestSampleOccurrence = lOccurenceFilter
+				self.funcFilterAbundanceBySequenceOccurence( iLowestAbundance, iLowestSampleOccurrence )
+#	  else:
+#		sys.stderr.write( "Abundance or metadata was None, should be atleast an empty object\n" )
+
+	@staticmethod
+	def funcMakeFromFile(xInputFile, cDelimiter = ConstantsBreadCrumbs.c_cTab, sMetadataID = None, sLastMetadataRow = None, sLastMetadata = None,
+	   lOccurenceFilter = None, cFeatureNameDelimiter="|", xOutputFile = None, strFormat = None):
+		"""
+		Creates an abundance table from a table file.
+
+		:param	xInputFile:	Path to input file.
+		:type:	String		String path.
+		:param	cDelimiter:	Delimiter for parsing the input file.
+		:type:	Character	Character
+		:param	sMetadataID:	String ID that is a metadata row ID (found on the first column) and used as an ID for samples
+		:type:	String		String ID
+		:param	sLastMetadataRow: The id of the last (right-most column) row metadata
+		:type:	String	String ID
+		:param	sLastMetadata:	The ID of the metadata that is the last metadata before measurement or feature rows.
+		:type:	String		String ID
+		:param	lOccurenceFilter: List of integers used in an occurrence filter. [Min abundance, Min sample]
+		:type:	List of integers
+		:param	cFeatureNameDelimiter:	Used to parse feature (bug) names if they are complex.
+						For example if they are consensus lineages and contain parent clade information.
+		:type:	Character	Delimiting letter
+		:param	xOutputFile:	File to output the abundance table which was read in.
+		:type:	FileStream or String file path
+		:return	AbundanceTable:	Will return an AbundanceTable object on no error. Returns False on error.
+		"""
+		
+		#Get output file and remove if existing
+		outputFile = open( xOutputFile, "w" ) if isinstance(xOutputFile, str) else xOutputFile
+		
+		#################################################################################
+		#    Check if file is a biom file - if so invoke the biom routine               #
+		#################################################################################
+		strFileName = xInputFile if isinstance(xInputFile, str) else xInputFile.name
+
+                # Determine the file read function by file extension
+		if strFileName.endswith(ConstantsBreadCrumbs.c_strBiomFile) or (strFormat == ConstantsBreadCrumbs.c_strBiomFile):
+			BiomCommonArea = AbundanceTable._funcBiomToStructuredArray(xInputFile)
+			if  BiomCommonArea:
+				lContents = [BiomCommonArea[ConstantsBreadCrumbs.c_BiomTaxData],
+					BiomCommonArea[ConstantsBreadCrumbs.c_Metadata],
+					BiomCommonArea[ ConstantsBreadCrumbs.c_dRowsMetadata],
+					BiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo]
+					]
+
+				# Update last metadata and id if given
+				if not sLastMetadata: 
+					strLastMetadata = BiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata]
+			else:
+				# return false on failure
+				lContents = False
+		elif( strFileName.endswith(ConstantsBreadCrumbs.c_strPCLFile) or (strFormat == ConstantsBreadCrumbs.c_strPCLFile) ):	
+			#Read in from text file to create the abundance and metadata structures
+			lContents = AbundanceTable._funcTextToStructuredArray(xInputFile=xInputFile, cDelimiter=cDelimiter,
+				sMetadataID = sMetadataID, sLastMetadataRow = sLastMetadataRow, sLastMetadata = sLastMetadata, ostmOutputFile = outputFile)
+		else:
+			print("The format to read and write the data was not understood; please use the correct file extension or indicate a type.")
+			return False
+
+		#If contents is not a false then set contents to appropriate objects
+		return AbundanceTable(npaAbundance=lContents[0], dictMetadata=lContents[1], strName=str(xInputFile), strLastMetadata=sLastMetadata, rwmtRowMetadata = lContents[2],
+		dictFileMetadata = lContents[3], lOccurenceFilter = lOccurenceFilter, cFileDelimiter=cDelimiter, cFeatureNameDelimiter=cFeatureNameDelimiter) if lContents else False
+
+	#Testing Status: Light happy path testing
+	@staticmethod
+	def funcCheckRawDataFile(strReadDataFileName, iFirstDataIndex = -1, sLastMetadataName = None, lOccurenceFilter = None, strOutputFileName = "", cDelimiter = ConstantsBreadCrumbs.c_cTab):
+		"""
+		Check the input abundance table.
+		Currently reduces the features that have no occurrence.
+		Also inserts a NA for blank metadata and a 0 for blank abundance data.
+		Gives the option to filter features through an occurrence filter (a feature must have a level of abundance in a minimal number of samples to be included).
+		Either iFirstDataIndex or sLastMetadataName must be given
+
+		:param	strReadDataFileName:	File path of file to read and check.
+		:type:	String	File path.
+		:param	iFirstDataIndex:	First (row) index of data not metadata in the abundance file.
+		:type:	Integer	Index starting at 0.
+		:param	sLastMetadataName:	The ID of the last metadata in the file. Rows of measurements should follow this metadata.
+		:type:	String
+		:param	lOccurenceFilter:	The lowest number of occurrences in the lowest number of samples needed for a feature to be kept
+		:type:	List[2]	List length 2 [lowest abundance (not normalized), lowest number of samples to occur in] (eg. [2.0,2.0])
+		:param	strOutputFileName:	File path of output file.
+		:type:	String	File path.
+		:param	cDelimiter:	Character delimiter for reading and writing files.
+		:type:	Character	Delimiter.
+		:return	Output Path:	Output path for written checked file.
+		"""
+
+		#Validate parameters
+		if (iFirstDataIndex == -1) and (sLastMetadataName == None):
+			print "AbundanceTable:checkRawDataFile::Error, either iFirstDataIndex or sLastMetadataName must be given."
+			return False
+
+		#Get output file and remove if existing
+		outputFile = strOutputFileName
+		if not strOutputFileName:
+			outputFile = os.path.splitext(strReadDataFileName)[0]+ConstantsBreadCrumbs.OUTPUT_SUFFIX
+
+		#Read input file lines
+		#Drop blank lines
+		readData = ""
+		with open(strReadDataFileName,'rU') as f:
+			readData = f.read()
+		readData = filter(None,readData.split(ConstantsBreadCrumbs.c_strEndline))
+
+		#Read the length of each line and make sure there is no jagged data
+		#Also hold row count for the metadata
+		iLongestLength = len(readData[0].split(cDelimiter))
+		iMetadataRow = -1
+		if not sLastMetadataName:
+			sLastMetadataName = "None"
+		for iIndex, strLine in enumerate(readData):
+			sLineElements = strLine.split(cDelimiter)
+			if sLineElements[0] == sLastMetadataName:
+				iMetadataRow = iIndex
+			iLongestLength = max(iLongestLength, len(sLineElements))
+
+		#If not already set, set iFirstDataIndex
+		if iFirstDataIndex < 0:
+			iFirstDataIndex = iMetadataRow + 1
+
+		#Used to substitute . to -
+		reSubPeriod = re.compile('\.')
+
+		#File writer
+		with open(outputFile,'w') as f:
+
+			#Write metadata
+			#Empty data is changed to a default
+			#Jagged ends are filled with a default
+			for strDataLine in readData[:iFirstDataIndex]:
+				lsLineElements = strDataLine.split(cDelimiter)
+				for iindex, sElement in enumerate(lsLineElements):
+					if not sElement.strip():
+						lsLineElements[iindex] = ConstantsBreadCrumbs.c_strEmptyDataMetadata
+				if len(lsLineElements) < iLongestLength:
+					lsLineElements = lsLineElements + ([ConstantsBreadCrumbs.c_strEmptyDataMetadata]*(iLongestLength-len(lsLineElements)))
+				f.write(cDelimiter.join(lsLineElements)+ConstantsBreadCrumbs.c_strEndline)
+
+			#For each data line in the table
+			for line in readData[iFirstDataIndex:]:
+				writeToFile = False
+				cleanLine = list()
+				#Break line into delimited elements
+				lineElements = line.split(cDelimiter)
+
+				#Clean feature name
+				sCleanFeatureName = reSubPeriod.sub("-",lineElements[0])
+
+				#For each element but the first (taxa name)
+				#Check to see if the element is non-zero
+				#If so, mark the line for output
+				for element in lineElements[1:]:
+					if(element.strip() in string.whitespace):
+						cleanLine.append(ConstantsBreadCrumbs.c_strEmptyAbundanceData)
+					#Set abundance of 0 but do not indicate the line should be saved
+					elif(element == "0"):
+						cleanLine.append(element)
+					#If an abundance is found set the line to be saved.
+					else:
+						cleanLine.append(element)
+						writeToFile = True
+
+				#Occurrence filtering
+				#Removes features that do not have a given abundance level iLowestAbundance in a given number of samples iLowestSampleOccurence
+				if lOccurenceFilter:
+					iLowestAbundance, iLowestSampleOccurence = lOccurenceFilter
+					if iLowestSampleOccurence > sum([1 if float(sEntry) >= iLowestAbundance else 0 for sEntry in cleanLine]):
+						writeToFile = False
+
+				#Write to file
+				if writeToFile:    
+					f.write(sCleanFeatureName+cDelimiter+cDelimiter.join(cleanLine)+ConstantsBreadCrumbs.c_strEndline)
+		return outputFile
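+
+	# Hedged usage sketch (the file name, metadata id, and filter values below are illustrative, not from the source):
+	#   strChecked = AbundanceTable.funcCheckRawDataFile( "abundance.pcl", sLastMetadataName = "subject",
+	#       lOccurenceFilter = [2.0, 2.0] )
+	# This would write a checked copy of the table in which blank metadata become NA, blank abundances
+	# become 0, and features seen at abundance >= 2.0 in fewer than 2 samples are dropped; the output
+	# path is returned.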
+
+	def __repr__(self):
+		"""
+		Represent or print object.
+		"""
+		return "AbundanceTable"
+
+	def __str__(self):
+	  """
+	  Create a string representation of the Abundance Table.
+	  """
+
+	  return "".join(["Sample count:", str(len(self._npaFeatureAbundance.dtype.names[1:])),
+	  os.linesep+"Feature count:", str(len(self._npaFeatureAbundance[self._npaFeatureAbundance.dtype.names[0]])),
+	  os.linesep+"Id Metadata:", self._npaFeatureAbundance.dtype.names[0],
+	  os.linesep+"Metadata ids:", str(self._dictTableMetadata.keys()),
+	  os.linesep+"Metadata count:", str(len(self._dictTableMetadata.keys())),
+	  os.linesep+"Originating source:",self._strOriginalName,
+	  os.linesep+"Original feature count:", str(self._iOriginalFeatureCount),
+	  os.linesep+"Original sample count:", str(self._iOriginalSampleCount),
+	  os.linesep+"Is normalized:", str(self._fIsNormalized),
+	  os.linesep+"Is summed:", str(self._fIsSummed),
+	  os.linesep+"Current filtering state:", str(self._strCurrentFilterState),
+	  os.linesep+"Feature delimiter:", self._cFeatureDelimiter,
+	  os.linesep+"File delimiter:",self._cDelimiter])
+
+	def __eq__(self, objOther):
+		"""
+		Check if an object is equivalent in data to this object
+		Check to make sure that objOther is not None
+		Check to make sure objOther is the correct class type
+		Check to make sure self and other internal data are the same (exclusing file name)
+		Check data and make sure the npa arrays are the same
+		Check the metdata to make sure the dicts are the same 
+		(will need to sort the keys of the dicts before comparing, they do not guarentee any order.
+		"""
+		# Check for none
+		if objOther is None:
+			return False
+
+		# Check for object type
+		if not isinstance(objOther, AbundanceTable):
+			return False
+		
+		#Check feature delimiter
+		if self.funcGetFeatureDelimiter() != objOther.funcGetFeatureDelimiter():
+			return False
+
+		#Check file delimiter
+		if self.funcGetFileDelimiter() != objOther.funcGetFileDelimiter():
+			return False
+
+
+		#**************************************************
+		#* Commented out                                  *
+		#**************************************************
+		#Check name - Commented out by GW on 2013/09/14 because
+		#if we import a pcl file into a biom file and then export to pcl, the file names might be different but the tables might be the same
+		#if self.funcGetName() != objOther.funcGetName():
+		#	return False
+
+
+		#Check sample metadata
+		#Go through the metadata
+		result1 = self.funcGetMetadataCopy()
+		result2 = objOther.funcGetMetadataCopy()
+		if sorted(result1.keys()) != sorted(result2.keys()):
+			return False
+		for strKey in result1.keys():
+			if strKey not in result2:
+				return False
+			if result1[strKey] != result2[strKey]:
+				return False
+
+		#TODO check the row (feature) metadata
+
+		#TODO check the file metadata
+		#Check the ID (note: this currently re-checks the file delimiter; the file metadata ID itself is not yet compared)
+		if self.funcGetFileDelimiter() != objOther.funcGetFileDelimiter():
+			return False
+
+		#Check the date
+		if self.dateCreationDate != objOther.dateCreationDate:
+			return False
+
+		#Check the format
+		if self.strFileFormatType != objOther.strFileFormatType:
+			return False
+
+
+		#**************************************************
+		#* Commented out                                  *
+		#**************************************************
+		#Check source - Commented out by GW on 2013/09/14 because
+		#if we import a pcl file into a biom file and then export to pcl, the file names might be different but the tables might be the same
+		#if self.strFileGenerationSource != objOther.strFileGenerationSource:
+		#	return False
+
+		#Check the type
+		if self.strFileType != objOther.strFileType:
+			return False
+
+		#Check the URL
+		if self.strFileURL != objOther.strFileURL:
+			return False
+
+		#Check data
+		#TODO go through the data
+		#TODO also check the data type
+		result1 = self.funcGetAbundanceCopy()
+		result2 = objOther.funcGetAbundanceCopy()
+		if len(result1) != len(result2):
+			return False
+
+		sorted_result1 = sorted(result1, key=lambda tup: tup[0])
+		sorted_result2 = sorted(result2, key=lambda tup: tup[0])
+
+		if sorted_result1 != sorted_result2:
+			return False
+
+		#**************************************************
+		#* Commented out                                  *
+		#**************************************************
+		#Check AbundanceTable.__str__(self) - Commented out by GW on 2013/09/14 because
+		#if we import a pcl file into a biom file and then export to pcl, the file names might be different but the tables might be the same
+		#if AbundanceTable.__str__(self) != AbundanceTable.__str__(objOther):
+		#	return False
+
+		#Check if sample ids are the same and in the same order
+		if self.funcGetSampleNames() != objOther.funcGetSampleNames():
+			return False
+
+		return True
+
+	def __ne__(self, objOther):
+		return not self == objOther
+
+
+	#Testing Status: Light happy path testing
+	#TODO: Tim change static to class methods
+	@staticmethod
+	def _funcTextToStructuredArray(xInputFile = None, cDelimiter = ConstantsBreadCrumbs.c_cTab, sMetadataID = None, sLastMetadataRow = None, sLastMetadata = None, ostmOutputFile = None):
+
+		"""
+		Private method
+		Used to read in a file that is samples (column) and taxa (rows) into a structured array.
+
+		:param	xInputFile:	File stream or path to input file.
+		:type:	String		File stream or string path.
+		:param	cDelimiter:	Delimiter for parsing the input file.
+		:type:	Character	Character.
+		:param	sMetadataID:	String ID that is a metadata row ID (found on the first column) and used as an ID for samples. 
+					If not given it is assumed to be position 0
+		:type: String		String ID
+		:param sLastMetadataRow: String ID of the last row metadata (the ID of the rightmost column holding row/feature metadata).
+		:type:	String		String ID
+		:param	sLastMetadata:	The ID of the metadata that is the last metadata before measurement or feature rows.
+		:type:	String		String ID
+		:param	ostmOutputFile:	Output File to write to if needed. None does not write the file.
+		:type:	FileStream or String
+		:return	[taxData,metadata,rowmetadata,filemetadata]:	Numpy Structured Array of abundance data and dictionary of metadata.
+ 										Metadata is a dictionary as such {"ID": [value,value,value...]}
+ 										Values are in the order they are read in (and the order of the sample names).
+ 										ID is the first column in each metadata row.
+ 										rowmetadata is an optional RowMetadata object (can be None if not made).
+ 										The rowmetadata and taxData row Ids should match.
+ 										The last dict is a collection of BIOM file parameters used when converting from a BIOM file.
+ 										[Numpy structured Array, Dictionary, RowMetadata, dict]
+		"""
+
+		# Open file from a stream or file path
+		istmInput = open( xInputFile, 'rU' ) if isinstance(xInputFile, str) else xInputFile
+		# Flag that when incremented will switch from metadata parsing to data parsing
+		iFirstDataRow = -1
+		# Sample id row
+		namesRow = None
+		# Row metadata names
+		lsRowMetadataIDs = None
+		# Index of the last row metadata
+		iIndexLastMetadataRow = None
+		# Holds metadata {ID:[list of values]}
+		metadata = dict()
+		# Holds the data measurements [(tuple of values)]
+		dataMatrix = []
+		# Holds row metadata { sID : [ list of values ] }
+		dictRowMetadata = {}
+		# Positional index
+		iIndex = -1
+		# File handle
+		csvw = None
+
+		# Read in files
+		if ostmOutputFile:
+			csvw = csv.writer( open(ostmOutputFile,'w') if isinstance(ostmOutputFile, str) else ostmOutputFile, csv.excel_tab, delimiter = cDelimiter )
+		# For each line in the file, assume the tax id is the first element and the data follows
+		for lsLineElements in csv.reader( istmInput, dialect = csv.excel_tab, delimiter = cDelimiter ):
+			iIndex += 1
+			taxId, sampleReads = lsLineElements[0], lsLineElements[1:]
+
+			# Read through data measurements
+			# Process them as a list of tuples (needed for structured array)
+			if iFirstDataRow > 0:
+				try:
+					# Parse the sample reads, removing row metadata and storing row metadata if it exists
+					if lsRowMetadataIDs:
+						# Build expected dict for row metadata dictionary {string feature id: {'metadata': {metadatakey: [list of metadata values]}}}
+						dictFeature = dict([ [sID, [sKey]] for sID, sKey in zip( lsRowMetadataIDs, sampleReads[ 0 : iIndexLastMetadataRow ]) ])
+						if len( dictFeature ):
+							dictRowMetadata[ taxId ] = { ConstantsBreadCrumbs.c_metadata_lowercase: dictFeature }
+						dataMatrix.append(tuple([taxId] + [( float(s) if s.strip( ) else 0 ) for s in sampleReads[ iIndexLastMetadataRow: ]]))
+					else:
+						dataMatrix.append(tuple([taxId] + [( float(s) if s.strip( ) else 0 ) for s in sampleReads]))
+				except ValueError:
+					sys.stderr.write( "AbundanceTable:textToStructuredArray::Error, non-numerical value on data row. File:" + str(xInputFile) +
+						" Row:" + str(lsLineElements) + "\n" )
+					return False
+			# Otherwise go through the metadata rows
+			else:
+				# Read in metadata values, if the entry is blank then give it the default empty metadata value.
+				for i, s in enumerate( sampleReads ):
+					if not s.strip( ):
+						sampleReads[i] = ConstantsBreadCrumbs.c_strEmptyDataMetadata
+
+				# If no id metadata (sample ids) is given then the first row is assumed to be the id row, otherwise look for the id for the metadata.
+				# Add the metadata to the containing dict
+				if ( ( not sMetadataID ) and ( iIndex == 0 ) ) or ( taxId == sMetadataID ):
+					namesRow = lsLineElements
+					# Remove the row metadata ids, these names are for the column ID and the samples ids
+					if sLastMetadataRow:
+						iIndexLastMetadataRow = lsLineElements.index(sLastMetadataRow)
+						lsRowMetadataIDs = namesRow[ 1 : iIndexLastMetadataRow + 1 ]
+						namesRow = [ namesRow[ 0 ] ] + namesRow[ iIndexLastMetadataRow + 1: ]
+
+						# If the sample metadata dictionary already has entries then remove the row metadata info from it.
+						if len( metadata ) and len( lsRowMetadataIDs ):
+							for sKey, lsValues in metadata.items():
+								metadata[ sKey ] = lsValues[ iIndexLastMetadataRow: ]
+
+				# Set the metadata without row metadata entries
+				metadata[taxId] = sampleReads[ iIndexLastMetadataRow: ] if (lsRowMetadataIDs and len( lsRowMetadataIDs )) else sampleReads
+
+				# If the last metadata was just processed switch to data processing
+				# If the last metadata name is not given it is assumed that there is only one metadata
+				if ( not sLastMetadata ) or ( taxId == sLastMetadata ):
+					iFirstDataRow = iIndex + 1
+
+			# If writing out the data write back out the line read in.
+			# This happens at the end so that the above cleaning is captured and written.
+			if csvw:
+				csvw.writerow( [taxId] + sampleReads )
+
+		if sLastMetadata and ( not dataMatrix ):
+			sys.stderr.write( "AbundanceTable:textToStructuredArray::Error, did not find the row for the last metadata ID. File:" + str(xInputFile) +
+				" Identifier:" + sLastMetadata + "\n" )
+			return False
+
+		# Make sure the names are found
+		if namesRow == None:
+			sys.stderr.write( "AbundanceTable:textToStructuredArray::Error, did not find the row for the unique sample/column. File:" + str(xInputFile) +
+				" Identifier:" + sMetadataID + "\n" )
+			return False
+
+		# Now we know the longest taxId we can define the first column holding the tax id
+		# Gross requirement of Numpy structured arrays, a = ASCII followed by max # of characters (as a string)
+		longestTaxId = max( len(a[0]) for a in dataMatrix )
+		dataTypeVector = [(namesRow[0],'a' + str(longestTaxId*2))] + [(s, "f4") for s in namesRow[1:]]
+		# Create structured array
+		taxData = np.array(dataMatrix,dtype=np.dtype(dataTypeVector))
+
+		# The row metadata may be empty because the PCL file specification this originally worked on did not have feature metadata.
+		# Can be updated in the future.
+		# [Data (structured array), column metadata (dict), row metadata (RowMetadata), file metadata (dict)]
+		return [taxData, metadata, RowMetadata(dictRowMetadata = dictRowMetadata, lsRowMetadataIDs = lsRowMetadataIDs), {
+                    ConstantsBreadCrumbs.c_strIDKey:ConstantsBreadCrumbs.c_strDefaultPCLID,
+                    ConstantsBreadCrumbs.c_strDateKey:str(date.today()),
+                    ConstantsBreadCrumbs.c_strFormatKey:ConstantsBreadCrumbs.c_strDefaultPCLFileFormateType,
+                    ConstantsBreadCrumbs.c_strSourceKey:ConstantsBreadCrumbs.c_strDefaultPCLGenerationSource,
+                    ConstantsBreadCrumbs.c_strTypekey:ConstantsBreadCrumbs.c_strDefaultPCLFileTpe,
+                    ConstantsBreadCrumbs.c_strURLKey:ConstantsBreadCrumbs.c_strDefaultPCLURL,
+                    ConstantsBreadCrumbs.c_strSparsityKey:ConstantsBreadCrumbs.c_fDefaultPCLSparsity}]
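+
+	# Sketch of the input layout _funcTextToStructuredArray roughly expects (names are illustrative, not from the source):
+	#   ID       sample1  sample2    <- sample id row (sMetadataID)
+	#   class    a        b          <- metadata rows, down to and including sLastMetadata
+	#   Bacteria|Firmicutes  1.0  2.0    <- feature rows follow
+	# The call would return [structured array of feature rows, {"ID": [...], "class": [...]},
+	# a RowMetadata object, and the file metadata dict built above].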
+
+#	def funcAdd(self,abndTwo,strFileName=None):
+#		"""
+#		Allows one to add an abundance table to an abundance table. They both must be the same state of normalization or summation
+#		or they will be summed or normalized if one of the two are.
+#
+#		:param	abndTwo:	AbundanceTable object 2
+#		:type:	AbundanceTable
+#		:return	AbudanceTable:
+#		"""
+#
+#		#Check summation and normalization
+#		if(self.funcIsSummed() or abndTwo.funcIsSummed()):
+#			self.funcSum()
+#			abndTwo.funcSum()
+#		if(self.funcIsNormalized() or abndTwo.funcIsNormalized()):
+#			self.funcNormalize()
+#			abndTwo.funcNormalize()
+#
+#		#Normalize Feature names
+#    		#Get if the abundance tables have clades
+#    		fAbndInputHasClades = self.funcHasFeatureHierarchy()
+#    		fAbndCompareHasClades = abndTwo.funcHasFeatureHierarchy()
+#
+#    		if(fAbndInputHasClades or fAbndCompareHasClades):
+#			#If feature delimiters do not match, switch
+#			if not self.funcGetFeatureDelimiter() == abndTwo.funcGetFeatureDelimiter():
+#				abndTwo.funcSetFeatureDelimiter(self.funcGetFeatureDelimiter())
+#
+#			#Add prefixes if needed.
+#            		self.funcAddCladePrefixToFeatures()
+#        		abndTwo.funcAddCladePrefixToFeatures()
+#
+#		#Get feature Names
+#		lsFeatures1 = self.funcGetFeatureNames()
+#		lsFeatures2 = abndTwo.funcGetFeatureNames()
+#
+#		#Make one feature name list
+#		lsFeaturesCombined = list(set(lsFeatures1+lsFeatures2))
+#
+#		#Add samples by features (Use 0.0 for empty data features, use NA for empty metadata features)
+#		
+#
+#		#Combine metadata
+#		dictMetadata1 = self.funcGetMetadataCopy()
+#		dictMetadata2 = abndTwo.funcGetMetadataCopy()
+#
+#		#Get first table metadata and add NA for metadata it is missing for the length of the current metadata
+#		lsMetadataOnlyInTwo = list(set(dictMetadata2.keys())-set(dictMetadata1.keys()))
+#		dictCombinedMetadata = dictMetadata1
+#		lsEmptyMetadata = ["NA" for i in xrange(self.funcGetSampleCount())]
+#		for sKey in lsMetadataOnlyInTwo:
+#			dictCombinedMetadata[sKey]=lsEmptyMetadata
+#		#Add in the other metadata dictionary
+#		lsCombinedKeys = dictCombinedMetadata.keys()
+#		lsEmptyMetadata = ["NA" for i in xrange(abndTwo.funcGetSampleCount())]
+#		for sKey in lsCombinedKeys:
+#			if sKey in dictMetadata2:
+#				dictCombinedMetadata[sKey] = dictCombinedMetadata[sKey]+dictMetadata2[sKey]
+#			else:
+#				dictCombinedMetadata[sKey] = dictCombinedMetadata[sKey]+lsEmptyMetadata
+#
+#		#Make Abundance table
+#		return AbundanceTable(npaAbundance=npaAbundance,
+#				dictMetadata = dictCombinedMetadata,
+#				strName = strFileName if strFileName else os.path.splitext(self)[0]+"_combined_"+os.path.splitext(abndTwo)[0],
+#				strLastMetadata = self.funcGetLastMetadataName(),
+#				cFileDelimiter = self.funcGetFileDelimiter(), cFeatureNameDelimiter=self.funcGetFeatureDelimiter())
+
+	#TODO This does not adjust for sample ordering, needs to
+	def funcAddDataFeature(self, lsNames, npdData):
+		"""
+		Adds a data or group of data to the underlying table.
+		Names should be in the order of the data
+		Each row is considered a feature (not sample).
+
+		:param lsNames:	Names of the features being added to the data of the table
+		:type: List	List of string names
+		:param npdData: Rows of features to add to the table
+		:type:	Numpy array accessed by row.
+		"""
+		if ( self._npaFeatureAbundance is None ):
+			return False
+
+		# Check number of input data rows
+		iDataRows = npdData.shape[0]
+		if (len(lsNames) != iDataRows):
+			print "Error:The names and the rows of data features to add must be of equal length"
+			return False
+
+		# Grow the array by the necessary amount and add the new rows
+		iTableRowCount = self.funcGetFeatureCount()
+		iRowElementCount = self.funcGetSampleCount()
+		self._npaFeatureAbundance.resize(iTableRowCount+iDataRows)
+		for iIndexData in xrange(iDataRows):
+			self._npaFeatureAbundance[iTableRowCount+iIndexData] = tuple([lsNames[iIndexData]]+list(npdData[iIndexData]))
+
+		return True
+
+	#TODO This does not adjust for sample ordering, needs to
+	def funcAddMetadataFeature(self,lsNames,llsMetadata):
+		"""
+		Adds metadata feature to the underlying table.
+		Names should be in the order of the lists of metadata
+		Each internal list is considered a metadata and paired to a name
+		"""
+		if ( self._dictTableMetadata == None ):
+			return False
+
+		# Check number of input data rows
+		iMetadataCount = len(llsMetadata)
+		if (len(lsNames) != iMetadataCount):
+			print "Error:The names and the rows of metadata features to add must be of equal length"
+			return False
+
+		# Add the metadata
+		for tpleMetadata in zip(lsNames,llsMetadata):
+			self._dictTableMetadata[tpleMetadata[0]]=tpleMetadata[1]
+		return True
+
+	#2 test Cases
+	def funcSetFeatureDelimiter(self, cDelimiter):
+		"""
+		Changes the feature delimiter to the one provided.
+		Updates the feature names.
+
+		:param	cDelimiter:	Character feature delimiter
+		:type:	Character
+		:return	Boolean:	Indicator of success or not (false)
+		"""
+		if ( self._npaFeatureAbundance is None ):
+			return False
+		cDelimiterCurrent = self.funcGetFeatureDelimiter()
+		if ( not cDelimiter or not cDelimiterCurrent):
+			return False
+
+		#Make new feature names
+		lsNewFeatureNames = [sFeatureName.replace(cDelimiterCurrent,cDelimiter) for sFeatureName in self.funcGetFeatureNames()]
+		
+		#Update new feature names to abundance table
+		if (not self.funcGetIDMetadataName() == None):
+			self._npaFeatureAbundance[self.funcGetIDMetadataName()] = np.array(lsNewFeatureNames)
+
+		#Update delimiter
+		self._cFeatureDelimiter = cDelimiter
+		return True
+
+	#Happy path tested
+	def funcGetSampleNames(self):
+		"""
+		Returns the sample names (IDs) contained in the abundance table.
+
+		:return	Sample Name:	A List of sample names indicated by the metadata associated with the sMetadataId given in table creation.
+								A list of string names or empty list on error as well as no underlying table.
+		"""
+
+		return self._npaFeatureAbundance.dtype.names[1:] if ( self._npaFeatureAbundance is not None ) else []
+
+	#Happy Path Tested
+	def funcGetIDMetadataName(self):
+		"""
+		Returns the metadata id.
+
+		:return	ID:	The metadata id (the sample Id).
+					  Returns none on error.
+		"""
+
+		return self._npaFeatureAbundance.dtype.names[0] if ( self._npaFeatureAbundance is not None ) else None
+
+	#Happy path tested
+	def funcGetAbundanceCopy(self):
+		"""
+		Returns a deep copy of the abundance table.
+
+		:return	Numpy Structured Array:	The measurement data in the Abundance table. Can use sample names to access each column of measurements.
+									   Returns none on error.
+		"""
+
+		return self._npaFeatureAbundance.copy() if ( self._npaFeatureAbundance is not None ) else None
+
+	#Happy path tested
+	def funcGetAverageAbundancePerSample(self, lsTargetedFeatures):
+		"""
+		Averages feature abundance within a sample.
+	
+		:param	lsTargetedFeatures:	String names of features to average
+		:type:	List of string names of features which are measured
+		:return	List: List of lists or boolean (False on error). One internal list per sample indicating the sample and the feature's average abundance
+			[[sample,average abundance of selected taxa]] or False on error
+		"""
+
+		#Sample rank averages [[sample,average abundance of selected taxa]]
+		sampleAbundanceAverages = []
+		
+		sampleNames = self.funcGetSampleNames()
+		allTaxaNames = self.funcGetFeatureNames()
+		#Get an abundance table compressed to features of interest
+		abndReducedTable = self.funcGetFeatureAbundanceTable(lsTargetedFeatures)
+		if abndReducedTable is None:
+			return False  
+
+		#If the taxa to be selected are not in the list, return False and log
+		lsMissing = []
+		for sFeature in lsTargetedFeatures:
+			if not sFeature in allTaxaNames:
+				lsMissing.append(sFeature)
+			else:
+				#Check to make sure the taxa of interest is not average abundance of 0
+				if not abndReducedTable.funcGetFeatureSumAcrossSamples(sFeature):
+					lsMissing.append(sFeature)
+		if len(lsMissing) > 0:
+			sys.stderr.write( "Could not find features for averaging: " + str(lsMissing) )
+			return False
+
+		#For each sample name get average abundance
+		for sName in sampleNames:
+			npaFeaturesSample = abndReducedTable.funcGetSample(sName)
+			sampleAbundanceAverages.append([sName,sum(npaFeaturesSample)/float(len(npaFeaturesSample))])
+
+		#Sort based on average
+		return sorted(sampleAbundanceAverages, key = lambda sampleData: sampleData[1], reverse = True)
+
+	#Happy path tested 1
+	def funcGetAverageSample(self):
+		"""
+		Returns the average sample of the abundance table.
+		This average sample is made of the average of each feature.
+		:return list: A list of averages in the order of the feature names.
+		"""
+
+		ldAverageSample = []
+		#If there are no samples then return empty list.
+		if len(self.funcGetSampleNames()) < 1:
+			return ldAverageSample
+
+		#If there are samples return the average of each feature in the order of the feature names.
+		for sFeature in self._npaFeatureAbundance:
+			npFeaturesAbundance = list(sFeature)[1:]
+			ldAverageSample.append(sum(npFeaturesAbundance)/float(len(npFeaturesAbundance)))
+
+		return ldAverageSample
+
+	#Tested 2 cases
+	def funcHasFeatureHierarchy(self):
+		"""
+		Returns an indicator of having a hierarchy in the features indicated by the existence of the
+		feature delimiter.
+
+		:return	Boolean:	True (Has a hierarchy) or False (Does not have a hierarchy)
+		"""
+
+		if ( self._npaFeatureAbundance is None ):
+			return None
+		cDelimiter = self.funcGetFeatureDelimiter()
+		if ( not cDelimiter ):
+			return False
+
+		#For each feature name, check to see if the delimiter is in the name
+		for sFeature in self.funcGetFeatureNames():
+			if cDelimiter in sFeature:
+				return True
+		return False
+
+	def funcGetCladePrefixes(self):
+		"""
+		Returns the list of prefixes to use on biological sample hierarchy
+
+		:return	List:	List of strings
+		"""
+		return self._lsCladePrefixes
+
+	#3 test cases
+	def funcAddCladePrefixToFeatures(self):
+		"""
+		Adds a standardized clade prefix to indicate the biological clade at each level of the given hierarchy.
+		Will not add a prefix to already prefixed feature names.
+		Will add a prefix to feature names, or clades within a feature name, that
+		do not have one, while leaving those that do as is.
+
+		:return	Boolean:	True on success, None or False on error.
+		"""
+
+		if ( self._npaFeatureAbundance is None ):
+			return None
+		cDelimiter = self.funcGetFeatureDelimiter()
+		lsPrefixes = self.funcGetCladePrefixes()
+		iPrefixLength = len(lsPrefixes)
+		if ( not cDelimiter ):
+			return False
+
+		#Append prefixes to feature names
+		lsUpdatedFeatureNames = []
+		lsFeatureNames = self.funcGetFeatureNames()
+
+		for sFeatureName in lsFeatureNames:
+			lsClades = sFeatureName.split(cDelimiter)
+			#If there are more clades than prefixes then error
+			if(len(lsClades) > iPrefixLength):
+				print "Error:: Too many clades given to be biologically meaningful"
+				return False
+			lsUpdatedFeatureNames.append(cDelimiter.join([lsPrefixes[iClade]+lsClades[iClade] if not(lsClades[iClade][0:len(lsPrefixes[iClade])]==lsPrefixes[iClade]) else lsClades[iClade] for iClade in xrange(len(lsClades))]))
+
+		#Update new feature names to abundance table
+		if not self.funcGetIDMetadataName() == None:
+			self._npaFeatureAbundance[self.funcGetIDMetadataName()] = np.array(lsUpdatedFeatureNames)
+
+		return True
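+
+	# Illustrative sketch (assuming clade prefixes such as ["k__", "p__", ...] in self._lsCladePrefixes;
+	# the exact prefix list lives in the constructor and is an assumption here):
+	#   "Bacteria|Firmicutes"    -> "k__Bacteria|p__Firmicutes"
+	#   "k__Bacteria|Firmicutes" -> "k__Bacteria|p__Firmicutes"  (clades already prefixed are left as is)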
+
+	#Happy Path Tested
+	def funcGetFeatureAbundanceTable(self, lsFeatures):
+		"""
+		Returns a copy of the current abundance table with the abundance of just the given features.
+
+		:param	lsFeatures:	String Feature IDs that are kept in the compressed abundance table.
+		:type:	List of strings	Feature IDs (found as the first entry of a feature row in the input file).
+		:return	AbundanceTable:	A compressed version of the abundance table.
+				  On an error None is returned.
+		"""
+		
+		if ( self._npaFeatureAbundance is None ) or ( lsFeatures is None ):
+			return None
+
+		#Get a list of boolean indicators that the row is from the features list
+		lfFeatureData = [sRowID in lsFeatures for sRowID in self.funcGetFeatureNames()]
+		#compressed version as an Abundance table
+		lsNamePieces = os.path.splitext(self._strOriginalName)
+		abndFeature = AbundanceTable(npaAbundance=np.compress(lfFeatureData, self._npaFeatureAbundance, axis = 0),
+					dictMetadata = self.funcGetMetadataCopy(),
+					strName = lsNamePieces[0] + "-" + str(len(lsFeatures)) +"-Features"+lsNamePieces[1],
+					strLastMetadata=self.funcGetLastMetadataName(),
+					cFileDelimiter = self.funcGetFileDelimiter(), cFeatureNameDelimiter= self.funcGetFeatureDelimiter())
+		#Table is no longer normalized
+		abndFeature._fIsNormalized = False
+		return abndFeature
+
+	#Happy path tested
+	def funcGetFeatureDelimiter(self):
+		"""
+		The delimiter of the feature names (for example to use on consensus lineages).
+
+		:return	Character:	Delimiter for the feature name pieces if it is complex.
+		"""
+
+		return self._cFeatureDelimiter
+
+	#Happy path tested
+	def funcGetFeatureCount(self):
+		"""
+		Returns the current feature count.
+
+		:return	Count:	Returns the int count of features in the abundance table.
+						Returns 0 on error.
+		"""
+
+		return self._npaFeatureAbundance.shape[0] if self._npaFeatureAbundance is not None else 0
+
+	#Happy path tested
+	def funcGetFeatureSumAcrossSamples(self,sFeatureName):
+		"""
+		Returns float sum of feature values across the samples.
+
+		:param	sFeatureName: The feature ID to get the sum from.
+		:type:	String.
+		:return	Double:	Sum of one feature across samples.
+		"""
+		return sum(self.funcGetFeature(sFeatureName))
+
+	def funcGetFeature(self,sFeatureName):
+		"""
+		Returns feature values across the samples.
+
+		:param	sFeatureName: The feature ID to retrieve values for.
+		:type:	String.
+		:return	List:	Feature values across samples, or None if the feature is not found.
+		"""
+
+		for sFeature in self._npaFeatureAbundance:
+			if sFeature[0] == sFeatureName:
+				return list(sFeature)[1:]
+		return None
+
+	#Happy path tested
+	def funcGetFeatureNames(self):
+		"""
+		Return the feature names as a list.
+
+		:return	Feature Names:	List of feature names (or IDs) as strings.
+								As an error returns empty list.
+		"""
+
+		if self._npaFeatureAbundance is not None:
+			return self._npaFeatureAbundance[self.funcGetIDMetadataName()]
+		return []
+
+	#Happy path tested
+	def funcGetFileDelimiter(self):
+		"""
+		The delimiter of the file the data was read from, which is also the delimiter used to write the data to a file.
+
+		:return	Character:	Delimiter for the parsing and writing the file.
+		"""
+
+		return self._cDelimiter
+
+	def funcGetLastMetadataName(self):
+		"""
+		Get the last metadata name that separates abundance and metadata measurements.
+
+		:return string:	Metadata name
+		"""
+		return self._strLastMetadataName
+
+	#Happy path tested
+	def funcGetSample(self,sSampleName):
+		"""
+		Return a copy of the feature measurements of a sample.
+
+		:param	sSampleName:	Name of sample to return.
+		:type:	String
+		:return	Sample Measurements:	Feature measurements of a sample.
+				Empty numpy array returned on error.
+		"""
+
+		if self._npaFeatureAbundance is not None:
+			return self._npaFeatureAbundance[sSampleName].copy()
+		return np.array([])
+
+	#Happy path tested
+	def funcGetMetadata(self, strMetadataName):
+		"""
+		Returns a list of metadata that is associated with the given metadata name (id).
+
+		:param	strMetadataName:	String metadata ID to be returned
+		:type:	String	ID
+		:return	Metadata:	List of metadata
+		"""
+		
+		return copy.deepcopy( self._dictTableMetadata.get(strMetadataName) ) \
+			if self._dictTableMetadata else None
+
+	#Happy path tested
+	def funcGetMetadataCopy(self):
+		"""
+		Returns a deep copy of the metadata.
+
+		:return	Metadata copy:	{"ID":[value,value...]}
+		"""
+
+		return copy.deepcopy(self._dictTableMetadata)
+		
+	#Happy path tested
+	def funcGetName(self):
+		"""
+		Returns the name of the object which is the file name that generated it.
+		If the object was generated from an Abundance Table (for instance through stratification),
+		the name is still in the form of a file that could be written to, which is informative
+		of the changes that have occurred on the data set.
+		:return string: Name
+		"""
+		return self._strOriginalName
+
+	#Happy path tested. could do more
+	def funcGetTerminalNodes(self):
+		"""
+		Returns the terminal nodes given the current feature names in the abundance table. The 
+		features must contain a consensus lineage or all will be returned.
+		:return List:	List of strings of the terminal nodes given the abundance table.
+		"""
+		return AbundanceTable.funcGetTerminalNodesFromList(lsNames=self.funcGetFeatureNames(),cNameDelimiter=self.funcGetFeatureDelimiter())
+
+	#Tested 2 test cases
+	@staticmethod
+	def funcGetTerminalNodesFromList(lsNames,cNameDelimiter):
+		"""
+		Returns the terminal nodes given the current feature names in the abundance table. The 
+		features must contain a consensus lineage or all will be returned.
+
+		:param	lsNames:	The list of string names to parse and filter.
+		:type:	List of strings
+		:param	cNameDelimiter:	The delimiter for the name of the features.
+		:type:	Character	Delimiter
+		:return list:	A list of terminal elements in the list (given only the list).
+		"""
+
+		#Build hash
+		dictCounts = dict()
+		for strTaxaName in lsNames:
+			#Split into the elements of the clades
+			lsClades = filter(None,strTaxaName.split(cNameDelimiter))
+			#Count clade levels
+			iCladeLength = len(lsClades)
+
+			#Evaluate first element
+			sClade = lsClades[0]
+			dictCounts[sClade] = sClade not in dictCounts
+
+			#Evaluate the rest of the elements
+			if iCladeLength < 2:
+				continue
+			for iIndex in xrange(1,iCladeLength):
+				prevClade = sClade
+				sClade = cNameDelimiter.join([sClade,lsClades[iIndex]])
+				if sClade in dictCounts:
+					dictCounts[sClade] = dictCounts[prevClade] = False
+				else:
+					dictCounts[sClade] = True
+					dictCounts[prevClade] = False
+
+		#Return only the elements that were of count 1
+		return filter( lambda s: dictCounts[s] == True, dictCounts )
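+
+	# Worked example (a sketch; the names are illustrative):
+	#   funcGetTerminalNodesFromList( ["A", "A|B", "A|B|C", "A|D"], "|" )
+	# marks "A" and "A|B" as parents of deeper clades, so only the terminal
+	# clades "A|B|C" and "A|D" are returned (in arbitrary order).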
+
+	#Happy path tested
+	def funcIsNormalized(self):
+		"""
+		Returns if the data has been normalized.
+
+		:return	Boolean:	Indicates if the data is normalized.
+						   True indicates the data is normalized.
+		"""
+
+		return self._fIsNormalized
+
+	#Happy path tested
+	def funcIsPrimaryIdMetadata(self,sMetadataName):
+		"""
+		Checks the metadata associated with sMetadataName and returns whether the metadata values are unique.
+		This is important to some of the functions in the Abundance Table, specifically when translating from one metadata to another.
+		
+		:param	sMetadataName:	ID of metadata to check for uniqueness.
+		:type:	String	Metadata ID.
+		:return	Boolean:	Returns indicator of uniqueness.
+							True indicates unique.
+		"""
+
+		lMetadata = self.funcGetMetadata(sMetadataName)
+		if not lMetadata:
+			return False
+		return (len(lMetadata) == len(set(lMetadata)))
+
+	#Happy path tested
+	def funcIsSummed(self):
+		"""
+		Returns whether the data is summed.
+
+		:return	Boolean:	Indicator of being summed. True indicates summed.
+		"""
+
+		return self._fIsSummed
+
+	#Happy path tested
+	def funcFilterAbundanceByPercentile(self, dPercentileCutOff = 95.0, dPercentageAbovePercentile=1.0):
+		"""
+		Filter on features.
+		A feature is removed if its abundance is not found in the top X percentile in at least a certain percentage of the samples.
+
+		:param	dPercentileCutOff:	The percentile used for filtering.
+		:type:	double	A double between 0.0 and 100.0
+		:param	dPercentageAbovePercentile:	The percentage above the given percentile (dPercentileCutOff) that must exist to keep the feature.
+		:type:	double	Between 0.0 and 100.0
+		:return	Boolean:	Indicator of filtering occurring without error. True indicates filtering occurred.
+		"""
+
+		#No need to do anything
+		if(dPercentileCutOff==0.0) or (dPercentageAbovePercentile==0.0):
+			return True
+
+		#Sample names
+		lsSampleNames = self.funcGetSampleNames()
+
+		#Scale percentage out of 100
+		dPercentageAbovePercentile = dPercentageAbovePercentile/100.0
+
+		#Sample count
+		iSampleCount = len(lsSampleNames)
+
+		#Get a threshold score of the value at the specified percentile for each sample
+		#In the order of the sample names
+		ldScoreAtPercentile = [scipy.stats.scoreatpercentile(self._npaFeatureAbundance[lsSampleNames[iIndex]],dPercentileCutOff) for iIndex in xrange(iSampleCount)]
+
+		#Record how many entries for each feature have a value equal to or greater than the sample's score at dPercentileCutOff
+		#If the percentage of entries passing the criteria is above dPercentageAbovePercentile, put the index in the list to keep
+		liKeepIndices = []
+		iSampleCount = float(iSampleCount)
+		for iRowIndex, npaRow in enumerate(self._npaFeatureAbundance):
+			iCountPass = sum([1 if dValue >= ldScoreAtPercentile[iValueIndex] else 0 for iValueIndex, dValue in enumerate(list(npaRow)[1:])])
+			if (iCountPass / iSampleCount) >= dPercentageAbovePercentile:
+				liKeepIndices.append(iRowIndex)
+
+		#Compress array
+		self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepIndices]
+
+		#Update filter state
+		self._strCurrentFilterState += ":dPercentileCutOff=" + str(dPercentileCutOff) + ",dPercentageAbovePercentile=" + str(dPercentageAbovePercentile)
+
+		#Table is no longer normalized
+		self._fIsNormalized = False
+
+		return True
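+
+	# Sketch of the criterion (the values are illustrative): with dPercentileCutOff = 95.0 and
+	# dPercentageAbovePercentile = 10.0, each sample's 95th-percentile score is computed, and a
+	# feature is kept only if its value reaches that per-sample score in at least 10% of the samples.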
+
+	def funcFilterAbundanceByMinValue(self, dMinAbundance = 0.0001, iMinSamples = 3):
+		"""
+		Filter abundance by requiring features to have a minimum relative abundance in a minimum number of samples.
+		Will evaluate greater than or equal to the dMinAbundance and iMinSamples.
+
+		:param	dMinAbundance:	Minimum relative abundance.
+		:type:	Real	Number less than 1.
+		:param	iMinSamples:	Minimum number of samples that must have at least the relative abundance.
+		:type:	Integer	Number greater than 1.
+		:return	Boolean:	Indicator of the filter running without error. False indicates error.
+		"""
+
+		#No need to do anything
+		if(dMinAbundance==0) or (iMinSamples==0):
+			return True
+
+		#This filter requires the data to be relative abundance
+		if not self._fIsNormalized:
+			#sys.stderr.write( "Could not filter by minimum value because the data is not normalized.\n" )
+			return False
+
+		#Holds which indexes are kept
+		liKeepFeatures = []
+		for iRowIndex, dataRow in enumerate( self._npaFeatureAbundance ):
+			#See which rows meet the criteria and keep the index if needed.
+			if len( filter( lambda d: d >= dMinAbundance, list(dataRow)[1:] ) ) >= iMinSamples:
+				liKeepFeatures.append(iRowIndex)
+
+		#Compress array
+		self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepFeatures]
+		#Update filter state
+		self._strCurrentFilterState += ":dMinAbundance=" + str(dMinAbundance) + ",iMinSamples=" + str(iMinSamples)
+
+		return True
+
+	#Happy path tested
+	def funcFilterAbundanceBySequenceOccurence(self, iMinSequence = 2, iMinSamples = 2):
+		"""
+		Filter features by requiring a minimum sequence occurrence in a minimum number of samples.
+		Will evaluate greater than or equal to the iMinSequence and iMinSamples.
+
+		:param	iMinSequence:	Minimum sequence to occur.
+		:type:	Integer	Number Greater than 1.
+		:param	iMinSamples:	Minimum samples to occur in.
+		:type:	Integer	Number greater than 1.
+		:return	Boolean:	Indicator of the filter running without error. False indicates error.
+		"""
+
+		#No need to do anything
+		if(iMinSequence==0) or (iMinSamples==0):
+			return True
+
+		#This filter requires the data to be read counts (not normalized)
+		if self._fIsNormalized:
+			#sys.stderr.write( "Could not filter by sequence occurence because the data is already normalized.\n" )
+			return False
+
+		#Holds which indexes are kept
+		liKeepFeatures = []
+		for iRowIndex, dataRow in enumerate( self._npaFeatureAbundance ):
+			#See which rows meet the criteria and keep the index if needed.
+			if len( filter( lambda d: d >= iMinSequence, list(dataRow)[1:] ) ) >= iMinSamples:
+				liKeepFeatures.append(iRowIndex)
+
+		#Compress array
+		self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepFeatures]
+		#Update filter state
+		self._strCurrentFilterState += ":iMinSequence=" + str(iMinSequence) + ",iMinSamples=" + str(iMinSamples)
+
+		return True
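+
+	# Sketch (illustrative counts): with iMinSequence = 2 and iMinSamples = 2, a feature with
+	# counts [0, 2, 5] is kept (two samples at >= 2 sequences) while [0, 1, 5] is removed.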
+   
+	#1 Happy path test
+	def funcFilterFeatureBySD(self, dMinSDCuttOff = 0.0):
+		"""
+		A feature is removed if its abundance does not have a standard deviation of at least the given dMinSDCuttOff.
+
+		:param	dMinSDCuttOff:	Standard deviation threshold.
+		:type:	Double	A double greater than 0.0.
+		:return	Boolean:	Indicator of success. False indicates error.
+		"""
+
+		#No need to do anything
+		if(dMinSDCuttOff==0.0):
+			return True
+
+		#Holds which indexes are kept
+		liKeepFeatures = []
+
+		#Evaluate each sample
+		for iRowIndex, dataRow in enumerate(self._npaFeatureAbundance):
+			if(np.std(list(dataRow)[1:])>=dMinSDCuttOff):
+				liKeepFeatures.append(iRowIndex)
+		
+		#Compress array
+		self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepFeatures]
+
+		#Update filter state
+		self._strCurrentFilterState += ":dMinSDCuttOff=" + str(dMinSDCuttOff)
+
+		#Table is no longer normalized
+		self._fIsNormalized = False
+
+		return True
+
+	#Happy path tested 2 tests
+	def funcGetWithoutOTUs(self):
+		"""
+		Remove features that are terminal OTUs. Terminal OTUs are identified by the last clade in the feature name being an integer.
+		"""
+
+		#Get the feature names
+		lsFeatures = self.funcGetFeatureNames()
+
+		#Reduce, filter the feature names
+		lsFeatures = [sFeature for sFeature in lsFeatures if not (ValidateData.funcIsValidStringInt(sFeature.split(self.funcGetFeatureDelimiter())[-1]))]
+
+		return self.funcGetFeatureAbundanceTable(lsFeatures)
+
+	#Happy path tested
+	def funcNormalize(self):
+		"""
+		Convenience method which will call whichever normalization is appropriate on the data.
+		:return Boolean: Indicator of success (true).
+		"""
+
+		if self._fIsSummed:
+			return self.funcNormalizeColumnsWithSummedClades()
+		else:
+			return self.funcNormalizeColumnsBySum()
+
+	#Testing Status: Light happy path testing
+	def funcNormalizeColumnsBySum(self):
+		"""
+		Normalize the data in a manner that is appropriate for NOT summed data.
+		Normalize the columns (samples) of the abundance table.
+		Normalizes as a fraction of the total (number/(sum of all numbers in the column)).
+		Will not act on summed tables.
+
+		:return	Boolean:	Indicator of success. False indicates error.
+		"""
+
+		if self._fIsNormalized:
+#			sys.stderr.write( "This table is already normalized, did not perform new normalization request.\n" )
+			return False
+
+		if self._fIsSummed:
+			sys.stderr.write( "This table has clades summed, this normalization is not appropriate. Did not perform.\n" )
+			return False
+
+		#Normalize
+		for columnName in self.funcGetSampleNames():
+			column = self._npaFeatureAbundance[columnName]
+			columnTotal = sum(column)
+			if(columnTotal > 0.0):
+				column = column/columnTotal
+			self._npaFeatureAbundance[columnName] = column
+
+		#Indicate normalization has occurred
+		self._fIsNormalized = True
+
+		return True
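+
+	# Worked example (illustrative): a sample column holding [2.0, 3.0, 5.0] sums to 10.0 and is
+	# replaced by [0.2, 0.3, 0.5], so each sample column sums to 1 after normalization.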
+
+	#Happy path tested
+	def funcNormalizeColumnsWithSummedClades(self):
+		"""
+		Normalizes a summed Abundance Table.
+		If this is called on a dataset which is not summed and not normalized,
+		the data will be summed first and then normalized.
+		If already normalized, the current normalization is kept.
+
+		:return	Boolean:	Indicator of success. False indicates error.
+		"""
+
+		if self._fIsNormalized:
+#			sys.stderr.write( "This table is already normalized, did not perform new summed normalization request.\n" )
+			return False
+
+		if not self._fIsSummed:
+			sys.stderr.write( "This table does not have clades summed, this normalization is not appropriate until the clades are summed. The clades are being summed now before normalization.\n" )
+			self.funcSumClades()
+
+		#Load a hash table with root data {sKey: npaAbundances}
+		hashRoots = {}
+		for npaRow in self._npaFeatureAbundance:
+
+			curldAbundance = np.array(list(npaRow)[1:])
+			curFeatureNameLength = len(npaRow[0].split(self._cFeatureDelimiter))
+			curlRootData = hashRoots.get(npaRow[0].split(self._cFeatureDelimiter)[0])
+
+			if not curlRootData:
+				hashRoots[npaRow[0].split(self._cFeatureDelimiter)[0]] = [curFeatureNameLength, curldAbundance]
+			elif curlRootData[0] > curFeatureNameLength:
+				hashRoots[npaRow[0].split(self._cFeatureDelimiter)[0]] = [curFeatureNameLength, curldAbundance]
+
+		#Normalize each feature by its root feature
+		dataMatrix = list()
+		for npaRow in self._npaFeatureAbundance:
+
+			curHashRoot = list(hashRoots[npaRow[0].split(self._cFeatureDelimiter)[0]][1])
+			dataMatrix.append(tuple([npaRow[0]]+[npaRow[i+1]/curHashRoot[i] if curHashRoot[i] > 0 else 0 for i in xrange(len(curHashRoot))]))
+
+		self._npaFeatureAbundance = np.array(dataMatrix,self._npaFeatureAbundance.dtype)
+
+		#Indicate normalization has occurred
+		self._fIsNormalized = True
+
+		return True
+	
+	def _funcRankAbundanceHelper( self, aaTodo, iRank, lRankAbundance ):
+		"""
+		Helper method for ranking abundances which are tied.
+
+		:params aaTodo: List of tied ranks to change to a rank.
+		:type:	List of Enumerates of samples.
+		:params iRank: Current Rank
+		:type:	Integer
+		:params lRankAbundance: Sample of abundance
+		:type:	List of integers
+		"""
+
+		# Subtract one from iRank (each time) to account for next loop iteration
+		# Then average it with itself minus (the length of aaTodo + 1)
+		dRank = ( iRank + iRank - len( aaTodo ) - 1 ) / 2.0
+		for a in aaTodo:
+			lRankAbundance[a[0]] = dRank
+
+	#1 Happy path test
+	def funcRankAbundance(self):
+		"""
+		Rank abundances of features within a sample.
+
+		:return	AbundanceTable:	Abundance table data ranked (features within samples).
+							  None is returned on error.
+		"""
+
+		if self._npaFeatureAbundance is None:
+			return None
+
+		lsSampleNames = self.funcGetSampleNames()
+		npRankAbundance = self.funcGetAbundanceCopy()
+		liRanks = []
+		#For each sample name get the ranks
+		for sName in lsSampleNames:
+			#Enumerate for order and sort abundances
+			lfSample = list(enumerate(npRankAbundance[sName]))
+			lfSample = sorted(lfSample, key = lambda a: a[1], reverse = True)
+
+			# Accumulate indices until a new value is encountered to detect + handle ties
+			aaTodo = []
+			for i, a in enumerate( lfSample ):
+				if ( not aaTodo ) or ( a[1] == aaTodo[-1][1] ):
+					aaTodo.append( a )
+				else:
+					# Make multiple tied ranks = average of first and last
+					self._funcRankAbundanceHelper( aaTodo, i, npRankAbundance[sName] )
+					aaTodo = [a]
+			self._funcRankAbundanceHelper( aaTodo, i + 1, npRankAbundance[sName] )
+
+		abndRanked = AbundanceTable(npaAbundance=npRankAbundance, dictMetadata=self.funcGetMetadataCopy(),
+			strName= self.funcGetName() + "-Ranked",
+			strLastMetadata=self.funcGetLastMetadataName(),
+			cFileDelimiter=self.funcGetFileDelimiter(),
+			cFeatureNameDelimiter=self.funcGetFeatureDelimiter())
+
+		#Table is no longer normalized
+		abndRanked._fIsNormalized = False
+		return abndRanked
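+
+	# Worked example of the tie handling (illustrative): a sample with abundances [5, 3, 3, 1]
+	# is sorted descending and ranked 0, 1.5, 1.5, 3; the two tied values share the average of
+	# the zero-based ranks they span, as computed in _funcRankAbundanceHelper.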
+
+	def funcGetSampleCount(self):
+		"""
+		Returns the sample count of the abundance table.
+		"""
+		return len(self.funcGetSampleNames())
+
+	#Happy Path Tested
+	def funcReduceFeaturesToCladeLevel(self, iCladeLevel):
+		"""
+		Reduce the current table to a certain clade level.
+
+		:param	iCladeLevel:	The level of the clade to trim the features to.
+		:type:	Integer	The higher the number the more clades are preserved in the consensus lineage contained in the feature name.
+		:return	Boolean:	Indicator of success. False indicates error.
+		"""
+
+		if iCladeLevel < 1: return False
+		if self._npaFeatureAbundance is not None:
+			liFeatureKeep = []
+			for tplFeature in enumerate(self._npaFeatureAbundance):
+				if (len(tplFeature[1][0].split(self.funcGetFeatureDelimiter())) <= iCladeLevel):
+					liFeatureKeep.append(tplFeature[0])
+			#Compress array
+			self._npaFeatureAbundance = self._npaFeatureAbundance[liFeatureKeep]
+
+			#Update filter state
+			self._strCurrentFilterState += ":iCladeLevel=" + str(iCladeLevel)
+			return True
+		else:
+			return False
+
+	#Happy path tested
+	def funcRemoveSamples(self,lsSampleNames):
+		"""
+		Removes the samples given in the list.
+
+		:param	lsSampleNames:	A list of string names of samples to remove.
+		:type:	List of strings	Unique values
+		:return Boolean: Indicator of success (True = success, no error)
+		"""
+
+		#Samples to remove
+		setSamples = set(lsSampleNames)
+
+		#Get original sample count
+		iOriginalCount = self._iOriginalSampleCount
+
+		#The samples to keep
+		lsKeepSamples = [sSample for sSample in self.funcGetSampleNames() if not sSample in setSamples]
+		#The sample to keep as boolean flags for compressing the metadata
+		lfKeepSamples = [not sSample in setSamples for sSample in self.funcGetSampleNames()]
+		
+		#Reduce the abundance data and update
+		self._npaFeatureAbundance = self._npaFeatureAbundance[[self.funcGetIDMetadataName()]+lsKeepSamples]
+
+		#Reduce the metadata and update
+		for sKey in self._dictTableMetadata:
+			self._dictTableMetadata[sKey] = [value for iindex, value in enumerate(self._dictTableMetadata[sKey]) if lfKeepSamples[iindex]]
+
+		#Update sample number count
+		self._iOriginalSampleCount = len(self.funcGetSampleNames())
+
+		return self._iOriginalSampleCount == (iOriginalCount-len(setSamples))
+
+	#Happy path tested
+	def funcRemoveSamplesByMetadata(self, sMetadata, lValuesToRemove):
+		"""
+		Removes samples from the abundance table based on values of a metadata.
+		If a sample's entry for the metadata equals any of the given values, that sample is removed.
+
+		:param	sMetadata:	ID of the metadata to check the given values.
+		:type:	String	Metadata ID
+		:param	lValuesToRemove:	A list of values which if equal to a metadata entry indicate to remove the associated sample.
+		:type:	List of values:	List
+		:return	Boolean:	Indicator of success (True = success, no error)
+		"""
+
+		lsSampleNames = self.funcGetSampleNames()
+		return self.funcRemoveSamples([lsSampleNames[iindex] for iindex, sValue in enumerate(self.funcGetMetadata(sMetadata)) if sValue in lValuesToRemove])
+
+	#Happy path testing
+	def funcSumClades(self):
+		"""
+		Sums abundance data by clades indicated in the feature name (as consensus lineages).
+
+		:return	Boolean:	Indicator of success.
+					False indicates an error.
+		"""
+
+		if not self.funcIsSummed():
+
+			#Read in the data
+			#Find the header column (iCol) assumed to be 1 or 2 depending on the location of "NAME"
+			#Create a list (adSeq) that will eventually hold the sum of the columns of data
+			astrHeaders = iCol = None
+			adSeqs = np.array([0] * len(self.funcGetSampleNames()))
+			pTree = CClade( )
+			aastrRaw = []
+
+			#For each row in the npaAbundance
+			#Get the feature name, feature abundances, and sum up the abundance columns
+			#Keep the sum for later normalization
+			#Give a tree the feature name and abundance
+			for dataRow in self._npaFeatureAbundance:
+				
+				sFeatureName = dataRow[0]
+				ldAbundances = list(dataRow)[1:]
+
+				#Add to the sum of the columns (samples)
+				adSeqs = adSeqs + np.array(list(dataRow)[1:])
+
+				#Build tree
+				pTree.get( sFeatureName.split(self._cFeatureDelimiter) ).set( ldAbundances )
+
+			#Create tree of data
+			#Impute missing data
+			#Fill hashFeatures with the clade name (key) and a blist of values (value) at the specified level of interest.
+			pTree.impute( )
+			hashFeatures = {}
+			pTree.freeze( hashFeatures, c_iSumAllCladeLevels, c_fOutputLeavesOnly )
+			setstrFeatures = hashFeatures.keys( )
+
+			#Remove parent clades that are identical to child clades
+			for strFeature, adCounts in hashFeatures.items( ):
+				astrFeature = strFeature.strip( ).split( "|" )
+				while len( astrFeature ) > 1:
+					astrFeature = astrFeature[:-1]
+					strParent = "|".join( astrFeature )
+					adParent = hashFeatures.get( strParent )
+					if adParent == adCounts:
+						del hashFeatures[strParent]
+						setstrFeatures.remove( strParent )
+
+			#Sort features to be nice
+			astrFeatures = sorted( setstrFeatures )
+
+			#Change the hash table to an array
+			dataMatrix = list()
+			for sFeature in astrFeatures:
+				dataMatrix.append(tuple([sFeature]+list(hashFeatures[sFeature])))
+			self._npaFeatureAbundance=np.array(dataMatrix,self._npaFeatureAbundance.dtype)
+
+			#Indicate summation has occurred
+			self._fIsSummed = True
+
+		return True
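+
+	# Sketch of the intended behavior (illustrative values): with features "A|B" = [1, 2] and
+	# "A|C" = [3, 4], summing clades adds the parent clade "A" = [4, 6]; a parent clade whose
+	# counts are identical to a child's (e.g. a single-child clade) is removed again before the
+	# table is rebuilt.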
+
+	#Happy path tested
+	def funcStratifyByMetadata(self, strMetadata, fWriteToFile=False):
+		"""
+		Stratifies the AbundanceTable by the given metadata.
+		Will write each stratified abundance table to file.
+		If fWriteToFile is True the object will use its internally stored name as a file to write to.
+		If fWriteToFile is a string then it should be a directory and end with ".". This will rebase the file
+		and store it in a different directory but with an otherwise unchanged name.
+		Note: If the metadata used for stratification has NAs, they will be segregated to their own table and returned.
+
+		:param	strMetadata:	Metadata ID to stratify data with.
+		:type:	String	ID for a metadata.
+		:param	fWriteToFile:	Indicator to write to file.
+		:type:	Boolean	True indicates to write to file.
+		:return	List:	List of AbundanceTables which are deep copies of the original.
+						Empty list on error.
+		"""
+
+		if self._npaFeatureAbundance is None or self._dictTableMetadata is None:
+			return []
+
+		#Get unique metadata values to stratify by
+		lsMetadata = self._dictTableMetadata.get(strMetadata,[])
+		setValues = set(lsMetadata)
+		#If there are no metadata values there is nothing to stratify, return an empty list
+		if len(setValues) == 0:
+			return []
+
+		retlAbundanceTables = []
+		dictAbundanceBlocks = dict()
+		#Given here there are multiple metadata values, continue to stratify
+		lsNames = self.funcGetSampleNames()
+		#Get index of values to break up
+		for value in setValues:
+			lfDataIndex = [sData==value for sData in lsMetadata]
+			#Get abundance data for the metadata value
+			#The ID column name is prepended to keep the first column, which should be the feature id
+			npaStratfiedAbundance = self._npaFeatureAbundance[[self.funcGetIDMetadataName()]+list(np.compress(lfDataIndex,lsNames))]
+
+			#Get metadata for the metadata value
+			dictStratifiedMetadata = dict()
+			for metadataType in self._dictTableMetadata:
+				dictValues = self.funcGetMetadata(metadataType)
+				dictStratifiedMetadata[metadataType] = np.compress(lfDataIndex,dictValues).tolist()
+
+			#Make abundance table
+			#Add abundance table to the list
+			lsNamePieces = os.path.splitext(self._strOriginalName)
+			objStratifiedAbundanceTable = AbundanceTable(npaAbundance=npaStratfiedAbundance, dictMetadata=dictStratifiedMetadata,
+				strName=lsNamePieces[0] + "-StratBy-" + value+lsNamePieces[1],
+				strLastMetadata=self.funcGetLastMetadataName(),
+				cFeatureNameDelimiter=self._cFeatureDelimiter, cFileDelimiter = self._cDelimiter)
+			if fWriteToFile:
+				objStratifiedAbundanceTable.funcWriteToFile(lsNamePieces[0] + "-StratBy-" + value+lsNamePieces[1])
+			#Append abundance table to returning list
+			retlAbundanceTables.append(objStratifiedAbundanceTable)
+
+		return retlAbundanceTables
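+
+	# Sketch (illustrative): stratifying a table whose "class" metadata is ["a", "a", "b"] returns
+	# two AbundanceTables, one holding the two "a" samples and one holding the "b" sample, each
+	# named like <basename>-StratBy-<value><ext>.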
+
+	#Happy Path Tested
+	def funcTranslateIntoMetadata(self, lsValues, sMetadataFrom, sMetadataTo, fFromPrimaryIds=True):
+		"""
+		Takes the given data values in one metadata and translates them to the values in another
+		metadata of the same samples holding the values of the first metadata.
+		If fFromPrimaryIds is True, the sMetadataFrom values are checked for uniqueness;
+		if sMetadataFrom has any duplicates the function fails and returns False.
+		If fFromPrimaryIds is not True, duplicate values can break the preservation of order
+		or may cause duplication in the "to" group. This is not advised.
+
+		:param	lsValues:	Values to translate.
+		:type:	List	List of values.
+		:param	sMetadataFrom:	The metadata the lsValues come from.
+		:type:	String	ID for the metadata.
+		:param	sMetadataTo:	The metadata the lsValues will be translated into keeping the samples the same.
+		:type:	String	ID for the metadata.
+		:param	fFromPrimaryIds:	Indicates the values in sMetadataFrom must be unique.
+		:type:	Boolean	True indicates the from-metadata values must be unique; otherwise False is returned.
+		:return List:	List of new values or False on error.
+		"""
+
+		#Get metadata
+		lFromMetadata = self.funcGetMetadata(sMetadataFrom)
+		if not lFromMetadata:
+				sys.stderr.write( "Abundancetable::funcTranlateIntoMetadata. Did not receive lFromMetadata.\n" )
+				return False
+
+		lToMetadata = self.funcGetMetadata(sMetadataTo)
+		if not lToMetadata:
+				sys.stderr.write( "Abundancetable::funcTranlateIntoMetadata. Did not receive lToMetadata.\n" )
+				return False
+
+		#Check to see if the values are unique if indicated to do so
+		if fFromPrimaryIds:
+			if not len(lFromMetadata) == len(set(lFromMetadata)):
+				sys.stderr.write( "Abundancetable::funcTranlateIntoMetadata. sMetadataFrom did not have unique values.\n" )
+				return False
+
+		#Translate over
+		if lFromMetadata and lToMetadata:
+			return [lToMetadata[iIndex] for iIndex in [lFromMetadata.index(value) for value in lsValues]]
+
+		return False
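+
+	# Worked example (illustrative metadata names and values): with sample ids ["s1", "s2", "s3"]
+	# stored under a metadata "id" and classes ["ctrl", "case", "ctrl"] under "class",
+	#   funcTranslateIntoMetadata( ["s2", "s3"], "id", "class" )
+	# returns ["case", "ctrl"].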
+
+	#Happy path tested
+	def funcToArray(self):
+		"""
+		Returns a numpy array of the current Abundance Table.
+		Removes the first ID head column; the numpy array is
+		made of lists, not tuples.
+
+		:return Numpy Array:	np.array([[float,float,...],[float,float,...],[float,float,...]])
+								None is returned on error.
+		"""
+
+		if self._npaFeatureAbundance is not None:
+			return np.array([list(tplRow)[1:] for tplRow in self._npaFeatureAbundance],'float')
+		return None
+
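+	# Sketch, assuming abndTable is a populated AbundanceTable instance:
+	#   npaData = abndTable.funcToArray()  # numeric matrix only: one list of floats per feature
+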
+	#Happy Path tested
+	def funcWriteToFile(self, xOutputFile, cDelimiter=None, cFileType=ConstantsBreadCrumbs.c_strPCLFile):
+		"""
+		Writes the AbundanceTable to the given output file.
+		Will overwrite an existing file as needed.
+		Will use the cDelimiter to delimit columns if provided.
+
+		:param	xOutputFile:	File stream or File path to write the file to.
+		:type:	String	File Path
+		:param	cDelimiter:	Delimiter for the output file.
+		:type:	Character	If cDelimiter is not specified, the internally stored file delimiter is used.
+		"""
+
+		if not xOutputFile:
+			return
+		# Check delimiter argument
+		if not cDelimiter:
+			cDelimiter = self._cDelimiter
+
+		#  Check file type: If pcl: write pcl file; If biom: write biom file; If None: write pcl file
+		if(cFileType == None):
+			cFileType = ConstantsBreadCrumbs.c_strPCLFile
+				
+		if(cFileType == ConstantsBreadCrumbs.c_strPCLFile):
+			# Write as a pcl file
+			self._funcWritePCLFile(xOutputFile, cDelimiter=cDelimiter)
+		elif(cFileType == ConstantsBreadCrumbs.c_strBiomFile):
+			#Write as a biom  file
+			self._funcWriteBiomFile(xOutputFile)
+		return
+
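+	# A brief sketch of funcWriteToFile, assuming abndTable is a populated AbundanceTable
+	# instance; the file-type constants are the ones defined in ConstantsBreadCrumbs:
+	#   abndTable.funcWriteToFile("out.pcl")  # PCL is the default (ConstantsBreadCrumbs.c_strPCLFile)
+	#   abndTable.funcWriteToFile("out.biom", cFileType=ConstantsBreadCrumbs.c_strBiomFile)
+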
+	def _funcWritePCLFile(self, xOutputFile, cDelimiter=None):
+		"""
+		Write an abundance table object as a PCL file.
+
+		:param	xOutputFile:	File stream or File path to write the file to.
+		:type:	String	File Path
+		:param	cDelimiter:	Delimiter for the output file.
+		:type:	Character	If cDelimiter is not specified, the internally stored file delimiter is used.
+		"""
+
+		f = csv.writer(open( xOutputFile, "w" ) if isinstance(xOutputFile, str) else xOutputFile, csv.excel_tab, delimiter=cDelimiter)
+		
+		# Get Row metadata id info (IDs for column header, keys that line up with the ids)
+		lsRowMetadataIDs, lsRowMetadataIDKeys = self.rwmtRowMetadata.funcMakeIDs() if self.rwmtRowMetadata else [[],[]]
+
+		#Write Ids
+		f.writerows([[self.funcGetIDMetadataName()]+lsRowMetadataIDs+list(self.funcGetSampleNames())])
+
+		#Write column metadata
+		lsKeys = list(set(self._dictTableMetadata.keys())-set([self.funcGetIDMetadataName(),self.funcGetLastMetadataName()]))
+		lMetadataIterations = list(set(lsKeys+[self.funcGetLastMetadataName()] ))
+
+		f.writerows([[sMetaKey]+([ConstantsBreadCrumbs.c_strEmptyDataMetadata]*len(lsRowMetadataIDs))+self.funcGetMetadata(sMetaKey) for sMetaKey in lMetadataIterations if sMetaKey != self.funcGetIDMetadataName() and not sMetaKey is None]) 
+
+		#Write abundance
+		lsOutput = list()
+		curAbundance = self._npaFeatureAbundance.tolist()
+
+		for curAbundanceRow in curAbundance:
+			# Make feature metadata, padding with NA as needed
+			lsMetadata = []
+			for sMetadataId in lsRowMetadataIDKeys:
+				lsMetadata = lsMetadata + self.rwmtRowMetadata.funGetFeatureMetadata( curAbundanceRow[0], sMetadataId )
+				lsMetadata = lsMetadata + ( [ ConstantsBreadCrumbs.c_strEmptyDataMetadata ] * 
+					( self.rwmtRowMetadata.dictMetadataIDs.get( sMetadataId, 0 ) - len( lsMetadata ) ) )
+			f.writerows([[curAbundanceRow[0]]+lsMetadata+[str(curAbundanceElement) for curAbundanceElement in curAbundanceRow[1:]]])
+		return
+
+	def _funcWriteBiomFile(self, xOutputFile):
+		"""
+		Write an abundance table object as a Biom file.
+		:param	xOutputFile:	File stream or File path to write the file to.
+		:type:	String	File Path	
+		"""
+		
+		#**************************
+		# Get Sample Names        *
+		#**************************
+		lSampNames = list(self.funcGetSampleNames())
+
+		#**************************
+		# Metadata Names          *
+		#**************************
+
+		dictMetadataCopy = self.funcGetMetadataCopy()
+		lMetaData = list()
+		iKeysCounter = 0
+		for lMetadataCopyEntry in dictMetadataCopy.iteritems():
+			iKeysCounter +=1
+			sMetadataName = lMetadataCopyEntry[0]
+			lMetadataEntries = lMetadataCopyEntry[1]
+			iMetadataEntryCounter =  -1
+			for sMetadataEntry in lMetadataEntries:
+				iMetadataEntryCounter+=1
+				dictMetadataNames = dict()
+				dictMetadataNames[sMetadataName ] = sMetadataEntry
+				if iKeysCounter == 1:
+					lMetaData.append(dictMetadataNames)
+				else:
+					lMetaData[iMetadataEntryCounter][sMetadataName ] = sMetadataEntry
+
+
+		#**************************
+		# Observation Ids         *
+		# and row metadata        *
+		#**************************
+		bTaxonomyInRowsFlag = False
+		if self.rwmtRowMetadata and self.rwmtRowMetadata.dictRowMetadata is not None:
+			bTaxonomyInRowsFlag = True
+			
+		lObservationMetadataTable = list()
+
+		lObservationIds = list()
+		lFeatureNamesResultArray = self.funcGetFeatureNames()
+		for sFeatureName  in  lFeatureNamesResultArray:
+			lObservationIds.append(sFeatureName)
+
+			if self.rwmtRowMetadata and self.rwmtRowMetadata.dictRowMetadata:
+				RowMetadataEntry = self.rwmtRowMetadata.dictRowMetadata[sFeatureName][ConstantsBreadCrumbs.c_metadata_lowercase]	
+				lObservationMetadataTable.append( RowMetadataEntry )
+
+		#**************************
+		# Data                    *
+		#**************************
+		
+		lData = list()
+		lAbundanceCopyResultArray = self.funcGetAbundanceCopy()
+
+		for r in lAbundanceCopyResultArray:
+			lr = list(r)
+			lr.pop(0)	#Remove metadata
+			lAbundanceValues = list()
+			for AbundanceEntry in lr:
+				flAbundanceEntry = float(AbundanceEntry)
+				lAbundanceValues.append(flAbundanceEntry)
+			lData.append(lAbundanceValues)
+		arrData = np.array(lData)  #Convert list to array
+
+		
+
+		#**************************
+		# Invoke the              *
+		# biom table factory      *     
+		#**************************
+		if not bTaxonomyInRowsFlag:
+			BiomTable = table_factory(arrData,
+							  lSampNames,
+							  lObservationIds,
+							  lMetaData,
+							  constructor=SparseOTUTable)
+		else:				#There was metadata in the rows
+			BiomTable = table_factory(arrData,
+							  lSampNames,
+							  lObservationIds,
+							  lMetaData,
+							  lObservationMetadataTable if len(lObservationMetadataTable) > 0 else None,
+							  constructor=SparseOTUTable)	
+	  
+		#**************************
+		# Generate biom Output    *   
+		#**************************
+		f = open( xOutputFile, "w" ) if isinstance(xOutputFile, str) else xOutputFile
+		f.write(BiomTable.getBiomFormatJsonString(ConstantsBreadCrumbs.c_biom_file_generated_by))
+		f.close()
+		return
+
+	#Testing Status: 1 Happy path test
+	@staticmethod
+	def funcPairTables(strFileOne, strFileTwo, strIdentifier, cDelimiter, strOutFileOne, strOutFileTwo, lsIgnoreValues=None):
+		"""
+		This method will read in two files and abridge both files (saved as new files)
+		to just the samples in common between the two files given a common identifier.
+		***If the identifier is not unique in each data set, the first sample with the pairing id is taken so make sure the ID is unique.
+		Expects the files to have the same delimiter.
+
+		:param	strFileOne:	Path to file one to be paired.
+		:type:	String	File path.
+		:param	strFileTwo:	Path to file two to be paired.
+		:type:	String	File path.
+		:param	strIdentifier:	Metadata ID that is used for pairing.
+		:type:	String	Metadata ID.
+		:param	cDelimiter:	Character delimiter to read the files.
+		:type:	Character	Delimiter.
+		:param	strOutFileOne:	The output file for the paired version of the first file.
+		:type:	String	File path.
+		:param	strOutFileTwo:	The output file for the paired version of the second file.
+		:type:	String	File path.
+		:param	lsIgnoreValues:	These values are ignored even if they are common IDs between the two files.
+		:type:	List	List of strings.
+		:return	Boolean:	Indicator of no errors.
+							  False indicates errors.
+		"""
+
+		#Validate parameters
+		if(not ValidateData.funcIsValidFileName(strFileOne)):
+			sys.stderr.write( "AbundanceTable:checkRawDataFile::Error, file not valid. File:" + strFileOne + "\n" )
+			return False
+		#Validate parameters
+		if(not ValidateData.funcIsValidFileName(strFileTwo)):
+			sys.stderr.write( "AbundanceTable:checkRawDataFile::Error, file not valid. File:"+ strFileTwo + "\n" )
+			return False
+
+		#Make file one
+		#Read in file
+		istm = csv.reader(open(strFileOne,'rU'), csv.excel_tab, delimiter=cDelimiter)
+		lsContentsOne = [lsRow for lsRow in istm]
+
+		#Get the file identifier for file one
+		fileOneIdentifier = None
+		for sLine in lsContentsOne:
+			if sLine[0] == strIdentifier:
+				fileOneIdentifier = sLine
+				break
+
+		#Make file two
+		#Read in file
+		istm = csv.reader(open(strFileTwo,'rU'), csv.excel_tab, delimiter=cDelimiter)
+		lsContentsTwo = [lsRow for lsRow in istm]
+
+		#Get the file identifier for file two
+		fileTwoIdentifier = None
+		for sLine in lsContentsTwo:
+			if sLine[0] == strIdentifier:
+				fileTwoIdentifier = sLine
+				break
+
+		#Get what is in common between the identifiers
+		#And find which columns to keep in the tables based on the common elements
+		setsCommonIdentifiers = set(fileOneIdentifier) & set(fileTwoIdentifier)
+		if lsIgnoreValues:
+			setsCommonIdentifiers = setsCommonIdentifiers - set(lsIgnoreValues)
+
+		#Get positions of common identifiers in each data set; if the identifier is not unique in a data set just take the first index
+		lfFileOneIDIndexes = [fileOneIdentifier.index(sCommonID) for sCommonID in setsCommonIdentifiers]
+		lfFileTwoIDIndexes = [fileTwoIdentifier.index(sCommonID) for sCommonID in setsCommonIdentifiers]
+
+		#Convert index list to list of boolean
+		lfFileOneElements = [iIndex in lfFileOneIDIndexes for iIndex, sIdentifier in enumerate(fileOneIdentifier)]
+		lfFileTwoElements = [iIndex in lfFileTwoIDIndexes for iIndex, sIdentifier in enumerate(fileTwoIdentifier)]
+
+		#Write out file one
+		ostm = csv.writer(open(strOutFileOne,'w'), csv.excel_tab, delimiter=cDelimiter)
+		(ostm.writerows([np.compress(lfFileOneElements,sLine) for sLine in lsContentsOne]))
+
+		#Write out file two
+		ostm = csv.writer(open(strOutFileTwo,'w'), csv.excel_tab, delimiter=cDelimiter)
+		(ostm.writerows([np.compress(lfFileTwoElements,sLine) for sLine in lsContentsTwo]))
+
+		return True
+
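+	# A hedged example of pairing two files on a shared (hypothetical) "ID" metadata row;
+	# only the sample columns whose IDs appear in both files are kept in the output files:
+	#   AbundanceTable.funcPairTables("fileA.pcl", "fileB.pcl", "ID", ConstantsBreadCrumbs.c_cTab,
+	#       "fileA-paired.pcl", "fileB-paired.pcl", lsIgnoreValues=["NA"])
+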
+	#Testing Status: Light happy path testing
+	@staticmethod
+	def funcStratifyAbundanceTableByMetadata(strInputFile = None, strDirectory = "", cDelimiter = ConstantsBreadCrumbs.c_cTab, iStratifyByRow = 1, llsGroupings = []):
+		"""
+		Splits an abundance table into multiple abundance tables stratified by the metadata
+
+		:param	strInputFile:	String file path to read in and stratify.
+		:type:	String	File path.
+		:param	strDirectory:	Output directory to write stratified files.
+		:type:	String	Output directory path.
+		:param	cDelimiter:	The delimiter used in the abundance file.
+		:type:	Character	Delimiter.
+		:param	iStratifyByRow:	The row which contains the metadata to use in stratification.
+		:type:	Integer	Positive integer index.
+		:param	llsGroupings:	A list of string lists where each string list holds values that are treated as equal and should be grouped together.
+								For example, if you wanted to keep metadata "1", "2", and "3" separate but group "4" and "5" together you would
+								give the following: [["4","5"]].
+								If you wanted "1" and "3" grouped together as well you would give [["1","3"],["4","5"]].
+		:type	List	List of list of strings
+		:return	Boolean:	Indicator of NO error.
+							False indicates an error.
+		"""
+
+		#Validate parameters
+		if(not ValidateData.funcIsValidFileName(strInputFile)):
+			sys.stderr.write( "AbundanceTable:stratifyAbundanceTableByMetadata::Error, file not valid. File:" + strInputFile + "\n" )
+			return False
+		if(not ValidateData.funcIsValidStringType(cDelimiter)):
+			sys.stderr.write( "AbundanceTable:stratifyAbundanceTableByMetadata::Error, Delimiter is not a valid string/char type. Delimiter =" + cDelimiter + "\n" )
+			return False
+		if(not ValidateData.funcIsValidPositiveInteger(iStratifyByRow, tempZero = True) and (not ValidateData.funcIsValidString(iStratifyByRow))):
+			sys.stderr.write( "AbundanceTable:stratifyAbundanceTableByMetadata::Error, Stratify by row is not a positive integer or string keyword. Row =" +
+				str(iStratifyByRow) + ".\n" )
+			return False
+
+		#Get the base of the file path
+		#This is dependent on the given output directory and the prefix of the file name of the input file
+		#If no output file is given then the input file directory is used.
+		baseFilePath = strDirectory
+		lsFilePiecesExt = os.path.splitext(strInputFile)
+		if baseFilePath:
+			baseFilePath = baseFilePath + os.path.splitext(os.path.split(strInputFile)[1])[0]
+		else:
+			baseFilePath = lsFilePiecesExt[0]
+
+		#Read in file
+		istm = csv.reader(open(strInputFile,'rU'), csv.excel_tab, delimiter=cDelimiter)
+		sFileContents = [lsRow for lsRow in istm]
+
+		#Collect metadata
+		metadataInformation = dict()
+
+		#If iStratifyByRow is a keyword then find its row index
+		if ValidateData.funcIsValidString(iStratifyByRow):
+			for iLineIndex, strLine in enumerate(sFileContents):
+				if strLine[0].strip("\"") == iStratifyByRow:
+					iStratifyByRow = iLineIndex
+					break
+
+		#Stratify by metadata row
+		#Split metadata row into metadata entries
+		#And put in a dictionary containing {"variable":[1,2,3,4 column index]}
+		stratifyByRow = sFileContents[iStratifyByRow]
+		for metaDataIndex in xrange(1,len(stratifyByRow)):
+			metadata = stratifyByRow[metaDataIndex]
+			#Put all weird categories (none, whitespace, blank space metadata cases) into one bin
+			if not metadata or metadata in string.whitespace:
+				metadata = "Blank"
+			#Remove any extraneous formatting
+			metadata = metadata.strip(string.whitespace)
+			#Store processed metadata with column occurrence in dictionary
+			if(not metadata in metadataInformation):
+				metadataInformation[metadata] = []
+			metadataInformation[metadata].append(metaDataIndex)
+
+		#For each of the groupings
+		#Use the first value as the primary value which the rest of the values in the list are placed into
+		#Go through the dict holding the indices and extend the list for the primary value with the secondary values
+		#Then set the secondary value list to empty so that it will be ignored.
+		if llsGroupings:
+			for lSKeyGroups in llsGroupings:
+				if len(lSKeyGroups) > 1:
+					for sGroup in lSKeyGroups[1:]:
+						if sGroup in metadataInformation:
+							metadataInformation[lSKeyGroups[0]].extend(metadataInformation[sGroup])
+							metadataInformation[sGroup] = []
+
+		#Stratify data
+		stratifiedAbundanceTables = dict()
+		for tableRow in sFileContents:
+			if(len(tableRow)> 1):
+				for metadata in metadataInformation:
+					#[0] includes the taxa line
+					columns = metadataInformation[metadata]
+					if columns:
+						columns = [0] + columns
+						lineList = list()
+						for column in columns:
+							lineList.append(tableRow[column])
+						stratifiedAbundanceTables.setdefault(metadata,[]).append(lineList)
+
+		#Write to file
+		lsFilesWritten = []
+		for metadata in stratifiedAbundanceTables:
+			sOutputFile = baseFilePath+"-by-"+metadata.strip("\"")+lsFilePiecesExt[1]
+			f = csv.writer(open(sOutputFile,'w'), csv.excel_tab, delimiter = cDelimiter )
+			f.writerows(stratifiedAbundanceTables[metadata])
+			lsFilesWritten.append(sOutputFile)
+
+		return lsFilesWritten
+		
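+	# A hedged sketch of stratifying a file by the metadata in row 1, grouping the
+	# (hypothetical) metadata values "4" and "5" into one stratum; one file per stratum
+	# is written next to the input and the list of written paths is returned:
+	#   lsFiles = AbundanceTable.funcStratifyAbundanceTableByMetadata("table.pcl",
+	#       cDelimiter=ConstantsBreadCrumbs.c_cTab, iStratifyByRow=1, llsGroupings=[["4","5"]])
+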
+		
+				
+	#*******************************************
+	#* biom interface functions:               *
+	#* 1. _funcBiomToStructuredArray           *
+	#* 2. _funcDecodeBiomMetadata              *
+	#*******************************************	
+	@staticmethod
+	def _funcBiomToStructuredArray(xInputFile = None):	
+		"""
+		Reads the biom input file and builds a "BiomCommonArea"  that contains:
+		1. BiomCommonArea['sLastMetadata'] - The name of the last metadata (String)
+		2. BiomCommonArea['BiomTaxData'] - dict() - used as lcontents[0]==TaxData
+		3. BiomCommonArea['Metadata'] - dict() - used as lcontents[1]==MetaData
+		4. BiomCommonArea['BiomFileInfo'] - dict() - used as lcontents[2]==FileInfo (id, format: e.g. Biological Observation Matrix 0.9.1, etc.)
+		5. BiomCommonArea['column_metadata_id'] - A string which is the name of the column id
+		:param	xInputFile:	File path of biom file to read.
+		:type:	String	File path.
+		:return:	BiomCommonArea (see description above)
+		:type:	dict()
+		"""	
+ 
+		#*******************************************
+		#* Build the metadata                      *
+		#*******************************************
+		try:
+			BiomTable = parse_biom_table(open(xInputFile,'U') if isinstance(xInputFile, str) else xInputFile)	#Import the biom file
+		except:
+			print("Failure decoding biom file - please check your input biom file and rerun")
+			BiomCommonArea = None
+			return BiomCommonArea
+ 
+		BiomCommonArea = dict()		
+		dBugNames = list()			#Bug Names Table
+		dRowsMetadata = None		#Initialize the np.array of the Rows metadata
+		BiomElements  =  BiomTable.getBiomFormatObject('')	
+		for BiomKey, BiomValue in BiomElements.iteritems():
+		#****************************************************
+		#*     Checking the different keys:  format,        *
+		#*     rows, columns, date, generated_by            *
+		#****************************************************
+			if (BiomKey == ConstantsBreadCrumbs.c_strFormatKey  
+			or BiomKey == ConstantsBreadCrumbs.c_strFormatUrl  
+			or BiomKey == ConstantsBreadCrumbs.c_MatrixTtype
+			or BiomKey == ConstantsBreadCrumbs.c_strTypekey
+			or BiomKey == ConstantsBreadCrumbs.c_strIDKey #Same as below
+			or BiomKey == ConstantsBreadCrumbs.c_GeneratedBy  #<---Need to follow up with Biom as always BiomValue = "" even though in the file has a value
+			or BiomKey == ConstantsBreadCrumbs.c_strDateKey):  #Same as above
+				BiomCommonArea = AbundanceTable._funcInsertKeyToCommonArea(BiomCommonArea, BiomKey, BiomValue)
+
+
+			if BiomKey == ConstantsBreadCrumbs.c_rows:
+				iMaxIdLen = 0 
+				for iIndexRowMetaData in range(0, len(BiomValue)):
+					if ConstantsBreadCrumbs.c_id_lowercase in BiomValue[iIndexRowMetaData]:
+						sBugName = BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase]
+						dBugNames.append(sBugName) 	 #Post to the bug table
+						if len(sBugName) > iMaxIdLen:    #We  are calculating dynamically the length of the ID
+							iMaxIdLen  =  len(sBugName)
+		
+				if ConstantsBreadCrumbs.c_metadata_lowercase in BiomValue[0] and BiomValue[0][ConstantsBreadCrumbs.c_metadata_lowercase] is not None:
+					dRowsMetadata = AbundanceTable._funcBiomBuildRowMetadata(BiomValue, iMaxIdLen)
+
+ 
+			if BiomKey == ConstantsBreadCrumbs.c_columns:
+				BiomCommonArea = AbundanceTable._funcDecodeBiomMetadata(BiomCommonArea, BiomValue, iMaxIdLen)	#Call the subroutine to Build the metadata
+ 
+	 
+		#*******************************************
+		#* Build the TaxData                       *
+		#*******************************************
+	
+		BiomTaxDataWork = list()			#Initialize TaxData
+		BiomObservations = BiomTable.iterObservations(conv_to_np=True)		#Invoke biom method to fetch data from the biom file
+		for BiomObservationData in BiomObservations:
+			sBugName = str( BiomObservationData[1])
+			BiomTaxDataEntry = list()
+			BiomTaxDataEntry.append(sBugName)
+			BiomObservationsValues = BiomObservationData[0]
+			for BiomDataValue in BiomObservationsValues:
+				BiomTaxDataEntry.append(BiomDataValue)
+			BiomTaxDataWork.append(tuple(BiomTaxDataEntry))	
+	
+		BiomCommonArea[ConstantsBreadCrumbs.c_BiomTaxData] = np.array(BiomTaxDataWork,dtype=np.dtype(BiomCommonArea[ConstantsBreadCrumbs.c_Dtype]))
+		BiomCommonArea[ConstantsBreadCrumbs.c_dRowsMetadata] = RowMetadata(dRowsMetadata)
+		del(BiomCommonArea[ConstantsBreadCrumbs.c_Dtype])			#Not needed anymore
+ 
+		return BiomCommonArea
+	
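+	# A short, hedged sketch of what the internal biom reader returns; the keys are the
+	# ConstantsBreadCrumbs values documented in the docstring above, not a new API:
+	#   dictBiom = AbundanceTable._funcBiomToStructuredArray("table.biom")
+	#   if dictBiom:
+	#       npaTaxData = dictBiom[ConstantsBreadCrumbs.c_BiomTaxData]  # structured np.array of abundances
+	#       dictMeta = dictBiom[ConstantsBreadCrumbs.c_Metadata]      # {metadata name: [value per sample]}
+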
+
+	@staticmethod
+	def _funcDecodeBiomMetadata(BiomCommonArea,  BiomValue = None,  iMaxIdLen=0 ):	
+		"""
+		Decode the Biom Metadata and build:  
+			1. BiomCommonArea['Metadata'] 
+			2. BiomCommonArea['Dtype']
+			3. BiomCommonArea['sLastMetadata']
+			4. BiomCommonArea['column_metadata_id'] - This is a string which is the name of the column id
+			These elements will be formatted and passed down the line to build the AbundanceTable.
+		:param	BiomValue:	The "columns" metadata from the biom file (contains the metadata information)
+		:type:	dict()	 
+		:param	iMaxIdLen:	 The maximum length of a row ID
+		:type:	Integer	
+		:return:   BiomCommonArea 
+		:type:	dict()		
+		"""
+
+		BiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata] = None	#Initialize the LastMetadata element 
+		BiomCommonArea['dRowsMetadata'] = None				#Initialize for cases that there is no metadata in the rows
+
+		strLastMetadata = None
+		strIDMetadata = None
+
+		lenBiomValue = len(BiomValue)
+		BiomMetadata = dict()				 
+		for cntMetadata in range(0, lenBiomValue):
+			BiomMetadataEntry = BiomValue[cntMetadata]
+ 
+			for key, value in BiomMetadataEntry.iteritems(): 		#Loop on the entries
+ 				if key == ConstantsBreadCrumbs.c_id_lowercase:		#If id - process it
+					strIDMetadata = ConstantsBreadCrumbs.c_ID
+					if  ConstantsBreadCrumbs.c_ID  not in BiomMetadata:	#If ID not in the common area - initialize it
+						BiomMetadata[ConstantsBreadCrumbs.c_ID] = list() #Initialize a list
+						for indx in range(0, lenBiomValue):			#And post the values
+							BiomMetadata[ConstantsBreadCrumbs.c_ID].append(None)
+					BiomMetadata[ConstantsBreadCrumbs.c_ID][cntMetadata] = value.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
+ 
+				if  key == ConstantsBreadCrumbs.c_metadata_lowercase:		#If key = metadata
+					if value is not None:					#And value is not empty
+						MetadataDict = value				#Initialize a dictionary and post the values
+						for MDkey, MDvalue in MetadataDict.iteritems():
+							if type(MDkey) == unicode :
+								MDkeyAscii = MDkey.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
+							else:
+								MDkeyAscii = MDkey 
+							if type(MDvalue) == unicode:
+								MDvalueAscii = MDvalue.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
+							else:
+								MDvalueAscii = MDvalue 
+							
+							if  len(MDkeyAscii) > 0:		#Search for the last metadata
+									if not strIDMetadata:
+										strIDMetadata = MDkeyAscii
+									BiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata] =  MDkeyAscii #Set the last Metadata
+							if  MDkeyAscii  not in BiomMetadata:
+								BiomMetadata[MDkeyAscii] = list()
+								for indx in range(0, lenBiomValue):
+									BiomMetadata[MDkeyAscii].append(None)
+							BiomMetadata[MDkeyAscii][cntMetadata] = MDvalueAscii 
+ 
+
+		BiomCommonArea[ConstantsBreadCrumbs.c_Metadata] = BiomMetadata
+		BiomCommonArea[ConstantsBreadCrumbs.c_MetadataID] = strIDMetadata
+		
+		#**********************************************
+		#*    Build dtype                             *
+		#**********************************************
+
+		BiomDtype = list()
+		iMaxIdLen+=10 #Increase it by 10
+		BiomDtypeEntry = list()
+		FirstValue = ConstantsBreadCrumbs.c_ID
+		SecondValue = "a" + str(iMaxIdLen)
+		BiomDtypeEntry.append(FirstValue)
+		BiomDtypeEntry.append(SecondValue)
+		BiomDtype.append(tuple(BiomDtypeEntry))
+
+		for a in BiomMetadata[ConstantsBreadCrumbs.c_ID]:
+				BiomDtypeEntry = list()
+				FirstValue =  a.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
+				SecondValue = ConstantsBreadCrumbs.c_f4 
+				BiomDtypeEntry.append(FirstValue)
+				BiomDtypeEntry.append(SecondValue)
+				BiomDtype.append(tuple(BiomDtypeEntry))
+				
+		BiomCommonArea[ConstantsBreadCrumbs.c_Dtype] = BiomDtype
+		return BiomCommonArea
+
+	@staticmethod
+	def _funcBiomBuildRowMetadata( BiomValue, iMaxIdLen ):	
+		"""
+		Builds the row metadata from a BIOM value
+
+  		:param	BiomValue:	BIOM Value from the BIOM JSON parsing
+		:type:			Complex dict of string pairs and dicts
+		:param	iMaxIdLen:	Maximum length of all the IDs
+		:type:			int
+		:return:		dictRowsMetadata - np Array containing the rows metadata
+		:type:			{string feature id: {'metadata': {'taxonomy': [list of metadata values]}}}	
+		"""	
+		# Build the input dict for RowMetadata from a dict of dicts from a BIOM file 
+		dictRowsMetadata = dict()
+		for iIndexRowMetaData in range(0, len(BiomValue)):
+			dictRowsMetadata[str(BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase])] = dict()
+			RowMetadataEntryFromTable = BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_metadata_lowercase]
+			dMetadataTempDict = dict()
+			for key, value in RowMetadataEntryFromTable.iteritems():
+				dMetadataTempDict[key] = value
+			dictRowsMetadata[str(BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase])][ConstantsBreadCrumbs.c_metadata_lowercase] = dMetadataTempDict
+		return dictRowsMetadata
+
+	@staticmethod
+	def _funcInsertKeyToCommonArea(BiomCommonArea, BiomKey, BiomValue):
+		"""
+		Inserts the keys into the BiomCommonArea["BiomFileInfo"]
+  		:param	BiomCommonArea   - The common area that has been built before
+		:type:	dict()
+		:param	BiomKey - The current key (eg. format, date, generated by)
+		:type:	str
+		:param	BiomValue - The current value of the key (eg. for format: "Biological Observation Matrix 0.9.1")
+		:type:	str
+		:return:   BiomCommonArea  - The updated common area
+		:type:	dict()		
+		"""	
+	
+		if ConstantsBreadCrumbs.c_BiomFileInfo not in BiomCommonArea:
+				BiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo] = dict()
+			
+		strInsertKey = BiomKey			#Set default - but it is not always the same (eg. the URL key is not format_url --> url, and others)
+		PostBiomValue = BiomValue		#The default value to be posted 
+		if  BiomKey == ConstantsBreadCrumbs.c_strFormatUrl:
+			strInsertKey = ConstantsBreadCrumbs.c_strURLKey
+			
+		if  BiomKey == ConstantsBreadCrumbs.c_MatrixTtype:
+			strInsertKey = ConstantsBreadCrumbs.c_strSparsityKey
+			
+		if  BiomKey == ConstantsBreadCrumbs.c_GeneratedBy:
+			PostBiomValue = None
+
+		if  BiomKey == ConstantsBreadCrumbs.c_strDateKey:
+			PostBiomValue = None			
+			
+		BiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo][strInsertKey] = PostBiomValue
+		return BiomCommonArea
+		
diff --git a/lefsebiom/CClade.py b/lefsebiom/CClade.py
new file mode 100644
index 0000000..5bd1c44
--- /dev/null
+++ b/lefsebiom/CClade.py
@@ -0,0 +1,181 @@
+"""
+Author: Curtis Huttenhower
+Description: Used to create tree structures to hierarchically normalize abundance tables.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Curtis Huttenhower"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Curtis Huttenhower"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle at sph.harvard.edu"
+__status__ = "Development"
+
+import blist
+import sys
+
+class CClade:
+
+	def __init__( self ):
+		"""
+		Initialize CClade
+		Dictionary to hold the children nodes from feature consensus lineages.
+		adValues is a list of the abundance value.
+		"""
+		
+		self.m_hashChildren = {}
+		self.m_adValues = None
+
+	def get( self, astrClade ):
+		"""
+		Recursively travel the length of a tree until you find the terminal node
+		(where astrClade is empty, i.e. [])
+		or a dict key that matches the clade call.
+		If at any time a clade is given that is not currently known, create a new clade
+		and set it as a child of the current clade.
+		"""
+		
+		return self.m_hashChildren.setdefault(
+			astrClade[0], CClade( ) ).get( astrClade[1:] ) if astrClade else self
+
+	def set( self, adValues ):
+		"""
+		Set all the values given as a list in the same order given.
+		"""
+		
+		self.m_adValues = blist.blist( [0] ) * len( adValues )
+		for i, d in enumerate( adValues ):
+			if d:
+				self.m_adValues[i] = d
+
+	def impute( self ):
+		"""
+		This allows you to recursively impute values for clades without values given their children's counts.
+		Presumably this should be called only once and after all clade abundances have been added.
+		If the m_adValues already exist return the stored m_adValues. (No imputation needed.)
+		Otherwise call impute for all children and take the sum of the values from all the children by column:
+		not the sum of one list, but an element-wise sum across the children's lists.
+		"""
+		
+		#If values do not exist
+		if not self.m_adValues:
+			#Call impute on all children.
+			#If the parent clade has no abundance values
+			#then take a copy of the child's.
+			#If they now have a copy of a child's but have other children,
+			#sum their children with their current values.
+			for pChild in self.m_hashChildren.values( ):
+				adChild = pChild.impute( )
+				if self.m_adValues:
+					for i in range( len( adChild or [] ) ):
+						if adChild[i]:
+							self.m_adValues[i] += adChild[i]
+				elif adChild:
+					self.m_adValues = adChild[:] 
+		#If values exist return			
+		return self.m_adValues
+	
+	def _freeze( self, hashValues, iTarget, astrClade, iDepth, fLeaves ):
+		"""
+		Update the given hashValues dict with clade abundances given depth specifications.
+		Return a set of integers indicating the structure of the tree preserved in the dict/hash.
+		When the appropriate level of the tree is reached,
+		hashValues is updated with the clade (including lineage) and the abundance; it looks like {"clade":blist([0.0,0.1,...])}.
+		"""
+		
+		#fHit is true on at least one of the following conditions:
+		#iTarget is 0 (falsy), indicating no depth restriction
+		#Only leaves are requested and the target depth for the leaves is reached.
+		#The required depth is reached.
+		fHit = ( not iTarget ) or ( ( fLeaves and ( iDepth == iTarget ) ) or ( ( not fLeaves ) and ( iDepth <= iTarget ) ) )
+		#Increment depth
+		iDepth += 1
+		#Returns a set
+		setiRet = set()
+		#If there are children build an integer set indicating the structure of the tree preserved in the dict
+		if self.m_hashChildren:
+			#Union all the results from freeze of all children.
+			#Call freeze but append the child clade to the clade in the call,
+			#and give an incremented depth.
+			for strChild, pChild in self.m_hashChildren.items( ):
+				setiRet |= pChild._freeze( hashValues, iTarget, astrClade + [strChild], iDepth, fLeaves )
+			setiRet = set( ( i + 1 ) for i in setiRet )
+		else:
+			setiRet.add( 0 )
+		#Indicate if the correct level is reached
+		if iTarget < 0:
+			if fLeaves:
+				fHit = -( iTarget + 1 ) in setiRet
+			else:
+				fHit = -( iTarget + 1 ) <= max( setiRet )
+		#If astrClade is not [] (so you are actually in a clade of the tree)
+		#and the clade has values (should be true; if not, impute should have been called before running this method)
+		#and we are at the correct level of the tree, then
+		#add to the dict the clade and the abundance values
+		if astrClade and self.m_adValues and fHit:
+			hashValues["|".join( astrClade )] = self.m_adValues
+		return setiRet
+	
+	def freeze( self, hashValues, iTarget, fLeaves ):
+		"""
+		Call helper function setting the clade and depth to defaults (start positions)
+		The important result of this method is hashValues is updated with clade and abundance information
+		"""
+		self._freeze( hashValues, iTarget, [], 0, fLeaves )
+
+	def _repr( self, strClade ):
+		"""
+		Represent tree clade for debugging. Helper function for recursive repr.
+		"""
+
+		strRet = "<"
+		if strClade:
+			strRet += "%s %s" % (strClade, self.m_adValues)
+			if self.m_hashChildren:
+				strRet += " "
+		if self.m_hashChildren:
+			strRet += " ".join( p._repr( s ) for (s, p) in self.m_hashChildren.items( ) )
+		
+		return ( strRet + ">" )
+		
+	def __repr__( self ):
+		"""
+		Represent tree clade for debugging.
+		"""
+		return self._repr( "" )
+
+"""
+pTree = CClade( )
+pTree.get( ("A", "B") ).set( [1, 2, 3] )
+pTree.get( ("A", "C") ).set( [4, 5, 6] )
+pTree.get( ("D", "E") ).set( [7, 8, 9] )
+iTaxa = 0
+if iTaxa:
+	pTree.impute( )
+hashFeatures = {}
+pTree.freeze( hashFeatures, iTaxa, False )
+print( pTree )
+print( hashFeatures )
+sys.exit( 0 )
+#"""
diff --git a/lefsebiom/ConstantsBreadCrumbs.py b/lefsebiom/ConstantsBreadCrumbs.py
new file mode 100755
index 0000000..d99425c
--- /dev/null
+++ b/lefsebiom/ConstantsBreadCrumbs.py
@@ -0,0 +1,155 @@
+"""
+Author: Timothy Tickle
+Description: Project constants.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle at sph.harvard.edu"
+__status__ = "Development"
+
+class ConstantsBreadCrumbs():
+    """
+    Class to hold project constants.
+    """
+
+    #Character Constants
+    c_strComma = ','
+    c_strColon = ':'
+    c_strConfigFileHeaderChar = '['
+    c_strConfigFileCommentChar = '#'
+    c_strEndline = '\n'
+    c_strExtDelim = '.'
+    c_cFastaIDLineStart = '>'
+    c_strPathDelim = '/'
+    c_cPipe = '|'
+    c_cQuote = '\"'
+    c_cTab = '\t'
+    c_strWhiteSpace = ' '
+    c_matrixFileDelim = '\t'
+
+    c_strBreadCrumbsSVMSpace = c_strWhiteSpace
+
+    #Default values for missing data in the Abundance Table
+    c_strEmptyAbundanceData = "0"
+    c_strEmptyDataMetadata = "NA"
+    c_strSVMNoSample = "-"
+
+    lNAs = list(set(["NA","na","Na","nA",c_strEmptyDataMetadata]))
+
+    #TODO remove
+    #Reference to circlader
+    c_strCircladerScript = "circlader/circlader.py"
+
+    #AbundanceTable
+    #Suffix given to a file that is check with the checkRawDataFile method
+    OUTPUT_SUFFIX = "-checked.pcl"
+
+    #BIOM related
+    #PCL File metadata defaults (many of these come from biom file requirements)
+    #ID
+    c_strIDKey = "id"
+    c_strDefaultPCLID = None
+
+    #File date
+    c_strDateKey = "date"
+
+    #File format type
+    c_strFormatKey = "format"
+    c_strDefaultPCLFileFormateType = "PCL"
+
+    #File generation source
+    c_strSourceKey = "source"
+    c_strDefaultPCLGenerationSource = None
+
+    #File type
+    c_strTypekey = "type"
+    c_strDefaultPCLFileTpe = None
+
+    #Allowable file types for biom files
+    c_strOTUType = "OTU"
+    c_strOTUBIOMType = "OTU table"
+    c_strPathwayType = "Pathway"
+    c_strPathwayBIOMType = "Pathway table"
+    c_strFunctionType = "Function"
+    c_strFunctionBIOMType = "Function table"
+    c_strOrthologType = "Ortholog"
+    c_strOrthologBIOMType = "Ortholog table"
+    c_strGeneType = "Gene"
+    c_strGeneBIOMType = "Gene table"
+    c_strMetaboliteType = "Metabolite"
+    c_strMetaboliteBIOMType = "Metabolite table"
+    c_strTaxonType = "Taxon"
+    c_strTaxonBIOMType = "Taxon table"
+    c_dictFileType = {c_strOTUType:c_strOTUBIOMType, c_strPathwayType:c_strPathwayBIOMType, c_strFunctionType:c_strFunctionBIOMType, c_strOrthologType:c_strOrthologBIOMType, c_strGeneType:c_strGeneBIOMType, c_strMetaboliteType:c_strMetaboliteBIOMType, c_strTaxonType:c_strTaxonBIOMType}
+
+    #File URL
+    c_strURLKey = "url"
+    c_strDefaultPCLURL = None
+    c_strFormatUrl =  "format_url"
+
+    #File sparse matrix
+    c_strSparsityKey = "sparsity"
+    c_fDefaultPCLSparsity = False
+
+    # BIOM related Data
+    # Data shape
+    c_strDataShapeKey = "shape"
+
+    ######################################################################
+    # Constants related to biom import and export files                  #
+    ######################################################################
+    # Biom file extension
+    c_strBiomFile = "biom"
+    c_BiomTaxData = "BiomTaxData"
+    c_MetadataID = "column_metadata_id"
+    c_Metadata = "Metadata"
+    c_metadata_lowercase = "metadata"
+    c_sLastMetadata = "sLastMetadata"
+    c_columns = "columns"	
+    c_rows = "rows"
+    c_ascii = "ascii"	
+    c_ignore = "ignore"	
+    c_Dtype = "Dtype"	
+    c_ID = "ID"	
+    c_id_lowercase = "id"	
+    c_f4 = "f8"		
+    c_biom_file_generated_by = "BreadCrumbs"
+    c_strPCLFile = "pcl"
+    c_taxonomy = "taxonomy"
+    c_dRowsMetadata = "dRowsMetadata"
+    c_BiomFileInfo = "BiomFileInfo"
+    c_MatrixTtype = "matrix_type"
+    c_GeneratedBy = "generated_by"
+    c_MetadataEntriesTotal = "MetadataEntriesTotal"
+    c_MaximumLength = "MaximumLength"
+
+
+    def __init__(self):
+      pass
diff --git a/lefsebiom/ValidateData.py b/lefsebiom/ValidateData.py
new file mode 100755
index 0000000..5943240
--- /dev/null
+++ b/lefsebiom/ValidateData.py
@@ -0,0 +1,624 @@
+"""
+Author: Timothy Tickle
+Description: Validate Data containing methods for testing variables.
+"""
+
+#####################################################################################
+#Copyright (C) <2012>
+#
+#Permission is hereby granted, free of charge, to any person obtaining a copy of
+#this software and associated documentation files (the "Software"), to deal in the
+#Software without restriction, including without limitation the rights to use, copy,
+#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+#and to permit persons to whom the Software is furnished to do so, subject to
+#the following conditions:
+#
+#The above copyright notice and this permission notice shall be included in all copies
+#or substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#####################################################################################
+
+__author__ = "Timothy Tickle"
+__copyright__ = "Copyright 2012"
+__credits__ = ["Timothy Tickle"]
+__license__ = "MIT"
+__maintainer__ = "Timothy Tickle"
+__email__ = "ttickle at sph.harvard.edu"
+__status__ = "Development"
+
+#Import local code
+from types import *
+import decimal
+import os
+import re
+import string
+
+class ValidateData:
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidBoolean(parameterValue):
+        """
+        Validates a parameter as a valid boolean.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid boolean.
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if parameterValue == None:
+            return False
+
+        #Check to make sure it is a boolean
+        if not type(parameterValue) is BooleanType:
+            return False
+        return True
+
+    #Tested 6
+    @staticmethod
+    def funcIsTrue(parameterValue):
+        """
+        Validates a parameter as true.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is True.
+        :type	Boolean
+        """
+
+        if(ValidateData.funcIsValidBoolean(parameterValue)):
+            if(parameterValue == True):
+                return True
+        return False
+
+    #Tested 6
+    @staticmethod
+    def funcIsFalse(parameterValue):
+        """
+        Validates a parameter as false.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is False.
+        :type	Boolean
+        """
+
+        if(ValidateData.funcIsValidBoolean(parameterValue)):
+            if(parameterValue == False):
+                return True
+        return False
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidInteger(parameterValue):
+        """
+        Validates a parameter as an integer.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is an integer.
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if (parameterValue == None):
+            return False
+
+        #Check to make sure it is an integer
+        if not type(parameterValue) is IntType:
+            return False
+
+        return True
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidPositiveInteger(parameterValue, tempZero = False):
+        """
+        Validates a parameter as a positive integer.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :param	tempZero:	Allows one to set what the value for zero should return.
+        :type	Boolean	The return value for zero.
+        :return	Boolean:	True indicates the parameter is a positive integer.
+        :type	Boolean
+        """
+
+        #Check to make sure it is a valid integer
+        if not ValidateData.funcIsValidInteger(parameterValue):
+            return False
+
+        #Check to see it is positive
+        if (parameterValue < 0):
+            return False
+
+        #Check for zero value
+        if(parameterValue == 0):
+            return tempZero
+        return True
+
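+    # A small illustration of the tempZero switch (a usage sketch, not part of the API):
+    #   ValidateData.funcIsValidPositiveInteger(3)                   # True
+    #   ValidateData.funcIsValidPositiveInteger(0)                   # False: zero rejected by default
+    #   ValidateData.funcIsValidPositiveInteger(0, tempZero = True)  # True: zero explicitly allowed
+    #   ValidateData.funcIsValidPositiveInteger(-1)                  # False
+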
+    #Tested 14
+    @staticmethod
+    def funcIsValidNumeric(parameterValue):
+        """
+        Validates a parameter as a numeric value.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a numeric.
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if (parameterValue == None):
+            return False
+        #Check to make sure it is a numeric type (but not a boolean)
+        if((type(parameterValue) == IntType)or(type(parameterValue) == LongType)or(type(parameterValue) == FloatType)or(type(parameterValue) == ComplexType)or(str(type(parameterValue)) == "<type 'numpy.float64'>")):
+            if(not type(parameterValue) == BooleanType):
+                return True
+        return False
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidStringType(parameterValue):
+        """
+        Validates a parameter as a string. This allows the string to be blank or empty.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a string type.
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if parameterValue == None:
+            return False
+
+        #Check to make sure it is a string
+        if not type(parameterValue) is StringType:
+            return False
+
+        return True
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidString(parameterValue):
+        """
+        Validates a parameter as a string. Does NOT allow string to be blank or empty.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a string.
+        :type	Boolean
+        """
+
+        #Type check
+        if not ValidateData.funcIsValidStringType(parameterValue):
+            return False
+
+        #Check to see it is not blank
+        if parameterValue.strip() == "":
+            return False
+        return True
+
+    @staticmethod
+    def funcIsValidStringInt(parameterValue):
+        """
+        Validates a parameter as a string that can be parsed as an integer.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        """
+
+        #Type string check
+        if not ValidateData.funcIsValidStringType(parameterValue):
+            return False
+
+        #Check to see if the string can be converted to an integer
+        try:
+            int(parameterValue)
+        except:
+            return False
+        return True
+
+    @staticmethod
+    def funcIsValidStringFloat(parameterValue):
+        """
+        Validates a parameter as a string that can be parsed as a float.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        """
+
+        #Type string check
+        if not ValidateData.funcIsValidStringType(parameterValue):
+            return False
+
+        #Check to see if the string can be converted to a double
+        try:
+            float(parameterValue)
+        except:
+            return False
+        return True
+
+    #Tested 6
+    @staticmethod
+    def funcIsValidFormatString(parameterValue):
+        """
+        Validates a parameter as a valid format string.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        lettersValid = False
+        if ValidateData.funcIsValidString(parameterValue):
+            validChars = "BbcdfHhIiLlPpsx0123456789"
+            for letter in parameterValue:
+                lettersValid = letter in validChars
+                if(not lettersValid):
+                    break
+        return lettersValid
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidChar(parameterValue):
+        """
+        Validates a parameter as a valid character.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        return ValidateData.funcIsValidString(parameterValue)
+
+    #Tested 13
+    @staticmethod
+    def funcIsValidPositiveNumberChar(parameterValue):
+        """
+        Validates a parameter as a valid character representing a number.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        #Check to make sure is a valid string
+        if not ValidateData.funcIsValidString(parameterValue):
+            return False
+
+        #Try to convert to decimal
+        try:
+            decimalConversion = decimal.Decimal(parameterValue)
+            if decimalConversion < 0:
+                return False
+        except:
+            return False
+        return True
+
+    #Tested 9
+    @staticmethod
+    def funcIsValidFlagChar(parameterValue):
+        """
+        Validates a parameter as a valid character representing a boolean.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        if parameterValue in ("0", "1"):
+            return True
+        return False
+
+    #Tested 15
+    @staticmethod
+    def funcIsValidBoundedIntegerChar(parameterValue, iValueOne, iValueTwo):
+        """
+        Validates a parameter as a valid character that represents an integer inclusively bounded by two given values.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :param	iValueOne:	One bound for the value.
+        :type	Integer
+        :param	iValueTwo:	The other bound for the data.
+        :type	Integer
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        #Check to make sure is a valid string
+        if not ValidateData.funcIsValidString(parameterValue):
+            return False
+
+        #Check to make sure is a valid integer
+        if not ValidateData.funcIsValidInteger(iValueOne):
+            return False
+
+        #Check to make sure is a valid integer
+        if not ValidateData.funcIsValidInteger(iValueTwo):
+            return False
+
+        #Try to convert to decimal
+        try:
+            intConversion = int(parameterValue)
+            if(iValueOne < iValueTwo):
+                if ((intConversion >= iValueOne) and (intConversion <= iValueTwo)):
+                    return True
+                return False
+            if(iValueTwo < iValueOne):
+                if ((intConversion >= iValueTwo) and (intConversion <= iValueOne)):
+                    return True
+                return False
+            if(iValueOne == iValueTwo):
+                if (intConversion == iValueOne):
+                    return True
+                return False
+        except:
+            return False
+
+    #Tested 9
+    @staticmethod
+    def funcIsValidList(parameterValue):
+        """
+        Validates a parameter as a list.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a list
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if parameterValue == None:
+            return False
+
+        #Check to make sure it is a list
+        if not type(parameterValue) is ListType:
+            return False
+
+        #Check elements
+        listSize = len(parameterValue)
+        for i in range(0,listSize):
+            if parameterValue[i] == None:
+                return False
+            if type(parameterValue[i]) is ListType:
+                if ValidateData.funcIsValidList(parameterValue[i]) == False:
+                    return False
+        return True
+
+    #Tested 9
+    @staticmethod
+    def funcIsValidTuple(parameterValue):
+        """
+        Validates a parameter as a tuple.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a tuple
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if parameterValue == None:
+            return False
+
+        #Check to make sure it is a tuple
+        if not type(parameterValue) is TupleType:
+            return False
+
+        #Check elements
+        tupleSize = len(parameterValue)
+        for i in range(0,tupleSize):
+            if parameterValue[i] == None:
+                return False
+            if type(parameterValue[i]) is TupleType:
+                if ValidateData.funcIsValidTuple(parameterValue[i]) == False:
+                    return False
+        return True
+
+    #Tested 7
+    @staticmethod
+    def funcIsValidNumericList(parameterValue):
+        """
+        Validates a parameter as a list of numeric values.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a list of numeric values.
+        :type	Boolean
+        """
+
+        #Check is valid list
+        if(not ValidateData.funcIsValidList(parameterValue)):
+            return False
+
+        #Check elements
+        listSize = len(parameterValue)
+        for i in xrange(0,listSize):
+            if(not ValidateData.funcIsValidNumeric(parameterValue[i])):
+                return False
+        return True
+
+    #Tested 7
+    @staticmethod
+    def funcIsValidStringList(parameterValue):
+        """
+        Validates a parameter as a list of string values.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a list of string values.
+        :type	Boolean
+        """
+
+        #Check is valid list
+        if(not ValidateData.funcIsValidList(parameterValue)):
+            return False
+
+        #Check elements
+        listSize = len(parameterValue)
+        for i in xrange(0,listSize):
+            if(not ValidateData.funcIsValidString(parameterValue[i])):
+                return False
+        return True
+
+    #Tested 4
+    @staticmethod
+    def funcIsValidNPArray(parameterValue):
+        """
+        Validates a parameter as a numpy array.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a numpy array.
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if parameterValue == None:
+            return False
+
+        #Check to make sure it is a structured array
+        if not str(type(parameterValue)) == "<type 'numpy.ndarray'>":
+            return False
+
+        return True
+
+    #Tested 9
+    @staticmethod
+    def funcIsValidDictionary(parameterValue):
+        """
+        Validates a parameter as a dictionary.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a dictionary.
+        :type	Boolean
+        """
+
+        #Check to make sure it is not null
+        if parameterValue == None:
+            return False
+
+        #Check to make sure it is a dictionary
+        if not type(parameterValue) is DictType:
+            return False
+
+        #Check key elements
+        keyList = parameterValue.keys()
+        keyListSize = len(keyList)
+        for i in range(0,keyListSize):
+            if keyList[i] == None:
+                return False
+            if type(keyList[i]) is ListType:
+                if ValidateData.funcIsValidList(keyList[i]) == False:
+                    return False
+
+        #Check value elements
+        itemList = parameterValue.values()
+        itemListSize = len(itemList)
+
+        for i in range(0,itemListSize):
+            if itemList[i] == None:
+                return False
+            if type(itemList[i]) is ListType:
+                if ValidateData.funcIsValidList(itemList[i]) == False:
+                    return False
+        return True
+
+    #Tested 18
+    @staticmethod
+    def funcIsValidDNASequence(parameterValue):
+        """
+        Validates a parameter as a valid DNA sequence.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        if ValidateData.funcIsValidString(parameterValue):
+            expression = re.compile(r'[^atcgATCG]')
+            if expression.search(parameterValue) is not None:
+                return False
+            return True
+        return False
+
+    #Tested 15
+    @staticmethod
+    def funcIsValidNucleotideBase(parameterValue):
+        """
+        Validates a parameter as a character which is a valid nucleotide representation.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        if (ValidateData.funcIsValidDNASequence(parameterValue) or (parameterValue == 'u') or (parameterValue == "U")):
+            if (len(parameterValue) == 1):
+                return True
+        return False
+
+    #Testing 4
+    @staticmethod
+    def funcIsValidFileName(parameterValue):
+        """
+        Validates a parameter as a valid file name.
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid file path.
+        :type	Boolean
+        """
+
+        if parameterValue is None:
+            return False
+        elif(ValidateData.funcIsValidString(parameterValue)):
+            return os.path.exists(parameterValue)
+        return False
+
+    #Tested 5
+    @staticmethod
+    def funcIsValidClass(parameterValue, strCorrectName):
+        """
+        Validates a parameter as a valid class (of a specific type given by name).
+
+        :param	parameterValue:	Value to be evaluated.
+        :type	Unknown
+        :param	strCorrectName:	Name of the class the parameter should be.
+        :type	Unknown
+        :return	Boolean:	True indicates the parameter is a valid value.
+        :type	Boolean
+        """
+
+        if parameterValue is None:
+            return False
+        if not ValidateData.funcIsValidString(strCorrectName):
+            return False
+        classType = type(parameterValue).__name__
+        if(classType == strCorrectName):
+            return True
+        if(classType == 'instance'):
+            if(parameterValue.__class__.__name__==strCorrectName):
+                return True
+            else:
+                return False
+        return False
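
For quick orientation, the ValidateData helpers are plain static predicates that can be exercised directly; a minimal sketch (Python 2, assuming lefsebiom is importable and that the list/string validators defined earlier in the file behave as their names suggest):

    from lefsebiom.ValidateData import ValidateData

    print ValidateData.funcIsValidStringList(["a", "b"])   # True
    print ValidateData.funcIsValidStringList(["a", 3])     # False: 3 is not a string
    print ValidateData.funcIsValidDNASequence("ACGTacgt")  # True
    print ValidateData.funcIsValidDNASequence("ACGU")      # False: U only passes funcIsValidNucleotideBase
    print ValidateData.funcIsValidClass({}, "dict")        # True: matched by type name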
diff --git a/lefsebiom/__init__.py b/lefsebiom/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/plot_cladogram.py b/plot_cladogram.py
new file mode 100755
index 0000000..1b95358
--- /dev/null
+++ b/plot_cladogram.py
@@ -0,0 +1,342 @@
+#!/usr/bin/env python
+
+import os,sys,matplotlib,argparse,string
+matplotlib.use('Agg')
+from pylab import *
+from lefse import *
+import numpy as np
+
+colors = ['r','g','b','m','c',[1.0,0.5,0.0],[0.0,1.0,0.0],[0.33,0.125,0.0],[0.75,0.75,0.75],'k']
+dark_colors = [[0.4,0.0,0.0],[0.0,0.2,0.0],[0.0,0.0,0.4],'m','c',[1.0,0.5,0.0],[0.0,1.0,0.0],[0.33,0.125,0.0],[0.75,0.75,0.75],'k']
+
+class CladeNode:
+	def __init__(self, name, abundance, viz = True):
+		self.id = name
+		self.name = name.split(".")
+		self.last_name = self.name[-1]
+		self.abundance = abundance
+		self.pos = (-1.0,-1.0)
+		self.children = {}
+		self.isleaf = True
+		self.color = 'y'
+		self.next_leaf = -1
+		self.prev_leaf = -1
+		self.viz = viz
+	def __repr__(self):
+		return self.last_name
+	def add_child(self,node):
+		self.isleaf = False
+		self.children[node.__repr__()] = node
+	def get_children(self):
+		ck = sorted(self.children.keys())
+		return [self.children[k] for k in ck]
+	def get_color(self):
+		return self.color
+	def set_color(self,c):
+		self.color = c
+	def set_pos(self,pos):
+		self.pos = pos
+
+def read_params(args):
+	parser = argparse.ArgumentParser(description='Cladoplot')
+	parser.add_argument('input_file', metavar='INPUT_FILE', type=str, help="tab delimited input file")
+	parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str, help="the file for the output image")
+	parser.add_argument('--clade_sep',dest="clade_sep", type=float, default=1.5)
+	parser.add_argument('--max_lev',dest="max_lev", type=int, default=-1)
+	parser.add_argument('--max_point_size',dest="max_point_size", type=float, default=6.0)
+	parser.add_argument('--min_point_size',dest="min_point_size", type=float, default=1)
+	parser.add_argument('--point_edge_width',dest="markeredgewidth", type=float, default=.25)
+	parser.add_argument('--siblings_connector_width',dest="siblings_connector_width", type=float, default=2)
+	parser.add_argument('--parents_connector_width',dest="parents_connector_width", type=float, default=0.75)
+	parser.add_argument('--radial_start_lev',dest="radial_start_lev", type=int, default=1)
+	parser.add_argument('--labeled_start_lev',dest="labeled_start_lev", type=int, default=2)
+	parser.add_argument('--labeled_stop_lev',dest="labeled_stop_lev", type=int, default=5)
+	parser.add_argument('--abrv_start_lev',dest="abrv_start_lev", type=int, default=3)
+	parser.add_argument('--abrv_stop_lev',dest="abrv_stop_lev", type=int, default=5)
+	parser.add_argument('--expand_void_lev',dest="expand_void_lev", type=int, default=1)
+	parser.add_argument('--class_legend_vis',dest="class_legend_vis", type=int, default=1)
+	parser.add_argument('--colored_connector',dest="colored_connectors", type=int, default=1)
+	parser.add_argument('--alpha',dest="alpha", type=float, default=0.2)
+	parser.add_argument('--title',dest="title", type=str, default="Cladogram")
+	parser.add_argument('--sub_clade',dest="sub_clade", type=str, default="")
+	parser.add_argument('--title_font_size',dest="title_font_size", type=str, default="14")
+	parser.add_argument('--right_space_prop',dest="r_prop", type=float, default=0.1)
+	parser.add_argument('--left_space_prop',dest="l_prop", type=float, default=0.1)
+	parser.add_argument('--label_font_size',dest="label_font_size", type=str, default="6")
+	parser.add_argument('--background_color',dest="back_color", type=str, choices=["k","w"], default="w", help="set the color of the background")
+	parser.add_argument('--colored_labels',dest="col_lab", type=int, choices=[0,1], default=1, help="draw the label with class color (1) or in black (0)")
+	parser.add_argument('--class_legend_font_size',dest="class_legend_font_size", type=str, default="10")
+	parser.add_argument('--dpi',dest="dpi", type=int, default=72)
+	parser.add_argument('--format', dest="format", choices=["png","svg","pdf"], default="svg", type=str, help="the format for the output file")
+	parser.add_argument('--all_feats', dest="all_feats", type=str, default="")
+	args = parser.parse_args()
+	return vars(args) 
+
+def cmp_names(la,lb):
+	if len(la) != len(lb): return False
+	for p in [(a,b) for i,a in enumerate(la) for j,b in enumerate(lb) if i == j]:
+		if p[0] != p[1]: return False
+	return True	
+
+def build_tree(father,all_nodes,l,depth,viz):
+	cc = [n for n in all_nodes if len(n.name) > len(father.name) and cmp_names(father.name,n.name[:len(father.name)])]
+	children = [n for n in cc if len(n.name) == len(father.name)+1]
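+	# branches that end before the maximum depth get a placeholder child that
+	# repeats the parent's last name, so every leaf sits at the deepest level;
+	# the placeholder's visibility follows the viz flag (--expand_void_lev)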
+	if len(children) == 0 and l < depth -1: # !!!
+		nc = CladeNode(father.id+"."+father.id.split(".")[-1],1.0,viz)
+		father.add_child(nc)
+		children.append(nc)
+	for child in children:
+		build_tree(child,cc,l+1,depth,viz)
+		father.add_child(child)
+
+def get_all_nodes(father):
+	ret = [father]
+	children = father.get_children()
+	for c in children:
+		ret += get_all_nodes(c)
+	return ret
+
+def read_data(input_file,params):
+	with open(input_file, 'r') as inp:
+		if params['sub_clade'] == "": rows = [line.strip().split()[:-1] for line in inp.readlines() if params['max_lev'] < 1 or line.split()[0].count(".") < params['max_lev']]
+		else: rows = [line.split(params['sub_clade']+".")[1].strip().split()[:-1] for line in inp.readlines() if ( params['max_lev'] < 1 or line.split()[0].count(".") < params['max_lev'] ) and line.startswith(params['sub_clade']+".")]
+	all_names = [lin[0] for lin in rows]
+	to_add = []
+
+	abundances = [float(v) for v in zip(*rows)[1] if float(v) >= 0.0]
+	tree = {}
+	tree['classes'] = list(set([v[2] for v in rows if len(v)>2]))
+	tree['classes'].sort()
+	all_nodes = [CladeNode("root."+row[0],float(row[1])) for row in rows]
+
+	depth = max([len(n.name) for n in all_nodes])
+
+	n2 = ["_".join(nn.name) for nn in all_nodes]
+	for i,nn in enumerate(all_nodes):
+		n = nn
+		while "_".join(n.name[:-1]) not in n2 and len(n.name) > 1:
+			n = CladeNode(".".join(n.name[:-1]),n.abundance)
+			all_nodes.append(n)
+			n2.append("_".join(n.name))
+
+	cls2 = []
+        if params['all_feats'] != "":
+                cls2 = sorted(params['all_feats'].split(":"))
+	for i,v in enumerate(rows):
+		if len(v)>2:
+			if len(cls2) > 0: all_nodes[i].set_color(colors[cls2.index(v[2])%len(colors)])
+			else: 
+				if v[2].count('rgbcol') > 0:
+					ccc = [float(tt) for tt in v[2].split('_')[1:]]
+					all_nodes[i].set_color(ccc)
+				else: all_nodes[i].set_color(colors[sorted(tree['classes']).index(v[2])%len(colors)])	
+	root = CladeNode("root",-1.0)
+	root.set_pos((0.0,0.0))
+
+	build_tree(root,all_nodes,0,depth,params['expand_void_lev']==1)
+
+	all_nodes = get_all_nodes(root)
+	
+	tree['root'] = root
+	tree['max_abs'] = max(abundances)
+	tree['min_abs'] = min(abundances)
+	levs = []
+	for i in range(depth):
+		depthi = [n for n in all_nodes if len(n.name) == i+1]
+		levs.append(len(depthi))
+	tree['nlev'] = levs
+	return tree
+
+def add_all_pos(father,n,distn,seps,tsep,mlev,last_leaf=-1,nc=1):
+	children = father.get_children()
+	leaves = True if children[0].isleaf else False
+	for i,child in enumerate(children):
+		if leaves:
+			n += 1.0
+			men = 0.5 if len(children) == 1 else 0.0
+			child.set_pos((n*distn-men*float(distn)+tsep,(len(father.name))/float(mlev-1)))
+			if last_leaf != -1:
+				child.prev_leaf = last_leaf
+				last_leaf.next_leaf = child
+			last_leaf = child
+		else:
+			ln = n
+			ltsep = tsep 
+			n,tsep,last_leaf = add_all_pos(child,n,distn,seps,tsep,mlev,last_leaf,len(children))
+			nn = (ln + n)*0.5*distn
+			ssep = (ltsep + tsep)*0.5
+			if n-ln == 1:
+				ssep = ltsep
+			child.set_pos((nn+ssep,(len(father.name))/float(mlev-1)))
+	tsep += seps[len(father.name)-1]
+	return n,tsep,last_leaf
+
+def plot_points(father,params,pt_scale,ax):
+	children = father.get_children()
+	children.sort(key = lambda a: -int(a.get_color() == 'y')*a.abundance)
+	x,r = father.pos[0], father.pos[1]
+	for i,child in enumerate(children):
+		xc,rc = plot_points(child,params,pt_scale,ax)
+	if not father.viz: return x,r
+	ps = pt_scale[0]+father.abundance/pt_scale[1]+pt_scale[0]
+	col = father.get_color()
+	pw = params['markeredgewidth'] if col == 'y' else params['markeredgewidth']*3.0
+	if x==0 and r==0: ax.plot(x,r, 'o',markersize=ps,color=col,markeredgewidth=0.01,markeredgecolor=params['fore_color'])
+	else: ax.plot(x,r, 'o',markersize=ps,color=col,markeredgewidth=pw,markeredgecolor=params['fore_color'])
+	return x,r
+
+def plot_lines(father,params,depth,ax,xf):
+	children = father.get_children()
+	x,r = father.pos[0], father.pos[1]
+	for i,child in enumerate(children):
+		xc,rc = plot_lines(child,params,depth,ax,x)
+		if i == 0: x_first, r_first = xc, rc
+		if len(father.name) >= depth-params['radial_start_lev']: 
+			col = params['fore_color'] 
+			lw=params['parents_connector_width']
+			if not child.viz: continue
+			if father.get_color() != 'y' and father.get_color() == child.get_color() and params['colored_connectors']:
+				col = child.get_color()
+				lw *=2.5
+			if col != params['fore_color']:
+				ax.plot([x,xc],[r,rc],"-",color=params['fore_color'],lw=lw*1.5) 
+			ax.plot([x,xc],[r,rc],"-",color=col,lw=lw)
+	
+	if not father.viz or (len(children) == 1 and not children[0].viz): return x,r 
+	if len(father.name) < depth-params['radial_start_lev']:
+		col = params['fore_color'] 
+		lw=params['parents_connector_width']
+		if father.get_color() != 'y':
+			f =True
+			for child in children:
+				if child.get_color() != father.get_color() or not params['colored_connectors']:
+					f = False
+					break
+			if f: 
+				col = father.get_color()
+				lw *= 2.5 
+		if not (x==0 and r==0):
+			xx = xc if len(children) > 0 else x
+			if len(children) == 0: rc = r
+			xt = x if len(children)>1 else xx 
+			if col != params['fore_color']:
+				ax.plot([x,xt],[r,rc],"-",color=params['fore_color'],lw=lw*1.5)
+			ax.plot([x,xt],[r,rc],"-",color=col,lw=lw)
+	if len(children) > 0 and 1 < len(father.name) < depth-params['radial_start_lev']:
+		xs = arange(x_first,xc,0.01)
+		ys = [rc for t in xs]
+		ax.plot(xs,ys,"-",color=col,lw=params['siblings_connector_width'],markeredgecolor=params['fore_color'])
+	return x,r 
+
+def uniqueid():
+	for l in string.lowercase: yield l
+	for l in string.lowercase:
+		for i in range(10):
+			yield l+str(i)
+	i = 0
+	while True:
+		yield str(i)
+		i += 1
+
+def plot_names(father,params,depth,ax,u_i,seps):
+	children = father.get_children()
+	l = len(father.name)
+	if len(children)==0:
+		if father.prev_leaf == -1 or father.next_leaf == -1:
+			fr_0, fr_1 = father.pos[0], father.pos[0]
+		else: fr_0, fr_1 =  (father.pos[0]+father.prev_leaf.pos[0])*0.5, (father.pos[0]+father.next_leaf.pos[0])*0.5
+	for i,child in enumerate(children):
+		fr,to = plot_names(child,params,depth,ax,u_i,seps)
+		if i == 0: fr_0 = fr
+		fr_1 = to
+	if father.get_color() != 'y' and params['labeled_start_lev'] < l <= params['labeled_stop_lev']+1:
+		col = father.get_color()
+		dd = params['labeled_stop_lev'] - params['labeled_start_lev'] + 1 
+		de = depth - 1
+		dim = 1.0/float(de)
+		perc_ext = 0.65 if dim > 0.1 else 1.0 
+		clto = (de-l+1)*dim+dim*(dd+1-(l-dd-1))*perc_ext
+		clto = (de-l+1)*dim+dim*(dd-(l-params['labeled_start_lev'])+1)*perc_ext
+		des = float(180.0*(fr_0+fr_1)/np.pi)*0.5-90
+		lab = ""
+		txt = father.last_name
+		if params['abrv_start_lev']  < l <= params['abrv_stop_lev'] + 1:
+			ide = u_i.next()
+			lab = str(ide)+": "+father.last_name 
+			txt = str(ide)
+#		ax.bar(fr_0, clto, width = fr_1-fr_0, bottom = float(l-1)/float(depth-1), alpha = params['alpha'], color=col, edgecolor=col)
+		ax.bar(fr_0, clto, width = fr_1-fr_0, bottom = float(l-1)/float(de), alpha = params['alpha'], color=col, edgecolor=col)
+		ax.bar(0.0, 0.0, width = 0.0, bottom = 0.0, alpha = 1.0, color=col, edgecolor=params['fore_color'],  label=lab)
+		if l <= params['abrv_stop_lev'] + 1:
+			if not params['col_lab']: col = params['fore_color']
+			else: 
+				if col not in colors: col = params['fore_color']
+				else: col = dark_colors[colors.index(col)%len(dark_colors)]
+			ax.text((fr_0+fr_1)*0.5, clto+float(l-1)/float(de)-dim*perc_ext/2.0, txt, size = params['label_font_size'], rotation=des, ha ="center", va="center", color=col)	
+	return fr_0, fr_1
+
+def draw_tree(out_file,tree,params):
+	plt_size = 7
+	nlev = tree['nlev']
+	pt_scale = (params['min_point_size'],max(1.0,((tree['max_abs']-tree['min_abs']))/(params['max_point_size']-params['min_point_size'])))
+	depth = len(nlev)
+	sep = (2.0*np.pi)/float(nlev[-1]) 
+	seps = [params['clade_sep']*sep/float(depth-i+1) for i in range(1,len(tree['nlev'])+1)]
+	totseps = sum([s*nlev[i] for i,s in enumerate(seps[:-1])])
+	clade_sep_err = True if totseps > np.pi else False
+	while totseps > np.pi:
+		params['clade_sep'] *= 0.75 	 
+		seps = [params['clade_sep']*sep/(float(depth-i+1)*0.25) for i in range(1,len(tree['nlev'])+1)]
+		totseps = sum([s*nlev[i] for i,s in enumerate(seps[:-1])])
+	if clade_sep_err: print 'clade_sep parameter too large, lowered to',params['clade_sep']
+
+	fig = plt.figure(edgecolor=params['back_color'],facecolor=params['back_color'])
+	ax = fig.add_subplot(111, polar=True, frame_on=False, axis_bgcolor=params['back_color'] )
+	plt.subplots_adjust(right=1.0-params['r_prop'],left=params['l_prop']) 	
+	ax.grid(False)
+	xticks([])
+	yticks([])
+
+	ds = (2.0*np.pi-totseps)/float(nlev[-1])
+
+	add_all_pos(tree['root'],0.0,ds,seps,0.0,depth)
+	
+	plot_lines(tree['root'],params,depth,ax,0)
+	plot_points(tree['root'],params,pt_scale,ax)
+	plot_names(tree['root'],params,depth,ax,uniqueid(),seps)
+
+	r = np.arange(0, 3.0, 0.01)
+	theta = 2*np.pi*r
+	
+	def get_col_attr(x):
+		return hasattr(x, 'set_color') and not hasattr(x, 'set_facecolor')
+
+	h, l = ax.get_legend_handles_labels()
+	if len(l) > 0:
+		leg = ax.legend(bbox_to_anchor=(1.05, 1), frameon=False, loc=2, borderaxespad=0.,prop={'size':params['label_font_size']})
+		if leg != None:
+			gca().add_artist(leg)
+			for o in leg.findobj(get_col_attr):
+				o.set_color(params['fore_color'])
+	
+	cll = sorted(tree['classes']) if params['all_feats'] == "" else sorted(params['all_feats'].split(":"))
+	nll = [ax.bar(0.0, 0.0, width = 0.0, bottom = 0.0, color=colors[i%len(colors)], label=c) for i,c in enumerate(cll) if c in tree['classes']]
+	cl = [c for c in cll if c in tree['classes']]
+
+	ax.set_title(params['title'],size=params['title_font_size'],color=params['fore_color'])
+
+	if params['class_legend_vis']:
+		l2 = legend(nll, cl, loc=2, prop={'size':params['class_legend_font_size']}, frameon=False)
+		if l2 != None:
+			for o in l2.findobj(get_col_attr):
+				o.set_color(params['fore_color'])
+
+	plt.savefig(out_file,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
+	plt.close()	
+
+if __name__ == '__main__':
+	params = read_params(sys.argv)
+	params['fore_color'] = 'w' if params['back_color'] == 'k' else 'k'
+	clad_tree = read_data(params['input_file'],params)	
+	draw_tree(params['output_file'],clad_tree,params)
+	
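
The cladogram layout is driven entirely by the dotted feature names; a minimal sketch of how read_data assembles the CladeNode tree (hypothetical three-row input, Python 2, names and abundances purely illustrative):

    rows = [("Bacteria", 1.0), ("Bacteria.Firmicutes", 0.6), ("Bacteria.Bacteroidetes", 0.4)]
    all_nodes = [CladeNode("root." + name, ab) for name, ab in rows]
    root = CladeNode("root", -1.0)
    depth = max(len(n.name) for n in all_nodes)   # 3 levels here: root, kingdom, phylum
    build_tree(root, all_nodes, 0, depth, True)   # nests children by dotted-name prefix
    print get_all_nodes(root)                     # [root, Bacteria, Bacteroidetes, Firmicutes]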
diff --git a/plot_features.py b/plot_features.py
new file mode 100755
index 0000000..5371eb3
--- /dev/null
+++ b/plot_features.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+
+import os,sys,matplotlib,zipfile,argparse,string
+matplotlib.use('Agg')
+from pylab import *
+from lefse import *
+import random as rand
+
+colors = ['r','g','b','m','c']
+
+def read_params(args):
+	parser = argparse.ArgumentParser(description='Cladoplot')
+	parser.add_argument('input_file_1', metavar='INPUT_FILE', type=str, help="dataset files")
+	parser.add_argument('input_file_2', metavar='INPUT_FILE', type=str, help="LEfSe output file")
+	parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str, help="the file for the output (the zip file if an archive is required, the output directory otherwise)")
+	parser.add_argument('--width',dest="width", type=float, default=10.0 )
+	parser.add_argument('--height',dest="height", type=float, default=4.0) 
+	parser.add_argument('--top',dest="top", type=float, default=-1.0, help="set maximum y limit (-1.0 means automatic limit)") 
+	parser.add_argument('--bot',dest="bot", type=float, default=0.0, help="set minimum y limit (default 0.0, -1.0 means automatic limit)") 
+	parser.add_argument('--title_font_size',dest="title_font_size", type=str, default="14")
+	parser.add_argument('--class_font_size',dest="class_font_size", type=str, default="14")
+	parser.add_argument('--class_label_pos',dest="class_label_pos", type=str, choices=["up","down"], default="up")
+	parser.add_argument('--subcl_mean',dest="subcl_mean", type=str, choices=["y","n"], default="y")
+	parser.add_argument('--subcl_median',dest="subcl_median", type=str, choices=["y","n"], default="y")
+	parser.add_argument('--font_size',dest="font_size", type=str, default="10")
+	parser.add_argument('-n',dest="unused", metavar="flt", type=float, default=-1.0,help="unused")
+	parser.add_argument('--format', dest="format", default="png", choices=["png","pdf","svg"], type=str, help="the format for the output file")
+	parser.add_argument('-f', dest="f", default="diff", choices=["all","diff","one"], type=str, help="wheter to plot all features (all), only those differentially abundant according to LEfSe or only one (the one given with --feature_name) ")
+	parser.add_argument('--feature_name', dest="feature_name", default="", type=str, help="The name of the feature to plot (levels separated by .) ")
+	parser.add_argument('--feature_num', dest="feature_num", default="-1", type=int, help="The number of the feature to plot ")
+	parser.add_argument('--archive', dest="archive", default="none", choices=["zip","none"], type=str, help="")
+	parser.add_argument('--background_color',dest="back_color", type=str, choices=["k","w"], default="w", help="set the color of the background")
+	parser.add_argument('--dpi',dest="dpi", type=int, default=72)
+		
+	args = parser.parse_args()
+
+	return vars(args)
+	
+def read_data(file_data,file_feats,params):
+	with open(file_feats, 'r') as features:
+		feats_to_plot = [(f.split()[:-1],len(f.split()) == 5) for f in features.readlines()]
+	if not feats_to_plot:
+		print "No features to plot\n"
+		sys.exit(0)
+	feats,cls,class_sl,subclass_sl,class_hierarchy,params['norm_v'] = load_data(file_data, True)	 	
+	if params['feature_num'] > 0: 
+		params['feature_name'] = [line.split()[0] for line in open(params['input_file_2'])][params['feature_num']-1]
+	features = {}
+	for f in feats_to_plot:
+		if params['f'] == "diff" and not f[1]: continue
+		if params['f'] == "one" and f[0][0] != params['feature_name']: continue
+		features[f[0][0]] = {'dim':float(f[0][1]), 'abundances':feats[f[0][0]], 'sig':f[1], 'cls':cls, 'class_sl':class_sl, 'subclass_sl':subclass_sl, 'class_hierarchy':class_hierarchy} 
+	if not features:
+                print "No features to plot\n"
+                sys.exit(0)
+	return features
+
+def plot(name,k_n,feat,params):
+	fig = plt.figure(figsize=(params['width'], params['height']),edgecolor=params['fore_color'],facecolor=params['back_color'])
+	ax = fig.add_subplot(111,axis_bgcolor=params['back_color']) 
+	subplots_adjust(bottom=0.15)
+
+	max_m = 0.0
+	norm = 1.0 if float(params['norm_v']) < 0.0 else float(params['norm_v'])
+	for v in feat['subclass_sl'].values():
+		fr,to  = v[0], v[1]
+		subcl_mean = numpy.mean(feat['abundances'][fr:to])
+		if subcl_mean > max_m: max_m = subcl_mean
+	max_m /= norm
+	max_v = max_m*3 if max_m*3 < max(feat['abundances'])*1.1/norm else max(feat['abundances'])/norm
+	min_v = max(0.0,min(feat['abundances'])*0.9/norm)
+
+	if params['top'] > 0.0: max_v = params['top'] 
+	if params['bot'] >= 0.0: min_v = params['bot']
+	
+	if max_v == 0.0: max_v = 0.0001
+	if max_v == min_v: max_v = min_v*1.1 
+
+	cl_sep = max(int(sum([vv[1]/norm - vv[0]/norm for vv in feat['class_sl'].values()])/150.0),1)
+	seps = []
+	xtics = []
+	x2tics = []
+	last_fr = 0.0
+	for i,cl in enumerate(sorted(feat['class_hierarchy'].keys())):
+		for j,subcl in enumerate(feat['class_hierarchy'][cl]):
+			fr = feat['subclass_sl'][subcl][0]
+			to = feat['subclass_sl'][subcl][1]
+			val = feat['abundances'][fr:to]
+			fr += cl_sep*i
+			to += cl_sep*i
+			pos = arange(fr,to)
+			max_x = to
+			col = colors[j%len(colors)]
+			vv = [v1/norm for v1 in val]
+			median = numpy.median(vv)
+			mean = numpy.mean(vv)
+			valv = [max(min(v/norm,max_v),min_v) for v in val]
+			ax.bar(pos,valv, align='center', color=col, edgecolor=col, linewidth=0.1 )
+			if params['subcl_median'] == 'y': ax.plot([fr,to-1],[median,median],"k--",linewidth=1,color=params['fore_color'])
+			if params['subcl_mean'] == 'y': ax.plot([fr,to-1],[mean,mean],"-",linewidth=1,color=params['fore_color'])
+			nna = subcl if subcl.count("_") == 0 or not subcl.startswith(cl) else "_".join(subcl.split(cl)[1:])
+			if nna == "subcl" or nna == "_subcl": nna = " "
+			xtics.append(((fr+(to-fr)/2),nna))
+		seps.append(float(to))
+		x2tics.append(((last_fr+(to-last_fr)/2),cl))
+		last_fr = to + float(cl_sep)
+	for s in seps[:-1]:
+		ax.plot([s,s],[min_v,max_v],"-",linewidth=5,color=params['fore_color'])	
+	ax.set_title(k_n, size=params['title_font_size'])
+	xticks([x[0] for x in xtics],[x[1] for x in xtics],rotation=-30, ha = 'left', fontsize=params['font_size'], color=params['fore_color'])
+	yticks(fontsize=params['font_size'])
+
+	ylabel('Relative abundance')
+	ax.set_ylim((min_v,max_v))
+	a,b = ax.get_xlim()
+	ax.set_xlim((0-float(last_fr)/float(b-a),max_x))		
+	ax.yaxis.grid(True)
+	
+	def get_col_attr(x):
+		return hasattr(x, 'set_color') and not hasattr(x, 'set_facecolor')
+	def get_edgecol_attr(x):
+		return hasattr(x, 'set_edgecolor')
+
+
+	for o in fig.findobj(get_col_attr):
+		o.set_color(params['fore_color'])
+	for o in fig.findobj(get_edgecol_attr):
+		if o.get_edgecolor() == params['back_color']:
+			o.set_edgecolor(params['fore_color'])
+
+	for t in x2tics:
+		m = ax.get_ylim()[1]*0.97 if params['class_label_pos']=='up' else 0.07
+		plt.text(t[0],m, "class: "+t[1], ha ="center", size=params['class_font_size'], va="top", bbox = dict(boxstyle="round", ec='k', fc='y'))
+	
+
+	plt.savefig(name,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
+	plt.close()
+	return name 
+
+if __name__ == '__main__':
+	params = read_params(sys.argv)
+	params['fore_color'] = 'w' if params['back_color'] == 'k' else 'k'
+	features = read_data(params['input_file_1'],params['input_file_2'],params)
+	if params['archive'] == "zip": file = zipfile.ZipFile(params['output_file'], "w")
+	for k,f in features.items():
+		print "Exporting ", k
+		if params['archive'] == "zip":
+			of = plot("/tmp/"+str(int(f['sig']))+"_"+"-".join(k.split("."))+"."+params['format'],k,f,params)
+			file.write(of, os.path.basename(of), zipfile.ZIP_DEFLATED)
+		else:
+			if params['f'] == 'one': plot(params['output_file'],k,f,params)
+			else: plot(params['output_file']+str(int(f['sig']))+"_"+"-".join(k.split("."))+"."+params['format'],k,f,params) 
+	if params['archive'] == "zip": file.close()
diff --git a/plot_res.py b/plot_res.py
new file mode 100755
index 0000000..f87e79e
--- /dev/null
+++ b/plot_res.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+
+import os,sys
+import matplotlib
+matplotlib.use('Agg')
+from pylab import *
+from collections import defaultdict
+
+from lefse import *
+import argparse
+
+colors = ['r','g','b','m','c','y','k','w']
+
+def read_params(args):
+    parser = argparse.ArgumentParser(description='Plot results')
+    parser.add_argument('input_file', metavar='INPUT_FILE', type=str, help="tab delimited input file")
+    parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str, help="the file for the output image")
+    parser.add_argument('--feature_font_size', dest="feature_font_size", type=int, default=7, help="the font size for the feature names")
+    parser.add_argument('--format', dest="format", choices=["png","svg","pdf"], default='png', type=str, help="the format for the output file")
+    parser.add_argument('--dpi',dest="dpi", type=int, default=72)
+    parser.add_argument('--title',dest="title", type=str, default="")
+    parser.add_argument('--title_font_size',dest="title_font_size", type=str, default="12")
+    parser.add_argument('--class_legend_font_size',dest="class_legend_font_size", type=str, default="10")
+    parser.add_argument('--width',dest="width", type=float, default=7.0 )
+    parser.add_argument('--height',dest="height", type=float, default=4.0, help="only for vertical histograms")
+    parser.add_argument('--left_space',dest="ls", type=float, default=0.2 )
+    parser.add_argument('--right_space',dest="rs", type=float, default=0.1 )
+    parser.add_argument('--orientation',dest="orientation", type=str, choices=["h","v"], default="h" )
+    parser.add_argument('--autoscale',dest="autoscale", type=int, choices=[0,1], default=1 )
+    parser.add_argument('--background_color',dest="back_color", type=str, choices=["k","w"], default="w", help="set the color of the background")
+    parser.add_argument('--subclades', dest="n_scl", type=int, default=1, help="number of label levels to be displayed (starting from the leaves, -1 means all the levels, 1 is default)")
+    parser.add_argument('--max_feature_len', dest="max_feature_len", type=int, default=60, help="Maximum length of feature strings (def 60)")
+    parser.add_argument('--all_feats', dest="all_feats", type=str, default="")
+    parser.add_argument('--otu_only', dest="otu_only", default=False, action='store_true', help="Plot only species resolved OTUs (as opposed to all levels)")
+    parser.add_argument('--report_features', dest="report_features", default=False, action='store_true', help="Report important features to STDOUT")
+    args = parser.parse_args()
+    return vars(args)
+
+def read_data(input_file,output_file,otu_only):
+    with open(input_file, 'r') as inp:
+        if not otu_only:
+            rows = [line.strip().split()[:-1] for line in inp.readlines() if len(line.strip().split())>3]
+        else:
+            rows = [line.strip().split()[:-1] for line in inp.readlines() if len(line.strip().split())>3 and len(line.strip().split()[0].split('.'))==8] # a feature with length 8 will have an OTU id associated with it
+    classes = list(set([v[2] for v in rows if len(v)>2]))
+    if len(classes) < 1: 
+        print "No differentially abundant features found in "+input_file
+        os.system("touch "+output_file)
+        sys.exit()
+    data = {}
+    data['rows'] = rows
+    data['cls'] = classes
+    return data
+
+def plot_histo_hor(path,params,data,bcl,report_features):
+    cls2 = []
+    if params['all_feats'] != "":
+        cls2 = sorted(params['all_feats'].split(":"))
+    cls = sorted(data['cls'])
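+    # with two classes (bcl) the sort key is signed by class, so the bars of the
+    # two classes will point in opposite directions; otherwise rows are grouped
+    # by class first and ordered by normalized score within each class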
+    if bcl: data['rows'].sort(key=lambda ab: fabs(float(ab[3]))*(cls.index(ab[2])*2-1))
+    else: 
+        mmax = max([fabs(float(a)) for a in zip(*data['rows'])[3]])
+        data['rows'].sort(key=lambda ab: fabs(float(ab[3]))/mmax+(cls.index(ab[2])+1))
+    pos = arange(len(data['rows']))
+    head = 0.75
+    tail = 0.5
+    ht = head + tail
+    ints = max(len(pos)*0.2,1.5)
+    fig = plt.figure(figsize=(params['width'], ints + ht), edgecolor=params['back_color'],facecolor=params['back_color'])
+    ax = fig.add_subplot(111,frame_on=False,axis_bgcolor=params['back_color'])
+    ls, rs = params['ls'], 1.0-params['rs']
+    plt.subplots_adjust(left=ls,right=rs,top=1-head*(1.0-ints/(ints+ht)), bottom=tail*(1.0-ints/(ints+ht)))
+
+    fig.canvas.set_window_title('LDA results')
+
+    l_align = {'horizontalalignment':'left', 'verticalalignment':'baseline'}
+    r_align = {'horizontalalignment':'right', 'verticalalignment':'baseline'}
+    added = []
+    m = 1 if data['rows'][0][2] == cls[0] else -1
+    out_data = defaultdict(list) # keep track of which OTUs result in the plot
+    for i,v in enumerate(data['rows']):
+        if report_features:
+            otu = v[0].split('.')[7].replace('_','.') # string replace retains format New.ReferenceOTUxxx
+            score = v[3]
+            otu_class = v[2]
+            out_data[otu] = [score, otu_class]
+        indcl = cls.index(v[2])
+        lab = str(v[2]) if str(v[2]) not in added else ""
+        added.append(str(v[2])) 
+        col = colors[indcl%len(colors)] 
+        if len(cls2) > 0: 
+            col = colors[cls2.index(v[2])%len(colors)]
+        vv = fabs(float(v[3])) * (m*(indcl*2-1)) if bcl else fabs(float(v[3]))
+        ax.barh(pos[i],vv, align='center', color=col, label=lab, height=0.8, edgecolor=params['fore_color'])
+    mv = max([abs(float(v[3])) for v in data['rows']])  
+    if report_features:
+        print 'OTU\tLDA_score\tClass'
+        for i in out_data:
+            print '%s\t%s\t%s' %(i, out_data[i][0], out_data[i][1])
+    for i,r in enumerate(data['rows']):
+        indcl = cls.index(data['rows'][i][2])
+        if params['n_scl'] < 0: rr = r[0]
+        else: rr = ".".join(r[0].split(".")[-params['n_scl']:])
+        if len(rr) > params['max_feature_len']: rr = rr[:params['max_feature_len']/2-2]+" [..]"+rr[-params['max_feature_len']/2+2:]
+        if m*(indcl*2-1) < 0 and bcl: ax.text(mv/40.0,float(i)-0.3,rr, l_align, size=params['feature_font_size'],color=params['fore_color'])
+        else: ax.text(-mv/40.0,float(i)-0.3,rr, r_align, size=params['feature_font_size'],color=params['fore_color'])
+    ax.set_title(params['title'],size=params['title_font_size'],y=1.0+head*(1.0-ints/(ints+ht))*0.8,color=params['fore_color'])
+
+    ax.set_yticks([])
+    ax.set_xlabel("LDA SCORE (log 10)")
+    ax.xaxis.grid(True)
+    xlim = ax.get_xlim()
+    if params['autoscale']: 
+        ran = arange(0.0001,round(round((abs(xlim[0])+abs(xlim[1]))/10,4)*100,0)/100)
+        if len(ran) > 1 and len(ran) < 100:
+            ax.set_xticks(arange(xlim[0],xlim[1]+0.0001,min(xlim[1]+0.0001,round(round((abs(xlim[0])+abs(xlim[1]))/10,4)*100,0)/100)))
+    ax.set_ylim((pos[0]-1,pos[-1]+1))
+    leg = ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=5, borderaxespad=0., frameon=False,prop={'size':params['class_legend_font_size']})
+
+    def get_col_attr(x):
+        return hasattr(x, 'set_color') and not hasattr(x, 'set_facecolor')
+    for o in leg.findobj(get_col_attr):
+        o.set_color(params['fore_color'])
+    for o in ax.findobj(get_col_attr):
+        o.set_color(params['fore_color'])
+
+    
+    plt.savefig(path,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
+    plt.close()
+
+def plot_histo_ver(path,params,data,report_features):
+    cls = data['cls']
+    mmax = max([fabs(float(a)) for a in zip(*data['rows'])[3]])
+    data['rows'].sort(key=lambda ab: fabs(float(ab[3]))/mmax+(cls.index(ab[2])+1))
+    pos = arange(len(data['rows'])) 
+    if params['n_scl'] < 0: nam = [d[0] for d in data['rows']]
+    else: nam = [d[0].split(".")[-min(d[0].count("."),params['n_scl'])] for d in data['rows']]
+    fig = plt.figure(edgecolor=params['back_color'],facecolor=params['back_color'],figsize=(params['width'], params['height'])) 
+    ax = fig.add_subplot(111,axis_bgcolor=params['back_color'])
+    plt.subplots_adjust(top=0.9, left=params['ls'], right=1.0-params['rs'], bottom=0.3)
+    fig.canvas.set_window_title('LDA results')   
+    l_align = {'horizontalalignment':'left', 'verticalalignment':'baseline'}
+    r_align = {'horizontalalignment':'right', 'verticalalignment':'baseline'} 
+    added = []
+    out_data = defaultdict(list) # keep track of which OTUs result in the plot
+    for i,v in enumerate(data['rows']):
+        if report_features:
+            otu = v[0].split('.')[7].replace('_','.') # string replace retains format New.ReferenceOTUxxx
+            score = v[3]
+            otu_class = v[2]
+            out_data[otu] = [score, otu_class]
+        indcl = data['cls'].index(v[2])
+        lab = str(v[2]) if str(v[2]) not in added else ""
+        added.append(str(v[2])) 
+        col = colors[indcl%len(colors)]
+        vv = fabs(float(v[3])) 
+        ax.bar(pos[i],vv, align='center', color=col, label=lab)
+    if report_features:
+        print 'OTU\tLDA_score\tClass'
+        for i in out_data:
+            print '%s\t%s\t%s' %(i, out_data[i][0], out_data[i][1])
+    xticks(pos,nam,rotation=-20, ha = 'left',size=params['feature_font_size'])  
+    ax.set_title(params['title'],size=params['title_font_size'])
+    ax.set_ylabel("LDA SCORE (log 10)")
+    ax.yaxis.grid(True) 
+    a,b = ax.get_xlim()
+    dx = float(len(pos))/float((b-a))
+    ax.set_xlim((0-dx,max(pos)+dx)) 
+    plt.savefig(path,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
+    plt.close() 
+
+if __name__ == '__main__':
+    params = read_params(sys.argv)
+    params['fore_color'] = 'w' if params['back_color'] == 'k' else 'k'
+    data = read_data(params['input_file'],params['output_file'],params['otu_only'])
+    if params['orientation'] == 'v': plot_histo_ver(params['output_file'],params,data,params['report_features'])
+    else: plot_histo_hor(params['output_file'],params,data,len(data['cls']) == 2,params['report_features'])
+
+    
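
Both histogram functions assume the fixed 8-level naming convention behind --otu_only and --report_features (seven taxonomic ranks plus an OTU id as the eighth dotted field); a minimal sketch with a hypothetical feature name:

    name = ("Bacteria.Firmicutes.Clostridia.Clostridiales."
            "Lachnospiraceae.Roseburia.R_intestinalis.New_ReferenceOTU42")
    print len(name.split('.')) == 8              # True: kept by --otu_only
    print name.split('.')[7].replace('_', '.')   # 'New.ReferenceOTU42', as reported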
diff --git a/qiime2lefse.py b/qiime2lefse.py
new file mode 100755
index 0000000..dffecc3
--- /dev/null
+++ b/qiime2lefse.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+import sys
+
+def read_params(args):
+    import argparse as ap
+    import textwrap
+
+    p = ap.ArgumentParser( description= '''
+    This script converts a QIIME TSV BIOM table for use with LEfSe. The table must have taxa metadata named 'Consensus Lineage' associated with it; this can be done with e.g. the following biom convert command: ----
+    biom convert -i otu.biom -o otu.txt --to-tsv --header-key Taxonomy --output-metadata-id 'Consensus Lineage'
+    ''')
+    
+    p.add_argument( '--in', metavar='INPUT_FILE', type=str, 
+                    nargs='?', default=sys.stdin,
+                    help=   "the Qiime OTU table file "
+                            "[ stdin if not present ]" )
+    p.add_argument( '--md', metavar='METADATA_FILE', type=str, 
+                    nargs='?', default=None,
+                    help=   "the Qiime OTU table file " 
+                            "[ only OTU table without metadata if not present ]" )
+    p.add_argument( '--out', metavar='OUTPUT_FILE', type=str, 
+                    nargs = '?', default=sys.stdout,
+                    help=   "the output file "
+                            "[stdout if not present]")
+
+    p.add_argument( '-c', metavar="class attribute", 
+                    type=str,
+                    help =  "the attribute to use as class"   )
+    p.add_argument( '-s', metavar="subclass attribute", 
+                    type=str,
+                    help =  "the attribute to use as subclass"   )
+    p.add_argument( '-u', metavar="subject attribute", 
+                    type=str,
+                    help =  "the attribute to use as subject"   )
+
+
+
+    return vars(p.parse_args()) 
+
+
+
+def qiime2lefse(  fin, fmd, fout, all_md, sel_md ):
+    with (fin if fin==sys.stdin else open(fin)) as inpf :
+        lines = [list(ll) for ll in 
+                    (zip(*[l.strip().split('\t') 
+                        for l in inpf.readlines()[1:]]) ) ]
+    for i,(l1,l2) in enumerate(zip( lines[0], lines[-1] )):
+        if not l2 == 'Consensus Lineage':
+            lines[-1][i] = l2+"|"+l1
+
+    data = dict([(l[0],l[1:]) for l in lines[1:]])
+    
+    md = {}
+    if fmd:
+        with open(fmd) as inpf:
+            mdlines = [l.strip().split('\t') for l in inpf.readlines()]
+  
+        mdf = mdlines[0][1:]
+
+        for l in mdlines:
+            mdd = dict(zip(mdf,l[1:]))
+            md[l[0]] = mdd
+
+    selected_md = md.values()[0].keys() if md else []
+
+    if not all_md:
+        selected_md = [s for s in sel_md if s]
+    
+    out_m = [   selected_md + 
+                list([d.replace(";","|").replace("\"","") for d in data[ 'Consensus Lineage' ]])    ]
+    for k,v in data.items():
+        if k == 'Consensus Lineage':
+            continue
+        out_m.append( [md[k][kmd] for kmd in selected_md] + list(v) )
+
+    with (fout if fout == sys.stdout else open( fout, "w" )) as outf:
+        for l in zip(*out_m):
+            outf.write( "\t".join(l) + "\n" )
+
+if __name__ == '__main__':
+    pars = read_params( sys.argv )
+  
+    qiime2lefse(   fin     = pars['in'],
+                   fmd     = pars['md'],
+                   fout    = pars['out'],
+                   all_md  = not pars['c'] and not pars['s'] and not pars['u'],
+                   sel_md  = [pars['c'],pars['s'],pars['u']])
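
The conversion hinges on transposing the OTU table with zip(*...), so each column of the input (one sample, or the trailing 'Consensus Lineage' column) becomes one row to work with; a toy illustration (Python 2, values invented, header handling simplified relative to the script):

    raw = ["0\t5\t9\tk__Bacteria;p__Firmicutes",
           "1\t2\t0\tk__Bacteria;p__Bacteroidetes"]
    lines = [list(ll) for ll in zip(*[l.split('\t') for l in raw])]
    print lines[0]    # ['0', '1']: the OTU-id column of the original table
    print lines[-1]   # ['k__Bacteria;p__Firmicutes', 'k__Bacteria;p__Bacteroidetes']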
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e967514
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+
+- R
+- R libraries: splines, stats4, survival, mvtnorm, modeltools, coin, MASS
+- python libraries: rpy2 (v. 2.1 or higher), numpy, matplotlib (v. 1.0 or higher), argparse 
+
diff --git a/run_lefse.py b/run_lefse.py
new file mode 100755
index 0000000..df248ef
--- /dev/null
+++ b/run_lefse.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import os,sys,math,pickle
+from lefse import *
+
+def read_params(args):
+        parser = argparse.ArgumentParser(description='LEfSe 1.0')
+        parser.add_argument('input_file', metavar='INPUT_FILE', type=str, help="the input file")
+        parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str,
+                help="the output file containing the data for the visualization module")
+        parser.add_argument('-o',dest="out_text_file", metavar='str', type=str, default="",
+                help="set the file for exporting the result (only concise textual form)")
+        parser.add_argument('-a',dest="anova_alpha", metavar='float', type=float, default=0.05,
+                help="set the alpha value for the Anova test (default 0.05)")
+        parser.add_argument('-w',dest="wilcoxon_alpha", metavar='float', type=float, default=0.05,
+                help="set the alpha value for the Wilcoxon test (default 0.05)")
+        parser.add_argument('-l',dest="lda_abs_th", metavar='float', type=float, default=2.0,
+                help="set the threshold on the absolute value of the logarithmic LDA score (default 2.0)")
+        parser.add_argument('--nlogs',dest="nlogs", metavar='int', type=int, default=3,
+                help="max log influence of the LDA coefficients")
+        parser.add_argument('--verbose',dest="verbose", metavar='int', choices=[0,1], type=int, default=0,
+                help="verbose execution (default 0)")
+        parser.add_argument('--wilc',dest="wilc", metavar='int', choices=[0,1], type=int, default=1,
+                help="whether to perform the Wilcoxon step (default 1)")
+        parser.add_argument('-r',dest="rank_tec", metavar='str', choices=['lda','svm'], type=str, default='lda',
+                help="select LDA or SVM for effect size (default LDA)")
+        parser.add_argument('--svm_norm',dest="svm_norm", metavar='int', choices=[0,1], type=int, default=1,
+                help="whether to normalize the data in [0,1] for SVM feature weighting (default 1, strongly suggested)")
+        parser.add_argument('-b',dest="n_boots", metavar='int', type=int, default=30,
+                help="set the number of bootstrap iteration for LDA (default 30)")
+        parser.add_argument('-e',dest="only_same_subcl", metavar='int', type=int, default=0,
+                help="set whether perform the wilcoxon test only among the subclasses with the same name (default 0)")
+        parser.add_argument('-c',dest="curv", metavar='int', type=int, default=0,
+                help="set whether perform the wilcoxon test ing the Curtis's approach [BETA VERSION] (default 0)")
+        parser.add_argument('-f',dest="f_boots", metavar='float', type=float, default=0.67,
+                help="set the subsampling fraction value for each bootstrap iteration (default 0.66666)")
+        parser.add_argument('-s',dest="strict", choices=[0,1,2], type=int, default=0,
+                help="set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison")
+#       parser.add_argument('-m',dest="m_boots", type=int, default=5, 
+#               help="minimum cardinality of classes in each bootstrapping iteration (default 5)")
+        parser.add_argument('--min_c',dest="min_c", metavar='int', type=int, default=10,
+                help="minimum number of samples per subclass for performing wilcoxon test (default 10)")
+        parser.add_argument('-t',dest="title", metavar='str', type=str, default="",
+                help="set the title of the analysis (default input file without extension)")
+        parser.add_argument('-y',dest="multiclass_strat", choices=[0,1], type=int, default=0,
+                help="(for multiclass tasks) set whether the test is performed in a one-against-one ( 1 - more strict!) or in a one-against-all setting ( 0 - less strict) (default 0)")
+        args = parser.parse_args()
+          
+        params = vars(args)
+        if params['title'] == "": params['title'] = params['input_file'].split("/")[-1].split('.')[0]
+        return params 
+
+
+
+if __name__ == '__main__':
+	init()
+	params = read_params(sys.argv)
+	feats,cls,class_sl,subclass_sl,class_hierarchy = load_data(params['input_file'])
+	kord,cls_means = get_class_means(class_sl,feats)
+	wilcoxon_res = {}
+	kw_n_ok = 0
+	nf = 0
+	for feat_name,feat_values in feats.items():
+		if params['verbose']:
+			print "Testing feature",str(nf),": ",feat_name,
+			nf += 1
+		kw_ok,pv = test_kw_r(cls,feat_values,params['anova_alpha'],sorted(cls.keys()))
+		if not kw_ok:
+			if params['verbose']: print "\tkw ko" 
+			del feats[feat_name]
+			wilcoxon_res[feat_name] = "-"
+			continue
+		if params['verbose']: print "\tkw ok\t",
+		
+		if not params['wilc']: continue
+		kw_n_ok += 1	
+		res_wilcoxon_rep = test_rep_wilcoxon_r(subclass_sl,class_hierarchy,feat_values,params['wilcoxon_alpha'],params['multiclass_strat'],params['strict'],feat_name,params['min_c'],params['only_same_subcl'],params['curv'])
+		wilcoxon_res[feat_name] = str(pv) if res_wilcoxon_rep else "-"
+		if not res_wilcoxon_rep:
+			if params['verbose']: print "wilc ko" 
+			del feats[feat_name]
+		elif params['verbose']: print "wilc ok\t"
+		
+	if len(feats) > 0:
+		print "Number of significantly discriminative features:", len(feats), "(", kw_n_ok, ") before internal wilcoxon"
+		if params['lda_abs_th'] < 0.0:
+			lda_res,lda_res_th = dict([(k,0.0) for k,v in feats.items()]), dict([(k,v) for k,v in feats.items()])
+		else:
+			if params['rank_tec'] == 'lda': lda_res,lda_res_th = test_lda_r(cls,feats,class_sl,params['n_boots'],params['f_boots'],params['lda_abs_th'],0.0000000001,params['nlogs'])
+			elif params['rank_tec'] == 'svm': lda_res,lda_res_th = test_svm(cls,feats,class_sl,params['n_boots'],params['f_boots'],params['lda_abs_th'],0.0,params['svm_norm'])	
+			else: lda_res,lda_res_th = dict([(k,0.0) for k,v in feats.items()]), dict([(k,v) for k,v in feats.items()])
+	else: 
+		print "Number of significantly discriminative features:", len(feats), "(", kw_n_ok, ") before internal wilcoxon"
+		print "No features with significant differences between the two classes"
+		lda_res,lda_res_th = {},{}
+	outres = {}
+	outres['lda_res_th'] = lda_res_th
+	outres['lda_res'] = lda_res
+	outres['cls_means'] = cls_means
+	outres['cls_means_kord'] = kord
+	outres['wilcox_res'] = wilcoxon_res
+	print "Number of discriminative features with abs LDA score >",params['lda_abs_th'],":",len(lda_res_th) 
+	save_res(outres,params["output_file"])
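
For orientation, the result bundle assembled above has the following shape before save_res (defined in lefse.py, not part of this file) serializes it for the plotting modules; the keys come from the code, the values are purely illustrative:

    outres = {
        'lda_res_th': {'Bacteria.Firmicutes': 4.1},        # features passing |LDA| >= lda_abs_th
        'lda_res': {'Bacteria.Firmicutes': 4.1,
                    'Bacteria.Actinobacteria': 0.3},       # effect sizes for all surviving features
        'cls_means': {'Bacteria.Firmicutes': [2.9, 1.1]},  # per-class means from get_class_means
        'cls_means_kord': ['Bacteria.Firmicutes'],         # the key ordering returned with the means
        'wilcox_res': {'Bacteria.Firmicutes': '0.0021',    # KW p-value kept if the Wilcoxon step passed,
                       'Bacteria.Actinobacteria': '-'},    # '-' otherwise
    }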

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-lefse.git



More information about the debian-med-commit mailing list