Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1   
  2  # 
  3  # Trees.py  
  4  # 
  5  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license. Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10  # Tree class handles phylogenetic trees. Provides a set of methods to read and write newick-format tree 
 11  # descriptions, get information about trees (monphyly of taxon sets, congruence between trees, common ancestors,...) 
 12  # and to manipulate trees (reroot trees, split terminal nodes). 
 13  # 
 14  # Bug reports welcome: fkauff@biologie.uni-kl.de 
 15  # 
 16   
 17  import sys, random, copy 
 18  import Nodes 
 19  try: 
 20      #Check the built in set function is present (python 2.4+) 
 21      set = set 
 22  except NameError: 
 23      #For python 2.3 fall back on the sets module (deprecated in python 2.6) 
 24      from sets import Set as set 
 25      from sets import ImmutableSet as frozenset 
 26   
 27  PRECISION_BRANCHLENGTH=6 
 28  PRECISION_SUPPORT=6 
 29  NODECOMMENT_START='[&' 
 30  NODECOMMENT_END=']' 
 31   
32 -class TreeError(Exception): pass
33
34 -class NodeData:
35 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
36 - def __init__(self,taxon=None,branchlength=0.0,support=None,comment=None):
37 self.taxon=taxon 38 self.branchlength=branchlength 39 self.support=support 40 self.comment=comment
41
42 -class Tree(Nodes.Chain):
43 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 44 and multiple successors (=subclades). 45 """ 46 # A newick tree is parsed into nested list and then converted to a node list in two stages 47 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 48 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 49 # easier when parsing trees. 50 51 ## NOTE: Tree should store its data class in something like self.dataclass=data, 52 ## so that nodes that are generated have easy access to the data class 53 ## Some routines use automatically NodeData, this needs to be more concise 54
55 - def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values_are_support=False,max_support=1.0):
56 """Ntree(self,tree).""" 57 Nodes.Chain.__init__(self) 58 self.dataclass=data 59 self.__values_are_support=values_are_support 60 self.max_support=max_support 61 self.weight=weight 62 self.rooted=rooted 63 self.name=name 64 root=Nodes.Node(data()) 65 self.add(root) 66 self.root=root.id 67 if tree: # use the tree we have 68 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 69 tree=tree.strip().replace('\n','').replace('\r','') 70 # there's discrepancy whether newick allows semicolons et the end 71 tree=tree.rstrip(';') 72 self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
73
74 - def _parse(self,tree):
75 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 76 77 #Remove any leading/trailing white space - want any string starting 78 #with " (..." should be recognised as a leaf, "(..." 79 tree = tree.strip() 80 if tree.count('(')!=tree.count(')'): 81 raise TreeError('Parentheses do not match in (sub)tree: '+tree) 82 if tree.count('(')==0: # a leaf 83 #check if there's a colon, or a special comment, or both after the taxon name 84 nodecomment=tree.find(NODECOMMENT_START) 85 colon=tree.find(':') 86 if colon==-1 and nodecomment==-1: # none 87 return [tree,[None]] 88 elif colon==-1 and nodecomment>-1: # only special comment 89 return [tree[:nodecomment],self._get_values(tree[nodecomment:])] 90 elif colon>-1 and nodecomment==-1: # only numerical values 91 return [tree[:colon],self._get_values(tree[colon+1:])] 92 elif colon < nodecomment: # taxon name ends at first colon or with special comment 93 return [tree[:colon],self._get_values(tree[colon+1:])] 94 else: 95 return [tree[:nodecomment],self._get_values(tree[nodecomment:])] 96 else: 97 closing=tree.rfind(')') 98 val=self._get_values(tree[closing+1:]) 99 if not val: 100 val=[None] 101 subtrees=[] 102 plevel=0 103 prev=1 104 for p in range(1,closing): 105 if tree[p]=='(': 106 plevel+=1 107 elif tree[p]==')': 108 plevel-=1 109 elif tree[p]==',' and plevel==0: 110 subtrees.append(tree[prev:p]) 111 prev=p+1 112 subtrees.append(tree[prev:closing]) 113 subclades=[self._parse(subtree) for subtree in subtrees] 114 return [subclades,val]
115
116 - def _add_subtree(self,parent_id=None,tree=None):
117 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree).""" 118 119 if parent_id is None: 120 raise TreeError('Need node_id to connect to.') 121 for st in tree: 122 if type(st[0])==list: # it's a subtree 123 nd=self.dataclass() 124 if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START): # last element of values is a text and starts with [& 125 nd.comment=st[1].pop(-1) 126 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 127 nd.support=st[1][0] 128 if st[1][1] is not None: 129 nd.branchlength=st[1][1] 130 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 131 if not self.__values_are_support: # default 132 if st[1][0] is not None: 133 nd.branchlength=st[1][0] 134 else: 135 nd.support=st[1][0] 136 sn=Nodes.Node(nd) 137 self.add(sn,parent_id) 138 self._add_subtree(sn.id,st[0]) 139 else: # it's a leaf 140 nd=self.dataclass() 141 if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START): 142 nd.comment=st[1].pop(-1) 143 nd.taxon=st[0] 144 if len(st)>1: 145 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 146 nd.support=st[1][0] 147 if st[1][1] is not None: 148 nd.branchlength=st[1][1] 149 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 150 if not self.__values_are_support: # default 151 if st[1][0] is not None: 152 nd.branchlength=st[1][0] 153 else: 154 nd.support=st[1][0] 155 leaf=Nodes.Node(nd) 156 self.add(leaf,parent_id)
157
158 - def _get_values(self, text):
159 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 160 161 if text=='': 162 return None 163 if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out 164 nc_start=text.find(NODECOMMENT_START) 165 nc_end=text.find(NODECOMMENT_END) 166 if nc_end==-1: 167 raise TreeError('Error in tree description: Found %s without matching %s' \ 168 % (NODECOMMENT_START, NODECOMMENT_END)) 169 nodecomment=text[nc_start:nc_end+1] 170 text=text[:nc_start]+text[nc_end+1:] 171 values=[float(t) for t in text.split(':') if t.strip()] 172 values.append(nodecomment) 173 else: 174 values=[float(t) for t in text.split(':') if t.strip()] 175 return values
176
177 - def _walk(self,node=None):
178 """Return all node_ids downwards from a node.""" 179 180 if node is None: 181 node=self.root 182 for n in self.node(node).succ: 183 yield n 184 for sn in self._walk(n): 185 yield sn
186
187 - def node(self,node_id):
188 """Return the instance of node_id. 189 190 node = node(self,node_id) 191 """ 192 if node_id not in self.chain: 193 raise TreeError('Unknown node_id: %d' % node_id) 194 return self.chain[node_id]
195
196 - def split(self,parent_id=None,n=2,branchlength=1.0):
197 """Speciation: generates n (default two) descendants of a node. 198 199 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 200 """ 201 if parent_id is None: 202 raise TreeError('Missing node_id.') 203 ids=[] 204 parent_data=self.chain[parent_id].data 205 for i in range(n): 206 node=Nodes.Node() 207 if parent_data: 208 node.data=self.dataclass() 209 # each node has taxon and branchlength attribute 210 if parent_data.taxon: 211 node.data.taxon=parent_data.taxon+str(i) 212 node.data.branchlength=branchlength 213 ids.append(self.add(node,parent_id)) 214 return ids
215
216 - def search_taxon(self,taxon):
217 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 218 219 node_id = search_taxon(self,taxon) 220 """ 221 for id,node in self.chain.items(): 222 if node.data.taxon==taxon: 223 return id 224 return None
225
226 - def prune(self,taxon):
227 """Prunes a terminal taxon from the tree. 228 229 id_of_previous_node = prune(self,taxon) 230 If taxon is from a bifurcation, the connectiong node will be collapsed 231 and its branchlength added to remaining terminal node. This might be no 232 longer a meaningful value' 233 """ 234 235 id=self.search_taxon(taxon) 236 if id is None: 237 raise TreeError('Taxon not found: %s' % taxon) 238 elif id not in self.get_terminals(): 239 raise TreeError('Not a terminal taxon: %s' % taxon) 240 else: 241 prev=self.unlink(id) 242 self.kill(id) 243 if len(self.node(prev).succ)==1: 244 if prev==self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 245 self.root=self.node(self.root).succ[0] 246 self.node(self.root).branchlength=0.0 247 self.kill(prev) 248 else: 249 succ=self.node(prev).succ[0] 250 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength 251 self.collapse(prev) 252 self.node(succ).data.branchlength=new_bl 253 return prev
254
255 - def get_taxa(self,node_id=None):
256 """Return a list of all otus downwards from a node (self, node_id). 257 258 nodes = get_taxa(self,node_id=None) 259 """ 260 261 if node_id is None: 262 node_id=self.root 263 if node_id not in self.chain: 264 raise TreeError('Unknown node_id: %d.' % node_id) 265 if self.chain[node_id].succ==[]: 266 if self.chain[node_id].data: 267 return [self.chain[node_id].data.taxon] 268 else: 269 return None 270 else: 271 list=[] 272 for succ in self.chain[node_id].succ: 273 list.extend(self.get_taxa(succ)) 274 return list
275
276 - def get_terminals(self):
277 """Return a list of all terminal nodes.""" 278 return [i for i in self.all_ids() if self.node(i).succ==[]]
279
280 - def is_terminal(self,node):
281 """Returns True if node is a terminal node.""" 282 return self.node(node).succ==[]
283
284 - def is_internal(self,node):
285 """Returns True if node is an internal node.""" 286 return len(self.node(node).succ)>0
287
288 - def is_preterminal(self,node):
289 """Returns True if all successors of a node are terminal ones.""" 290 if self.is_terminal(node): 291 return False not in [self.is_terminal(n) for n in self.node(node).succ] 292 else: 293 return False
294 - def count_terminals(self,node=None):
295 """Counts the number of terminal nodes that are attached to a node.""" 296 if node is None: 297 node=self.root 298 return len([n for n in self._walk(node) if self.is_terminal(n)])
299
300 - def collapse_genera(self,space_equals_underscore=True):
301 """Collapses all subtrees which belong to the same genus (i.e share the same first word in their taxon name.""" 302 303 while True: 304 for n in self._walk(): 305 if self.is_terminal(n): 306 continue 307 taxa=self.get_taxa(n) 308 genera=[] 309 for t in taxa: 310 if space_equals_underscore: 311 t=t.replace(' ','_') 312 try: 313 genus=t.split('_',1)[0] 314 except: 315 genus='None' 316 if genus not in genera: 317 genera.append(genus) 318 if len(genera)==1: 319 self.node(n).data.taxon=genera[0]+' <collapsed>' 320 #now we kill all nodes downstream 321 nodes2kill=[kn for kn in self._walk(node=n)] 322 for kn in nodes2kill: 323 self.kill(kn) 324 self.node(n).succ=[] 325 break # break out of for loop because node list from _walk will be inconsistent 326 else: # for loop exhausted: no genera to collapse left 327 break # while
328 329
330 - def sum_branchlength(self,root=None,node=None):
331 """Adds up the branchlengths from root (default self.root) to node. 332 333 sum = sum_branchlength(self,root=None,node=None) 334 """ 335 336 if root is None: 337 root=self.root 338 if node is None: 339 raise TreeError('Missing node id.') 340 blen=0.0 341 while node is not None and node is not root: 342 blen+=self.node(node).data.branchlength 343 node=self.node(node).prev 344 return blen
345
346 - def set_subtree(self,node):
347 """Return subtree as a set of nested sets. 348 349 sets = set_subtree(self,node) 350 """ 351 352 if self.node(node).succ==[]: 353 return self.node(node).data.taxon 354 else: 355 try: 356 return frozenset([self.set_subtree(n) for n in self.node(node).succ]) 357 except: 358 print node 359 print self.node(node).succ 360 for n in self.node(node).succ: 361 print n, self.set_subtree(n) 362 print [self.set_subtree(n) for n in self.node(node).succ] 363 raise
364
365 - def is_identical(self,tree2):
366 """Compare tree and tree2 for identity. 367 368 result = is_identical(self,tree2) 369 """ 370 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
371
372 - def is_compatible(self,tree2,threshold,strict=True):
373 """Compares branches with support>threshold for compatibility. 374 375 result = is_compatible(self,tree2,threshold) 376 """ 377 378 # check if both trees have the same set of taxa. strict=True enforces this. 379 missing2=set(self.get_taxa())-set(tree2.get_taxa()) 380 missing1=set(tree2.get_taxa())-set(self.get_taxa()) 381 if strict and (missing1 or missing2): 382 if missing1: 383 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name) 384 if missing2: 385 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name) 386 raise TreeError('Can\'t compare trees with different taxon compositions.') 387 t1=[(set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \ 388 self.node(n).succ and\ 389 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)] 390 t2=[(set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \ 391 tree2.node(n).succ and\ 392 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)] 393 conflict=[] 394 for (st1,sup1) in t1: 395 for (st2,sup2) in t2: 396 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 397 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets 398 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 399 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 400 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2)) 401 return conflict
402
403 - def common_ancestor(self,node1,node2):
404 """Return the common ancestor that connects two nodes. 405 406 node_id = common_ancestor(self,node1,node2) 407 """ 408 409 l1=[self.root]+self.trace(self.root,node1) 410 l2=[self.root]+self.trace(self.root,node2) 411 return [n for n in l1 if n in l2][-1]
412 413
414 - def distance(self,node1,node2):
415 """Add and return the sum of the branchlengths between two nodes. 416 dist = distance(self,node1,node2) 417 """ 418 419 ca=self.common_ancestor(node1,node2) 420 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
421
422 - def is_monophyletic(self,taxon_list):
423 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 424 425 result = is_monophyletic(self,taxon_list) 426 """ 427 if isinstance(taxon_list,str): 428 taxon_set=set([taxon_list]) 429 else: 430 taxon_set=set(taxon_list) 431 node_id=self.root 432 while 1: 433 subclade_taxa=set(self.get_taxa(node_id)) 434 if subclade_taxa==taxon_set: # are we there? 435 return node_id 436 else: # check subnodes 437 for subnode in self.chain[node_id].succ: 438 if set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 439 node_id=subnode 440 break # out of for loop 441 else: 442 return -1 # taxon set was not with successors, for loop exhausted
443
444 - def is_bifurcating(self,node=None):
445 """Return True if tree downstream of node is strictly bifurcating.""" 446 if node is None: 447 node=self.root 448 if node==self.root and len(self.node(node).succ)==3: #root can be trifurcating, because it has no ancestor 449 return self.is_bifurcating(self.node(node).succ[0]) and \ 450 self.is_bifurcating(self.node(node).succ[1]) and \ 451 self.is_bifurcating(self.node(node).succ[2]) 452 if len(self.node(node).succ)==2: 453 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 454 elif len(self.node(node).succ)==0: 455 return True 456 else: 457 return False
458 459 460
461 - def branchlength2support(self):
462 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 463 464 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 465 been read in as branchlength. 466 """ 467 468 for n in self.chain.keys(): 469 self.node(n).data.support=self.node(n).data.branchlength 470 self.node(n).data.branchlength=0.0
471
472 - def convert_absolute_support(self,nrep):
473 """Convert absolute support (clade-count) to rel. frequencies. 474 475 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 476 calculating relative frequencies.""" 477 478 for n in self._walk(): 479 if self.node(n).data.support: 480 self.node(n).data.support/=float(nrep)
481
482 - def has_support(self,node=None):
483 """Returns True if any of the nodes has data.support != None.""" 484 for n in self._walk(node): 485 if self.node(n).data.support: 486 return True 487 else: 488 return False
489
490 - def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
491 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 492 493 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 494 Trees are bifurcating by default. (Polytomies not yet supported). 495 """ 496 497 if not ntax and taxon_list: 498 ntax=len(taxon_list) 499 elif not taxon_list and ntax: 500 taxon_list=['taxon'+str(i+1) for i in range(ntax)] 501 elif not ntax and not taxon_list: 502 raise TreeError('Either numer of taxa or list of taxa must be specified.') 503 elif ntax != len(taxon_list): 504 raise TreeError('Length of taxon list must correspond to ntax.') 505 # initiate self with empty root 506 self.__init__() 507 terminals=self.get_terminals() 508 # bifurcate randomly at terminal nodes until ntax is reached 509 while len(terminals)<ntax: 510 newsplit=random.choice(terminals) 511 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength) 512 # if desired, give some variation to the branch length 513 if branchlength_sd: 514 for nt in new_terminals: 515 bl=random.gauss(branchlength,branchlength_sd) 516 if bl<0: 517 bl=0 518 self.node(nt).data.branchlength=bl 519 terminals.extend(new_terminals) 520 terminals.remove(newsplit) 521 # distribute taxon labels randomly 522 random.shuffle(taxon_list) 523 for (node,name) in zip(terminals,taxon_list): 524 self.node(node).data.taxon=name
525
526 - def display(self):
527 """Quick and dirty lists of all nodes.""" 528 table=[('#','taxon','prev','succ','brlen','blen (sum)','support','comment')] 529 for i in self.all_ids(): 530 n=self.node(i) 531 if not n.data: 532 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-','-')) 533 else: 534 tx=n.data.taxon 535 if not tx: 536 tx='-' 537 blength=n.data.branchlength 538 if blength is None: 539 blength='-' 540 sum_blength='-' 541 else: 542 sum_blength=self.sum_branchlength(node=i) 543 support=n.data.support 544 if support is None: 545 support='-' 546 comment=n.data.comment 547 if comment is None: 548 comment='-' 549 table.append((str(i),tx,str(n.prev),str(n.succ),blength,sum_blength,support,comment)) 550 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table]) 551 print '\nRoot: ',self.root
552
553 - def to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True,plain_newick=False,ladderize=None):
554 """Return a paup compatible tree line. 555 556 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True) 557 """ 558 # if there's a conflict in the arguments, we override plain=True 559 if support_as_branchlengths or branchlengths_only: 560 plain=False 561 self.support_as_branchlengths=support_as_branchlengths 562 self.branchlengths_only=branchlengths_only 563 self.plain=plain 564 565 def make_info_string(data,terminal=False): 566 """Creates nicely formatted support/branchlengths.""" 567 # CHECK FORMATTING 568 if self.plain: # plain tree only. That's easy. 569 return '' 570 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 571 if terminal: # terminal branches have 100% support 572 return ':%1.2f' % self.max_support 573 else: 574 return ':%1.2f' % (data.support) 575 elif self.branchlengths_only: # write only branchlengths, ignore support 576 return ':%1.5f' % (data.branchlength) 577 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 578 if terminal: 579 return ':%1.5f' % (data.branchlength) 580 else: 581 if data.branchlength is not None and data.support is not None: # we have blen and suppport 582 return '%1.2f:%1.5f' % (data.support,data.branchlength) 583 elif data.branchlength is not None: # we have only blen 584 return '0.00000:%1.5f' % (data.branchlength) 585 elif data.support is not None: # we have only support 586 return '%1.2f:0.00000' % (data.support) 587 else: 588 return '0.00:0.00000'
589 def ladderize_nodes(nodes,ladderize=None): 590 """Sorts node numbers according to the number of terminal nodes.""" 591 if ladderize in ['left','LEFT','right','RIGHT']: 592 succnode_terminals=[(self.count_terminals(node=n),n) for n in nodes] 593 succnode_terminals.sort() 594 if (ladderize=='right' or ladderize=='RIGHT'): 595 succnode_terminals.reverse() 596 if succnode_terminals: 597 succnodes=zip(*succnode_terminals)[1] 598 else: 599 succnodes=[] 600 else: 601 succnodes=nodes 602 return succnodes
603 604 def newickize(node,ladderize=None): 605 """Convert a node tree to a newick tree recursively.""" 606 607 if not self.node(node).succ: #terminal 608 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True) 609 else: 610 succnodes=ladderize_nodes(self.node(node).succ,ladderize=ladderize) 611 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes] 612 return '(%s)%s' % (','.join(subtrees),make_info_string(self.node(node).data)) 613 614 treeline=['tree'] 615 if self.name: 616 treeline.append(self.name) 617 else: 618 treeline.append('a_tree') 619 treeline.append('=') 620 if self.weight != 1: 621 treeline.append('[&W%s]' % str(round(float(self.weight),3))) 622 if self.rooted: 623 treeline.append('[&R]') 624 succnodes=ladderize_nodes(self.node(self.root).succ) 625 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes] 626 treeline.append('(%s)' % ','.join(subtrees)) 627 if plain_newick: 628 return treeline[-1] 629 else: 630 return ' '.join(treeline)+';' 631
632 - def __str__(self):
633 """Short version of to_string(), gives plain tree""" 634 return self.to_string(plain=True)
635
636 - def unroot(self):
637 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 638 639 # travel down the rooted tree structure and save all branches and the nodes they connect 640 641 def _get_branches(node): 642 branches=[] 643 for b in self.node(node).succ: 644 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support]) 645 branches.extend(_get_branches(b)) 646 return branches
647 648 self.unrooted=_get_branches(self.root) 649 # if root is bifurcating, then it is eliminated 650 if len(self.node(self.root).succ)==2: 651 # find the two branches that connect to root 652 rootbranches=[b for b in self.unrooted if self.root in b[:2]] 653 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0])) 654 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1])) 655 # Connect them two each other. If both have support, it should be identical (or one set to None?). 656 # If both have branchlengths, they will be added 657 newbranch=[b1[1],b2[1],b1[2]+b2[2]] 658 if b1[3] is None: 659 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 660 elif b2[3] is None: 661 newbranch.append(b1[3]) # dito 662 elif b1[3]==b2[3]: 663 newbranch.append(b1[3]) # identical support 664 elif b1[3]==0 or b2[3]==0: 665 newbranch.append(b1[3]+b2[3]) # one is 0, take the other 666 else: 667 raise TreeError('Support mismatch in bifurcating root: %f, %f' \ 668 % (float(b1[3]),float(b2[3]))) 669 self.unrooted.append(newbranch) 670
671 - def root_with_outgroup(self,outgroup=None):
672 673 def _connect_subtree(parent,child): 674 """Hook subtree starting with node child to parent.""" 675 for i,branch in enumerate(self.unrooted): 676 if parent in branch[:2] and child in branch[:2]: 677 branch=self.unrooted.pop(i) 678 break 679 else: 680 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' \ 681 % (parent,child)) 682 self.link(parent,child) 683 self.node(child).data.branchlength=branch[2] 684 self.node(child).data.support=branch[3] 685 #now check if there are more branches connected to the child, and if so, connect them 686 child_branches=[b for b in self.unrooted if child in b[:2]] 687 for b in child_branches: 688 if child==b[0]: 689 succ=b[1] 690 else: 691 succ=b[0] 692 _connect_subtree(child,succ)
693 694 # check the outgroup we're supposed to root with 695 if outgroup is None: 696 return self.root 697 outgroup_node=self.is_monophyletic(outgroup) 698 if outgroup_node==-1: 699 return -1 700 # if tree is already rooted with outgroup on a bifurcating root, 701 # or the outgroup includes all taxa on the tree, then we're fine 702 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root: 703 return self.root 704 705 self.unroot() 706 # now we find the branch that connects outgroup and ingroup 707 #print self.node(outgroup_node).prev 708 for i,b in enumerate(self.unrooted): 709 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 710 root_branch=self.unrooted.pop(i) 711 break 712 else: 713 raise TreeError('Unrooted and rooted Tree do not match') 714 if outgroup_node==root_branch[1]: 715 ingroup_node=root_branch[0] 716 else: 717 ingroup_node=root_branch[1] 718 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 719 for n in self.all_ids(): 720 self.node(n).prev=None 721 self.node(n).succ=[] 722 # now we just add both subtrees (outgroup and ingroup) branch for branch 723 root=Nodes.Node(data=NodeData()) # new root 724 self.add(root) # add to tree description 725 self.root=root.id # set as root 726 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]]) # add branch to ingroup to unrooted tree 727 self.unrooted.append([root.id,outgroup_node,0.0,0.0]) # add branch to outgroup to unrooted tree 728 _connect_subtree(root.id,ingroup_node) # add ingroup 729 _connect_subtree(root.id,outgroup_node) # add outgroup 730 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 731 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root] 732 if len(oldroot)>1: 733 raise TreeError('Isolated nodes in tree description: %s' \ 734 % ','.join(oldroot)) 735 elif len(oldroot)==1: 736 self.kill(oldroot[0]) 737 return self.root 738
739 - def merge_with_support(self,bstrees=None,constree=None,threshold=0.5,outgroup=None):
740 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny. 741 742 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>) 743 or 744 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support) 745 """ 746 747 if bstrees and constree: 748 raise TreeError('Specify either list of boostrap trees or consensus tree, not both') 749 if not (bstrees or constree): 750 raise TreeError('Specify either list of boostrap trees or consensus tree.') 751 # no outgroup specified: use the smallest clade of the root 752 if outgroup is None: 753 try: 754 succnodes=self.node(self.root).succ 755 smallest=min([(len(self.get_taxa(n)),n) for n in succnodes]) 756 outgroup=self.get_taxa(smallest[1]) 757 except: 758 raise TreeError("Error determining outgroup.") 759 else: # root with user specified outgroup 760 self.root_with_outgroup(outgroup) 761 762 if bstrees: # calculate consensus 763 constree=consensus(bstrees,threshold=threshold,outgroup=outgroup) 764 else: 765 if not constree.has_support(): 766 constree.branchlength2support() 767 constree.root_with_outgroup(outgroup) 768 # now we travel all nodes, and add support from consensus, if the clade is present in both 769 for pnode in self._walk(): 770 cnode=constree.is_monophyletic(self.get_taxa(pnode)) 771 if cnode>-1: 772 self.node(pnode).data.support=constree.node(cnode).data.support
773 774
775 -def consensus(trees, threshold=0.5,outgroup=None):
776 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 777 778 total=len(trees) 779 if total==0: 780 return None 781 # shouldn't we make sure that it's NodeData or subclass?? 782 dataclass=trees[0].dataclass 783 max_support=trees[0].max_support 784 clades={} 785 #countclades={} 786 alltaxa=set(trees[0].get_taxa()) 787 # calculate calde frequencies 788 c=0 789 for t in trees: 790 c+=1 791 #print c 792 #if c%1==0: 793 # print c 794 if alltaxa!=set(t.get_taxa()): 795 raise TreeError('Trees for consensus must contain the same taxa') 796 t.root_with_outgroup(outgroup=outgroup) 797 for st_node in t._walk(t.root): 798 subclade_taxa=t.get_taxa(st_node) 799 subclade_taxa.sort() 800 subclade_taxa=str(subclade_taxa) # lists are not hashable 801 if subclade_taxa in clades: 802 clades[subclade_taxa]+=float(t.weight)/total 803 else: 804 clades[subclade_taxa]=float(t.weight)/total 805 #if subclade_taxa in countclades: 806 # countclades[subclade_taxa]+=t.weight 807 #else: 808 # countclades[subclade_taxa]=t.weight 809 # weed out clades below threshold 810 for (c,p) in clades.items(): 811 if p<threshold: 812 del clades[c] 813 # create a tree with a root node 814 consensus=Tree(name='consensus_%2.1f' % float(threshold),data=dataclass) 815 # each clade needs a node in the new tree, add them as isolated nodes 816 for (c,s) in clades.items(): 817 node=Nodes.Node(data=dataclass()) 818 node.data.support=s 819 node.data.taxon=set(eval(c)) 820 consensus.add(node) 821 # set root node data 822 consensus.node(consensus.root).data.support=None 823 consensus.node(consensus.root).data.taxon=alltaxa 824 # we sort the nodes by no. of taxa in the clade, so root will be the last 825 consensus_ids=consensus.all_ids() 826 consensus_ids.sort(lambda x,y:len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon)) 827 # now we just have to hook each node to the next smallest node that includes all taxa of the current 828 for i,current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 829 #print '----' 830 #print 'current: ',consensus.node(current).data.taxon 831 # search remaining nodes 832 for parent in consensus_ids[i+1:]: 833 #print 'parent: ',consensus.node(parent).data.taxon 834 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 835 break 836 else: 837 sys.exit('corrupt tree structure?') 838 # internal nodes don't have taxa 839 if len(consensus.node(current).data.taxon)==1: 840 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop() 841 # reset the support for terminal nodes to maximum 842 #consensus.node(current).data.support=max_support 843 else: 844 consensus.node(current).data.taxon=None 845 consensus.link(parent,current) 846 # eliminate root taxon name 847 consensus.node(consensus_ids[-1]).data.taxon=None 848 return consensus
849