1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 import sys, random, copy
18 import Nodes
19 try:
20
21 set = set
22 except NameError:
23
24 from sets import Set as set
25 from sets import ImmutableSet as frozenset
26
27 PRECISION_BRANCHLENGTH=6
28 PRECISION_SUPPORT=6
29 NODECOMMENT_START='[&'
30 NODECOMMENT_END=']'
31
33
35 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
36 - def __init__(self,taxon=None,branchlength=0.0,support=None,comment=None):
41
42 -class Tree(Nodes.Chain):
43 """Represents a tree using a chain of nodes with on predecessor (=ancestor)
44 and multiple successors (=subclades).
45 """
46
47
48
49
50
51
52
53
54
56 """Ntree(self,tree)."""
57 Nodes.Chain.__init__(self)
58 self.dataclass=data
59 self.__values_are_support=values_are_support
60 self.max_support=max_support
61 self.weight=weight
62 self.rooted=rooted
63 self.name=name
64 root=Nodes.Node(data())
65 self.add(root)
66 self.root=root.id
67 if tree:
68
69 tree=tree.strip().replace('\n','').replace('\r','')
70
71 tree=tree.rstrip(';')
72 self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
73
75 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively."""
76
77
78
79 tree = tree.strip()
80 if tree.count('(')!=tree.count(')'):
81 raise TreeError('Parentheses do not match in (sub)tree: '+tree)
82 if tree.count('(')==0:
83
84 nodecomment=tree.find(NODECOMMENT_START)
85 colon=tree.find(':')
86 if colon==-1 and nodecomment==-1:
87 return [tree,[None]]
88 elif colon==-1 and nodecomment>-1:
89 return [tree[:nodecomment],self._get_values(tree[nodecomment:])]
90 elif colon>-1 and nodecomment==-1:
91 return [tree[:colon],self._get_values(tree[colon+1:])]
92 elif colon < nodecomment:
93 return [tree[:colon],self._get_values(tree[colon+1:])]
94 else:
95 return [tree[:nodecomment],self._get_values(tree[nodecomment:])]
96 else:
97 closing=tree.rfind(')')
98 val=self._get_values(tree[closing+1:])
99 if not val:
100 val=[None]
101 subtrees=[]
102 plevel=0
103 prev=1
104 for p in range(1,closing):
105 if tree[p]=='(':
106 plevel+=1
107 elif tree[p]==')':
108 plevel-=1
109 elif tree[p]==',' and plevel==0:
110 subtrees.append(tree[prev:p])
111 prev=p+1
112 subtrees.append(tree[prev:closing])
113 subclades=[self._parse(subtree) for subtree in subtrees]
114 return [subclades,val]
115
117 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree)."""
118
119 if parent_id is None:
120 raise TreeError('Need node_id to connect to.')
121 for st in tree:
122 if type(st[0])==list:
123 nd=self.dataclass()
124 if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START):
125 nd.comment=st[1].pop(-1)
126 if len(st[1])>=2:
127 nd.support=st[1][0]
128 if st[1][1] is not None:
129 nd.branchlength=st[1][1]
130 elif len(st[1])==1:
131 if not self.__values_are_support:
132 if st[1][0] is not None:
133 nd.branchlength=st[1][0]
134 else:
135 nd.support=st[1][0]
136 sn=Nodes.Node(nd)
137 self.add(sn,parent_id)
138 self._add_subtree(sn.id,st[0])
139 else:
140 nd=self.dataclass()
141 if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START):
142 nd.comment=st[1].pop(-1)
143 nd.taxon=st[0]
144 if len(st)>1:
145 if len(st[1])>=2:
146 nd.support=st[1][0]
147 if st[1][1] is not None:
148 nd.branchlength=st[1][1]
149 elif len(st[1])==1:
150 if not self.__values_are_support:
151 if st[1][0] is not None:
152 nd.branchlength=st[1][0]
153 else:
154 nd.support=st[1][0]
155 leaf=Nodes.Node(nd)
156 self.add(leaf,parent_id)
157
176
177 - def _walk(self,node=None):
178 """Return all node_ids downwards from a node."""
179
180 if node is None:
181 node=self.root
182 for n in self.node(node).succ:
183 yield n
184 for sn in self._walk(n):
185 yield sn
186
187 - def node(self,node_id):
188 """Return the instance of node_id.
189
190 node = node(self,node_id)
191 """
192 if node_id not in self.chain:
193 raise TreeError('Unknown node_id: %d' % node_id)
194 return self.chain[node_id]
195
196 - def split(self,parent_id=None,n=2,branchlength=1.0):
197 """Speciation: generates n (default two) descendants of a node.
198
199 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0):
200 """
201 if parent_id is None:
202 raise TreeError('Missing node_id.')
203 ids=[]
204 parent_data=self.chain[parent_id].data
205 for i in range(n):
206 node=Nodes.Node()
207 if parent_data:
208 node.data=self.dataclass()
209
210 if parent_data.taxon:
211 node.data.taxon=parent_data.taxon+str(i)
212 node.data.branchlength=branchlength
213 ids.append(self.add(node,parent_id))
214 return ids
215
217 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes.
218
219 node_id = search_taxon(self,taxon)
220 """
221 for id,node in self.chain.items():
222 if node.data.taxon==taxon:
223 return id
224 return None
225
227 """Prunes a terminal taxon from the tree.
228
229 id_of_previous_node = prune(self,taxon)
230 If taxon is from a bifurcation, the connectiong node will be collapsed
231 and its branchlength added to remaining terminal node. This might be no
232 longer a meaningful value'
233 """
234
235 id=self.search_taxon(taxon)
236 if id is None:
237 raise TreeError('Taxon not found: %s' % taxon)
238 elif id not in self.get_terminals():
239 raise TreeError('Not a terminal taxon: %s' % taxon)
240 else:
241 prev=self.unlink(id)
242 self.kill(id)
243 if len(self.node(prev).succ)==1:
244 if prev==self.root:
245 self.root=self.node(self.root).succ[0]
246 self.node(self.root).branchlength=0.0
247 self.kill(prev)
248 else:
249 succ=self.node(prev).succ[0]
250 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength
251 self.collapse(prev)
252 self.node(succ).data.branchlength=new_bl
253 return prev
254
256 """Return a list of all otus downwards from a node (self, node_id).
257
258 nodes = get_taxa(self,node_id=None)
259 """
260
261 if node_id is None:
262 node_id=self.root
263 if node_id not in self.chain:
264 raise TreeError('Unknown node_id: %d.' % node_id)
265 if self.chain[node_id].succ==[]:
266 if self.chain[node_id].data:
267 return [self.chain[node_id].data.taxon]
268 else:
269 return None
270 else:
271 list=[]
272 for succ in self.chain[node_id].succ:
273 list.extend(self.get_taxa(succ))
274 return list
275
277 """Return a list of all terminal nodes."""
278 return [i for i in self.all_ids() if self.node(i).succ==[]]
279
281 """Returns True if node is a terminal node."""
282 return self.node(node).succ==[]
283
285 """Returns True if node is an internal node."""
286 return len(self.node(node).succ)>0
287
295 """Counts the number of terminal nodes that are attached to a node."""
296 if node is None:
297 node=self.root
298 return len([n for n in self._walk(node) if self.is_terminal(n)])
299
301 """Collapses all subtrees which belong to the same genus (i.e share the same first word in their taxon name."""
302
303 while True:
304 for n in self._walk():
305 if self.is_terminal(n):
306 continue
307 taxa=self.get_taxa(n)
308 genera=[]
309 for t in taxa:
310 if space_equals_underscore:
311 t=t.replace(' ','_')
312 try:
313 genus=t.split('_',1)[0]
314 except:
315 genus='None'
316 if genus not in genera:
317 genera.append(genus)
318 if len(genera)==1:
319 self.node(n).data.taxon=genera[0]+' <collapsed>'
320
321 nodes2kill=[kn for kn in self._walk(node=n)]
322 for kn in nodes2kill:
323 self.kill(kn)
324 self.node(n).succ=[]
325 break
326 else:
327 break
328
329
331 """Adds up the branchlengths from root (default self.root) to node.
332
333 sum = sum_branchlength(self,root=None,node=None)
334 """
335
336 if root is None:
337 root=self.root
338 if node is None:
339 raise TreeError('Missing node id.')
340 blen=0.0
341 while node is not None and node is not root:
342 blen+=self.node(node).data.branchlength
343 node=self.node(node).prev
344 return blen
345
347 """Return subtree as a set of nested sets.
348
349 sets = set_subtree(self,node)
350 """
351
352 if self.node(node).succ==[]:
353 return self.node(node).data.taxon
354 else:
355 try:
356 return frozenset([self.set_subtree(n) for n in self.node(node).succ])
357 except:
358 print node
359 print self.node(node).succ
360 for n in self.node(node).succ:
361 print n, self.set_subtree(n)
362 print [self.set_subtree(n) for n in self.node(node).succ]
363 raise
364
366 """Compare tree and tree2 for identity.
367
368 result = is_identical(self,tree2)
369 """
370 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
371
373 """Compares branches with support>threshold for compatibility.
374
375 result = is_compatible(self,tree2,threshold)
376 """
377
378
379 missing2=set(self.get_taxa())-set(tree2.get_taxa())
380 missing1=set(tree2.get_taxa())-set(self.get_taxa())
381 if strict and (missing1 or missing2):
382 if missing1:
383 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name)
384 if missing2:
385 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name)
386 raise TreeError('Can\'t compare trees with different taxon compositions.')
387 t1=[(set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \
388 self.node(n).succ and\
389 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)]
390 t2=[(set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \
391 tree2.node(n).succ and\
392 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)]
393 conflict=[]
394 for (st1,sup1) in t1:
395 for (st2,sup2) in t2:
396 if not st1.issubset(st2) and not st2.issubset(st1):
397 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2
398
399 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)):
400 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2))
401 return conflict
402
404 """Return the common ancestor that connects two nodes.
405
406 node_id = common_ancestor(self,node1,node2)
407 """
408
409 l1=[self.root]+self.trace(self.root,node1)
410 l2=[self.root]+self.trace(self.root,node2)
411 return [n for n in l1 if n in l2][-1]
412
413
415 """Add and return the sum of the branchlengths between two nodes.
416 dist = distance(self,node1,node2)
417 """
418
419 ca=self.common_ancestor(node1,node2)
420 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
421
423 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise.
424
425 result = is_monophyletic(self,taxon_list)
426 """
427 if isinstance(taxon_list,str):
428 taxon_set=set([taxon_list])
429 else:
430 taxon_set=set(taxon_list)
431 node_id=self.root
432 while 1:
433 subclade_taxa=set(self.get_taxa(node_id))
434 if subclade_taxa==taxon_set:
435 return node_id
436 else:
437 for subnode in self.chain[node_id].succ:
438 if set(self.get_taxa(subnode)).issuperset(taxon_set):
439 node_id=subnode
440 break
441 else:
442 return -1
443
458
459
460
462 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0
463
464 This is necessary when support has been stored as branchlength (e.g. paup), and has thus
465 been read in as branchlength.
466 """
467
468 for n in self.chain.keys():
469 self.node(n).data.support=self.node(n).data.branchlength
470 self.node(n).data.branchlength=0.0
471
473 """Convert absolute support (clade-count) to rel. frequencies.
474
475 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of
476 calculating relative frequencies."""
477
478 for n in self._walk():
479 if self.node(n).data.support:
480 self.node(n).data.support/=float(nrep)
481
483 """Returns True if any of the nodes has data.support != None."""
484 for n in self._walk(node):
485 if self.node(n).data.support:
486 return True
487 else:
488 return False
489
490 - def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
491 """Generates a random tree with ntax taxa and/or taxa from taxlabels.
492
493 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True)
494 Trees are bifurcating by default. (Polytomies not yet supported).
495 """
496
497 if not ntax and taxon_list:
498 ntax=len(taxon_list)
499 elif not taxon_list and ntax:
500 taxon_list=['taxon'+str(i+1) for i in range(ntax)]
501 elif not ntax and not taxon_list:
502 raise TreeError('Either numer of taxa or list of taxa must be specified.')
503 elif ntax != len(taxon_list):
504 raise TreeError('Length of taxon list must correspond to ntax.')
505
506 self.__init__()
507 terminals=self.get_terminals()
508
509 while len(terminals)<ntax:
510 newsplit=random.choice(terminals)
511 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength)
512
513 if branchlength_sd:
514 for nt in new_terminals:
515 bl=random.gauss(branchlength,branchlength_sd)
516 if bl<0:
517 bl=0
518 self.node(nt).data.branchlength=bl
519 terminals.extend(new_terminals)
520 terminals.remove(newsplit)
521
522 random.shuffle(taxon_list)
523 for (node,name) in zip(terminals,taxon_list):
524 self.node(node).data.taxon=name
525
527 """Quick and dirty lists of all nodes."""
528 table=[('#','taxon','prev','succ','brlen','blen (sum)','support','comment')]
529 for i in self.all_ids():
530 n=self.node(i)
531 if not n.data:
532 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-','-'))
533 else:
534 tx=n.data.taxon
535 if not tx:
536 tx='-'
537 blength=n.data.branchlength
538 if blength is None:
539 blength='-'
540 sum_blength='-'
541 else:
542 sum_blength=self.sum_branchlength(node=i)
543 support=n.data.support
544 if support is None:
545 support='-'
546 comment=n.data.comment
547 if comment is None:
548 comment='-'
549 table.append((str(i),tx,str(n.prev),str(n.succ),blength,sum_blength,support,comment))
550 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table])
551 print '\nRoot: ',self.root
552
554 """Return a paup compatible tree line.
555
556 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True)
557 """
558
559 if support_as_branchlengths or branchlengths_only:
560 plain=False
561 self.support_as_branchlengths=support_as_branchlengths
562 self.branchlengths_only=branchlengths_only
563 self.plain=plain
564
565 def make_info_string(data,terminal=False):
566 """Creates nicely formatted support/branchlengths."""
567
568 if self.plain:
569 return ''
570 elif self.support_as_branchlengths:
571 if terminal:
572 return ':%1.2f' % self.max_support
573 else:
574 return ':%1.2f' % (data.support)
575 elif self.branchlengths_only:
576 return ':%1.5f' % (data.branchlength)
577 else:
578 if terminal:
579 return ':%1.5f' % (data.branchlength)
580 else:
581 if data.branchlength is not None and data.support is not None:
582 return '%1.2f:%1.5f' % (data.support,data.branchlength)
583 elif data.branchlength is not None:
584 return '0.00000:%1.5f' % (data.branchlength)
585 elif data.support is not None:
586 return '%1.2f:0.00000' % (data.support)
587 else:
588 return '0.00:0.00000'
589 def ladderize_nodes(nodes,ladderize=None):
590 """Sorts node numbers according to the number of terminal nodes."""
591 if ladderize in ['left','LEFT','right','RIGHT']:
592 succnode_terminals=[(self.count_terminals(node=n),n) for n in nodes]
593 succnode_terminals.sort()
594 if (ladderize=='right' or ladderize=='RIGHT'):
595 succnode_terminals.reverse()
596 if succnode_terminals:
597 succnodes=zip(*succnode_terminals)[1]
598 else:
599 succnodes=[]
600 else:
601 succnodes=nodes
602 return succnodes
603
604 def newickize(node,ladderize=None):
605 """Convert a node tree to a newick tree recursively."""
606
607 if not self.node(node).succ:
608 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True)
609 else:
610 succnodes=ladderize_nodes(self.node(node).succ,ladderize=ladderize)
611 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes]
612 return '(%s)%s' % (','.join(subtrees),make_info_string(self.node(node).data))
613
614 treeline=['tree']
615 if self.name:
616 treeline.append(self.name)
617 else:
618 treeline.append('a_tree')
619 treeline.append('=')
620 if self.weight != 1:
621 treeline.append('[&W%s]' % str(round(float(self.weight),3)))
622 if self.rooted:
623 treeline.append('[&R]')
624 succnodes=ladderize_nodes(self.node(self.root).succ)
625 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes]
626 treeline.append('(%s)' % ','.join(subtrees))
627 if plain_newick:
628 return treeline[-1]
629 else:
630 return ' '.join(treeline)+';'
631
633 """Short version of to_string(), gives plain tree"""
634 return self.to_string(plain=True)
635
637 """Defines a unrooted Tree structure, using data of a rooted Tree."""
638
639
640
641 def _get_branches(node):
642 branches=[]
643 for b in self.node(node).succ:
644 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support])
645 branches.extend(_get_branches(b))
646 return branches
647
648 self.unrooted=_get_branches(self.root)
649
650 if len(self.node(self.root).succ)==2:
651
652 rootbranches=[b for b in self.unrooted if self.root in b[:2]]
653 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0]))
654 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1]))
655
656
657 newbranch=[b1[1],b2[1],b1[2]+b2[2]]
658 if b1[3] is None:
659 newbranch.append(b2[3])
660 elif b2[3] is None:
661 newbranch.append(b1[3])
662 elif b1[3]==b2[3]:
663 newbranch.append(b1[3])
664 elif b1[3]==0 or b2[3]==0:
665 newbranch.append(b1[3]+b2[3])
666 else:
667 raise TreeError('Support mismatch in bifurcating root: %f, %f' \
668 % (float(b1[3]),float(b2[3])))
669 self.unrooted.append(newbranch)
670
672
673 def _connect_subtree(parent,child):
674 """Hook subtree starting with node child to parent."""
675 for i,branch in enumerate(self.unrooted):
676 if parent in branch[:2] and child in branch[:2]:
677 branch=self.unrooted.pop(i)
678 break
679 else:
680 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' \
681 % (parent,child))
682 self.link(parent,child)
683 self.node(child).data.branchlength=branch[2]
684 self.node(child).data.support=branch[3]
685
686 child_branches=[b for b in self.unrooted if child in b[:2]]
687 for b in child_branches:
688 if child==b[0]:
689 succ=b[1]
690 else:
691 succ=b[0]
692 _connect_subtree(child,succ)
693
694
695 if outgroup is None:
696 return self.root
697 outgroup_node=self.is_monophyletic(outgroup)
698 if outgroup_node==-1:
699 return -1
700
701
702 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root:
703 return self.root
704
705 self.unroot()
706
707
708 for i,b in enumerate(self.unrooted):
709 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]:
710 root_branch=self.unrooted.pop(i)
711 break
712 else:
713 raise TreeError('Unrooted and rooted Tree do not match')
714 if outgroup_node==root_branch[1]:
715 ingroup_node=root_branch[0]
716 else:
717 ingroup_node=root_branch[1]
718
719 for n in self.all_ids():
720 self.node(n).prev=None
721 self.node(n).succ=[]
722
723 root=Nodes.Node(data=NodeData())
724 self.add(root)
725 self.root=root.id
726 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]])
727 self.unrooted.append([root.id,outgroup_node,0.0,0.0])
728 _connect_subtree(root.id,ingroup_node)
729 _connect_subtree(root.id,outgroup_node)
730
731 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root]
732 if len(oldroot)>1:
733 raise TreeError('Isolated nodes in tree description: %s' \
734 % ','.join(oldroot))
735 elif len(oldroot)==1:
736 self.kill(oldroot[0])
737 return self.root
738
740 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny.
741
742 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>)
743 or
744 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support)
745 """
746
747 if bstrees and constree:
748 raise TreeError('Specify either list of boostrap trees or consensus tree, not both')
749 if not (bstrees or constree):
750 raise TreeError('Specify either list of boostrap trees or consensus tree.')
751
752 if outgroup is None:
753 try:
754 succnodes=self.node(self.root).succ
755 smallest=min([(len(self.get_taxa(n)),n) for n in succnodes])
756 outgroup=self.get_taxa(smallest[1])
757 except:
758 raise TreeError("Error determining outgroup.")
759 else:
760 self.root_with_outgroup(outgroup)
761
762 if bstrees:
763 constree=consensus(bstrees,threshold=threshold,outgroup=outgroup)
764 else:
765 if not constree.has_support():
766 constree.branchlength2support()
767 constree.root_with_outgroup(outgroup)
768
769 for pnode in self._walk():
770 cnode=constree.is_monophyletic(self.get_taxa(pnode))
771 if cnode>-1:
772 self.node(pnode).data.support=constree.node(cnode).data.support
773
774
775 -def consensus(trees, threshold=0.5,outgroup=None):
849