1
2
3
4
5
6
7
8
"""Represent a Sequence Feature holding info about a part of a sequence.

This is heavily modeled after the Biocorba SeqFeature objects, and
may be pretty biased towards GenBank stuff since I'm writing it
for the GenBank parser output...

What's here:

Base class to hold a Feature.
----------------------------
classes:
o SeqFeature

Hold information about a Reference.
----------------------------------

This is an attempt to create a General class to hold Reference type
information.

classes:
o Reference

Specify locations of a feature on a Sequence.
---------------------------------------------

This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in
much the same way as Biocorba. This has the advantages of allowing us
to handle fuzzy stuff in case anyone needs it, and also be compatible
with Biocorba.

classes:
o FeatureLocation - Specify the start and end location of a feature.

o ExactPosition - Specify the position as being exact.
o WithinPosition - Specify a position occurring within some range.
o BetweenPosition - Specify a position occurring between a range (OBSOLETE?).
o BeforePosition - Specify the position as being found before some base.
o AfterPosition - Specify the position as being found after some base.
o OneOfPosition - Specify a position where the location can be multiple positions.
"""
49
50 from Bio.Seq import MutableSeq, reverse_complement
51
class SeqFeature(object):
    """Represent a Sequence Feature on an object.

    Attributes:
    o location - the location of the feature on the sequence (FeatureLocation)
    o type - the specified type of the feature (ie. CDS, exon, repeat...)
    o location_operator - a string specifying how this SeqFeature may
    be related to others. For example, in the example GenBank feature
    shown below, the location_operator would be "join"
    o strand - A value specifying on which strand (of a DNA sequence, for
    instance) the feature deals with. 1 indicates the plus strand, -1
    indicates the minus strand, 0 indicates both strands, and None indicates
    that strand doesn't apply (ie. for proteins) or is not known.
    o id - A string identifier for the feature.
    o ref - A reference to another sequence. This could be an accession
    number for some different sequence.
    o ref_db - A different database for the reference accession number.
    o qualifiers - A dictionary of qualifiers on the feature. These are
    analogous to the qualifiers from a GenBank feature table. The keys of
    the dictionary are qualifier names, the values are the qualifier
    values.
    o sub_features - Additional SeqFeatures which fall under this 'parent'
    feature. For instance, if we have something like:

    CDS    join(1..10,30..40,50..60)

    Then the top level feature would be of type 'CDS' from 1 to 60 (actually 0
    to 60 in Python counting) with location_operator='join', and the three sub-
    features would also be of type 'CDS', and would be from 1 to 10, 30 to
    40 and 50 to 60, respectively (although actually using Python counting).

    To get the nucleotide sequence for this CDS, you would need to take the
    parent sequence and do seq[0:10]+seq[29:40]+seq[49:60] (Python counting).
    Things are more complicated with strands and fuzzy positions. To save you
    dealing with all these special cases, the SeqFeature provides an extract
    method to do this for you.
    """

    def __init__(self, location = None, type = '', location_operator = '',
                 strand = None, id = "<unknown id>",
                 qualifiers = None, sub_features = None,
                 ref = None, ref_db = None):
        """Initialize a SeqFeature on a Sequence.

        location can either be a FeatureLocation (with strand argument also
        given if required), or None.

        e.g. With no strand, on the forward strand, and on the reverse strand:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f1 = SeqFeature(FeatureLocation(5,10), type="domain")
        >>> f2 = SeqFeature(FeatureLocation(7,110), strand=1, type="CDS")
        >>> f3 = SeqFeature(FeatureLocation(9,108), strand=-1, type="CDS")

        An invalid strand will trigger an exception:

        >>> f4 = SeqFeature(FeatureLocation(50,60), strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        For exact start/end positions, an integer can be used (as shown above)
        as shorthand for the ExactPosition object. For non-exact locations, the
        FeatureLocation must be specified via the appropriate position objects.

        Raises ValueError for an invalid strand, and TypeError if location is
        neither None nor a FeatureLocation.
        """
        if strand not in [-1, 0, 1, None]:
            raise ValueError("Strand should be +1, -1, 0 or None, not %s"
                             % repr(strand))
        if location is not None and not isinstance(location, FeatureLocation):
            raise TypeError("FeatureLocation (or None) required for the location")
        self.location = location

        self.type = type
        self.location_operator = location_operator
        self.strand = strand
        self.id = id
        # Fresh containers per instance; never share a mutable default.
        if qualifiers is None:
            qualifiers = {}
        self.qualifiers = qualifiers
        if sub_features is None:
            sub_features = []
        self.sub_features = sub_features
        self.ref = ref
        self.ref_db = ref_db

    def __repr__(self):
        """A string representation of the record for debugging."""
        answer = "%s(%s" % (self.__class__.__name__, repr(self.location))
        if self.type:
            answer += ", type=%s" % repr(self.type)
        if self.location_operator:
            answer += ", location_operator=%s" % repr(self.location_operator)
        if self.strand:
            answer += ", strand=%s" % repr(self.strand)
        if self.id and self.id != "<unknown id>":
            answer += ", id=%s" % repr(self.id)
        if self.ref:
            answer += ", ref=%s" % repr(self.ref)
        if self.ref_db:
            answer += ", ref_db=%s" % repr(self.ref_db)
        answer += ")"
        return answer

    def __str__(self):
        """A readable summary of the feature intended to be printed to screen.
        """
        lines = ["type: %s\n" % self.type,
                 "location: %s\n" % self.location]
        if self.id and self.id != "<unknown id>":
            lines.append("id: %s\n" % self.id)
        if self.ref or self.ref_db:
            lines.append("ref: %s:%s\n" % (self.ref, self.ref_db))
        lines.append("strand: %s\n" % self.strand)
        lines.append("qualifiers: \n")
        for qual_key in sorted(self.qualifiers):
            lines.append(" Key: %s, Value: %s\n" % (qual_key,
                                                    self.qualifiers[qual_key]))
        if len(self.sub_features) != 0:
            lines.append("Sub-Features\n")
            for sub_feature in self.sub_features:
                lines.append("%s\n" % sub_feature)
        return "".join(lines)

    def _shift(self, offset):
        """Returns a copy of the feature with its location shifted (PRIVATE).

        The annotation qualifiers are copied (shallow copy), and the
        sub-features are shifted recursively.
        """
        return SeqFeature(location = self.location._shift(offset),
                          type = self.type,
                          location_operator = self.location_operator,
                          strand = self.strand,
                          id = self.id,
                          # dict(mapping) works on both Python 2 and 3,
                          # unlike mapping.iteritems().
                          qualifiers = dict(self.qualifiers),
                          sub_features = [f._shift(offset)
                                          for f in self.sub_features],
                          ref = self.ref,
                          ref_db = self.ref_db)

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        The parent_sequence can be a Seq like object or a string, and will
        generally return an object of the same type. The exception to this is
        a MutableSeq as the parent sequence will return a Seq object.

        This should cope with complex locations including complements, joins
        and fuzzy positions. Even mixed strand features should work! This
        also covers features on protein sequences (e.g. domains), although
        here reverse strand features are not permitted.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())

        Note - currently only sub-features of type "join" are supported;
        any other location_operator raises ValueError.
        """
        if isinstance(parent_sequence, MutableSeq):
            # Work on an immutable copy so that a Seq object is returned.
            parent_sequence = parent_sequence.toseq()
        if self.sub_features:
            if self.location_operator != "join":
                raise ValueError(self.location_operator)
            if self.strand == -1:
                # Reverse strand join: slice every part on the forward
                # strand, the whole concatenation is reverse complemented
                # once further down.
                parts = []
                for f_sub in self.sub_features:
                    assert f_sub.strand == -1
                    parts.append(parent_sequence[f_sub.location.nofuzzy_start:
                                                 f_sub.location.nofuzzy_end])
            else:
                # Forward or mixed strands: each sub-feature deals with its
                # own strand.
                parts = [f_sub.extract(parent_sequence)
                         for f_sub in self.sub_features]
            f_seq = parts[0]
            for part in parts[1:]:
                f_seq += part
        else:
            f_seq = parent_sequence[self.location.nofuzzy_start:
                                    self.location.nofuzzy_end]
        if self.strand == -1:
            try:
                f_seq = f_seq.reverse_complement()
            except AttributeError:
                # Plain strings have no reverse_complement method.
                assert isinstance(f_seq, str)
                f_seq = reverse_complement(f_seq)
        return f_seq

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqFeature always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The SeqFeature may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks up __bool__ (not __nonzero__) and would otherwise fall
    # back on __len__, making zero length features evaluate as False.
    __bool__ = __nonzero__

    def __len__(self):
        """Returns the length of the region described by a feature.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
        >>> len(f)
        7
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())
        >>> len(f.extract(seq))
        7

        For simple features without subfeatures this is the same as the region
        spanned (end position minus start position). However, for a feature
        defined by combining several subfeatures (e.g. a CDS as the join of
        several exons) the gaps are not counted (e.g. introns). This ensures
        that len(f) == len(f.extract(parent_seq)), and also makes sure things
        work properly with features wrapping the origin etc.
        """
        if self.sub_features:
            return sum(len(f) for f in self.sub_features)
        else:
            return len(self.location)

    def __iter__(self):
        """Iterate over the parent positions within the feature.

        The iteration order is strand aware, and can be thought of as moving
        along the feature using the parent sequence coordinates:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> for i in f: print(i)
        9
        8
        7
        6
        5
        >>> list(f)
        [9, 8, 7, 6, 5]

        For a feature with sub-features, each sub-feature is walked in its
        own strand aware order, so the positions come out in the same order
        as the letters returned by the extract method.
        """
        if self.sub_features:
            if self.strand == -1:
                ordered = self.sub_features[::-1]
            else:
                ordered = self.sub_features
            for sub_feature in ordered:
                # Iterate the sub-feature (not its location) so that its own
                # strand is honoured.
                for position in sub_feature:
                    yield position
        elif self.strand == -1:
            for i in range(self.location.nofuzzy_end - 1,
                           self.location.nofuzzy_start - 1, -1):
                yield i
        else:
            for i in range(self.location.nofuzzy_start,
                           self.location.nofuzzy_end):
                yield i

    def __contains__(self, value):
        """Check if an integer position is within the feature.

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> [i for i in range(15) if i in f]
        [5, 6, 7, 8, 9]

        For example, to see which features include a SNP position, you could
        use this:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
        >>> for f in record.features:
        ...     if 1750 in f:
        ...         print("%s %s %s" % (f.type, f.strand, f.location))
        source 1 [0:154478]
        gene -1 [1716:4347]
        tRNA -1 [1716:4347]

        Note that for a feature defined as a join of several subfeatures (e.g.
        the union of several exons) the gaps are not checked (e.g. introns).
        In this example, the tRNA location is defined in the GenBank file as
        complement(join(1717..1751,4311..4347)), so that position 1760 falls
        in the gap:

        >>> for f in record.features:
        ...     if 1760 in f:
        ...         print("%s %s %s" % (f.type, f.strand, f.location))
        source 1 [0:154478]
        gene -1 [1716:4347]

        Note that additional care may be required with fuzzy locations, for
        example just before a BeforePosition:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition
        >>> f = SeqFeature(FeatureLocation(BeforePosition(3),8), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(10) if i in f]
        [3, 4, 5, 6, 7]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a SeqFeature.")
        if self.sub_features:
            for f in self.sub_features:
                if value in f:
                    return True
            return False
        else:
            return value in self.location
377
378
379
380
class Reference(object):
    """Represent a Generic Reference object.

    Attributes:
    o location - A list of Location objects specifying regions of
    the sequence that the references correspond to. If no locations are
    specified, the entire sequence is assumed.
    o authors - A big old string, or a list split by author, of authors
    for the reference.
    o consrtm - Consortium name, only shown by str() when non-empty.
    o title - The title of the reference.
    o journal - Journal the reference was published in.
    o medline_id - A medline reference for the article.
    o pubmed_id - A pubmed reference for the article.
    o comment - A place to stick any comments about the reference.
    """

    def __init__(self):
        """Create an empty reference; fill in the attributes afterwards."""
        self.location = []
        self.authors = ''
        self.consrtm = ''
        self.title = ''
        self.journal = ''
        self.medline_id = ''
        self.pubmed_id = ''
        self.comment = ''

    def __str__(self):
        """Output an informative string for debugging.

        One "name: value" line per attribute, with one line per location.
        """
        pieces = ["location: %s\n" % entry for entry in self.location]
        pieces.append("authors: %s\n" % self.authors)
        if self.consrtm:
            pieces.append("consrtm: %s\n" % self.consrtm)
        pieces.append("title: %s\n" % self.title)
        pieces.append("journal: %s\n" % self.journal)
        pieces.append("medline id: %s\n" % self.medline_id)
        pieces.append("pubmed id: %s\n" % self.pubmed_id)
        pieces.append("comment: %s\n" % self.comment)
        return "".join(pieces)

    def __repr__(self):
        """Short debugging representation showing only the title."""
        class_name = self.__class__.__name__
        return "%s(title=%s, ...)" % (class_name, repr(self.title))
426
427
428
class FeatureLocation(object):
    """Specify the location of a feature along a sequence.

    This attempts to deal with fuzziness of position ends, but also
    make it easy to get the start and end in the 'normal' case (no
    fuzziness).

    You should access the start and end attributes with
    your_location.start and your_location.end. If the start and
    end are exact, this will return the positions, if not, we'll return
    the appropriate Fuzzy class with info about the position and fuzziness.

    Note that the start and end location numbering follow Python's scheme,
    thus a GenBank entry of 123..150 (one based counting) becomes a location
    of [122:150] (zero based counting).
    """

    def __init__(self, start, end):
        """Specify the start and end of a sequence feature.

        start and end arguments specify the values where the feature begins
        and ends. These can either be any of the *Position objects that
        inherit from AbstractPosition, or can just be integers specifying the
        position. In the case of integers, the values are assumed to be
        exact and are converted in ExactPosition arguments. This is meant
        to make it easy to deal with non-fuzzy ends.

        i.e. Short form:

        >>> from Bio.SeqFeature import FeatureLocation
        >>> loc = FeatureLocation(5,10)

        Explicit form:

        >>> from Bio.SeqFeature import FeatureLocation, ExactPosition
        >>> loc = FeatureLocation(ExactPosition(5),ExactPosition(10))

        Other fuzzy positions are used similarly,

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc2 = FeatureLocation(BeforePosition(5),AfterPosition(10))

        """
        if isinstance(start, AbstractPosition):
            self._start = start
        else:
            self._start = ExactPosition(start)

        if isinstance(end, AbstractPosition):
            self._end = end
        else:
            self._end = ExactPosition(end)

    def __str__(self):
        """Returns a representation of the location (with python counting).

        For the simple case this uses the python splicing syntax, [122:150]
        (zero based counting) which GenBank would call 123..150 (one based
        counting).
        """
        return "[%s:%s]" % (self._start, self._end)

    def __repr__(self):
        """A string representation of the location for debugging."""
        return "%s(%s,%s)" \
               % (self.__class__.__name__, repr(self.start), repr(self.end))

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a FeatureLocation always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The FeatureLocation may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks up __bool__ (not __nonzero__) and would otherwise fall
    # back on __len__, making zero length locations evaluate as False.
    __bool__ = __nonzero__

    def __len__(self):
        """Returns the length of the region described by the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        """
        return self._end.position + self._end.extension - self._start.position

    def __contains__(self, value):
        """Check if an integer position is within the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a FeatureLocation.")
        # Half open interval, just like a Python slice.
        lower = self._start.position
        upper = self._end.position + self._end.extension
        return lower <= value < upper

    def __iter__(self):
        """Iterate over the parent positions within the FeatureLocation.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        >>> for i in loc: print(i)
        5
        6
        7
        8
        9
        >>> list(loc)
        [5, 6, 7, 8, 9]
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]
        """
        for i in range(self._start.position,
                       self._end.position + self._end.extension):
            yield i

    def _shift(self, offset):
        """Returns a copy of the location shifted by the offset (PRIVATE)."""
        return FeatureLocation(start = self._start._shift(offset),
                               end = self._end._shift(offset))

    @property
    def start(self):
        """Start location (possibly a fuzzy position, read only)."""
        return self._start

    @property
    def end(self):
        """End location (possibly a fuzzy position, read only)."""
        return self._end

    @property
    def nofuzzy_start(self):
        """Start position (integer, approximated if fuzzy, read only).

        To get non-fuzzy attributes (ie. the position only) ask for
        'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
        the largest range of the fuzzy position. So something like:
        (10.20)..(30.40) should return 10 for start, and 40 for end.
        """
        return self._start.position

    @property
    def nofuzzy_end(self):
        """End position (integer, approximated if fuzzy, read only).

        To get non-fuzzy attributes (ie. the position only) ask for
        'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
        the largest range of the fuzzy position. So something like:
        (10.20)..(30.40) should return 10 for start, and 40 for end.
        """
        return self._end.position + self._end.extension
601
class AbstractPosition(object):
    """Abstract base class representing a position.

    A position is stored as an integer like 'position' plus a non-negative
    'extension' describing how far the fuzziness reaches.
    """

    def __init__(self, position, extension):
        assert extension >= 0, extension
        self.position = position
        self.extension = extension

    @staticmethod
    def _assert_comparable(other):
        """Check the other operand is a position object (PRIVATE)."""
        assert isinstance(other, AbstractPosition), \
               "We can only do comparisons between Biopython Position objects."

    def __repr__(self):
        """String representation of the location for debugging."""
        return "%s(%r,%r)" % (type(self).__name__,
                              self.position, self.extension)

    def __hash__(self):
        """Simple position based hash."""
        # Consistent with __eq__, which also ignores the extension.
        return hash(self.position)

    def __eq__(self, other):
        """A simple equality for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        self._assert_comparable(other)
        return self.position == other.position

    def __ne__(self, other):
        """A simple non-equality for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        self._assert_comparable(other)
        return self.position != other.position

    def __le__(self, other):
        """A simple less than or equal for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        self._assert_comparable(other)
        return self.position <= other.position

    def __lt__(self, other):
        """A simple less than for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        self._assert_comparable(other)
        return self.position < other.position

    def __ge__(self, other):
        """A simple greater than or equal for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        self._assert_comparable(other)
        return self.position >= other.position

    def __gt__(self, other):
        """A simple greater than for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        self._assert_comparable(other)
        return self.position > other.position

    def _shift(self, offset):
        """Return a copy moved by offset, keeping the extension (PRIVATE)."""
        # Build the copy from the instance's own class so that subclasses
        # get back an object of their own type.
        moved = self.position + offset
        return self.__class__(moved, self.extension)
689
class ExactPosition(AbstractPosition):
    """Specify the specific position of a boundary.

    o position - The position of the boundary.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    In this case, there is no fuzziness associated with the position.
    """

    def __init__(self, position, extension = 0):
        """Store the position; AttributeError for a non-zero extension."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """String representation of the ExactPosition location for debugging."""
        assert self.extension == 0
        return "%s(%r)" % (type(self).__name__, self.position)

    def __str__(self):
        """Return just the position as a string."""
        text = str(self.position)
        return text
713
class UncertainPosition(ExactPosition):
    """Specify a specific position which is uncertain.

    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.

    Adds nothing to ExactPosition; the distinct class is the marker.
    """
721
class UnknownPosition(AbstractPosition):
    """Specify a specific position which is unknown (has no position).

    This is used in UniProt, e.g. ? or in the XML as unknown.
    """

    def __init__(self):
        """Set both position and extension to None."""
        # The base class initialiser is deliberately not called here.
        self.position = self.extension = None

    def __repr__(self):
        """String representation of the UnknownPosition location for debugging."""
        class_name = self.__class__.__name__
        return "%s()" % class_name
735
class WithinPosition(AbstractPosition):
    """Specify the position of a boundary within some coordinates.

    Arguments:
    o position - The start position of the boundary
    o extension - The range to which the boundary can extend.

    This allows dealing with a position like ((1.4)..100). This
    indicates that the start of the sequence is somewhere between 1
    and 4. To represent that with this class we would set position as
    1 and extension as 3.
    """

    def __init__(self, position, extension = 0):
        """Store the lower bound and the size of the range."""
        AbstractPosition.__init__(self, position, extension)

    def __str__(self):
        """Return the range as lower and upper bound joined by a dot."""
        lower = self.position
        upper = self.position + self.extension
        return "(%s.%s)" % (lower, upper)
753
class BetweenPosition(AbstractPosition):
    """Specify the position of a boundary between two coordinates (OBSOLETE?).

    Arguments:
    o position - The start position of the boundary.
    o extension - The range to the other position of a boundary.

    This specifies a coordinate which is found between the two positions.
    So this allows us to deal with a position like ((1^2)..100). To
    represent that with this class we set position as 1 and the
    extension as 1.
    """

    def __init__(self, position, extension = 0):
        """Store the first coordinate and the distance to the second."""
        AbstractPosition.__init__(self, position, extension)

    def __str__(self):
        """Return the two coordinates joined by a caret."""
        first = self.position
        second = self.position + self.extension
        return "(%s^%s)" % (first, second)
771
class BeforePosition(AbstractPosition):
    """Specify a position where the actual location occurs before it.

    Arguments:
    o position - The upper boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (<10..100) where the location
    occurs somewhere before position 10.
    """

    def __init__(self, position, extension = 0):
        """Store the boundary; AttributeError for a non-zero extension."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """A string representation of the location for debugging."""
        assert self.extension == 0
        return "%s(%r)" % (type(self).__name__, self.position)

    def __str__(self):
        """Return the position prefixed with a less-than sign."""
        return "<" + ("%s" % self.position)
797
class AfterPosition(AbstractPosition):
    """Specify a position where the actual location is found after it.

    Arguments:
    o position - The lower boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (>10..100) where the location
    occurs somewhere after position 10.
    """

    def __init__(self, position, extension = 0):
        """Store the boundary; AttributeError for a non-zero extension."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """A string representation of the location for debugging."""
        assert self.extension == 0
        return "%s(%r)" % (type(self).__name__, self.position)

    def __str__(self):
        """Return the position prefixed with a greater-than sign."""
        return ">" + ("%s" % self.position)
823
class OneOfPosition(AbstractPosition):
    """Specify a position where the location can be multiple positions.

    This models the GenBank 'one-of(1888,1901)' function, and tries
    to make this fit within the Biopython Position models. In our case
    the position of the "one-of" is set as the lowest choice, and the
    extension is the range to the highest choice.
    """

    def __init__(self, position_list):
        """Initialize with a set of possible positions.

        position_list is a list of AbstractPosition derived objects,
        specifying possible locations.
        """
        self.position_choices = position_list

        # Track the lowest and highest coordinate among the choices.
        lowest = highest = None
        for choice in self.position_choices:
            assert isinstance(choice, AbstractPosition), \
                   "Expected position objects, got %r" % choice
            coordinate = choice.position
            if lowest is None and highest is None:
                lowest = highest = coordinate
            elif coordinate > highest:
                highest = coordinate
            elif coordinate < lowest:
                lowest = coordinate

        AbstractPosition.__init__(self, lowest, highest - lowest)

    def __repr__(self):
        """String representation of the OneOfPosition location for debugging."""
        return "%s(%r)" % (type(self).__name__, self.position_choices)

    def __str__(self):
        """Return the choices in the GenBank style one-of(...) notation."""
        listing = "".join(["%s," % position
                           for position in self.position_choices])
        # Drop the last character (the trailing comma) before closing.
        return ("one-of(" + listing)[:-1] + ")"

    def _shift(self, offset):
        """Return a copy with every choice shifted by offset (PRIVATE)."""
        moved = [choice._shift(offset) for choice in self.position_choices]
        return self.__class__(moved)
872
class PositionGap(object):
    """Simple class to hold information about a gap between positions.
    """

    def __init__(self, gap_size):
        """Initialize with a position object containing the gap information.
        """
        self.gap_size = gap_size

    def __repr__(self):
        """A string representation of the position gap for debugging."""
        return "%s(%r)" % (type(self).__name__, self.gap_size)

    def __str__(self):
        """Return the gap in the form gap(size)."""
        return "gap(%s)" % self.gap_size
888
890 """Run the Bio.SeqFeature module's doctests (PRIVATE).
891
892 This will try and locate the unit tests directory, and run the doctests
893 from there in order that the relative paths used in the examples work.
894 """
895 import doctest
896 import os
897 if os.path.isdir(os.path.join("..","Tests")):
898 print "Runing doctests..."
899 cur_dir = os.path.abspath(os.curdir)
900 os.chdir(os.path.join("..","Tests"))
901 doctest.testmod()
902 os.chdir(cur_dir)
903 del cur_dir
904 print "Done"
905 elif os.path.isdir(os.path.join("Tests")) :
906 print "Runing doctests..."
907 cur_dir = os.path.abspath(os.curdir)
908 os.chdir(os.path.join("Tests"))
909 doctest.testmod()
910 os.chdir(cur_dir)
911 del cur_dir
912 print "Done"
913
914
915 if __name__ == "__main__":
916 _test()
917