1 """This module contains code to access EZRetrieve.
2
3 Functions:
4 retrieve_single Retrieve a single sequence from EZRetrieve.
5 parse_single Parse the results from EZRetrieve into FASTA format.
6
7 """
8
def retrieve_single(id, from_, to, retrieve_by=None, organism=None,
                    parse_results=1):
    """Retrieve a single sequence from the EZRetrieve web service.

    Arguments:
    id             Identifier to look up; sent as the "input" form field.
    from_, to      Sent (via str()) as the "from" and "to" form fields.
                   NOTE(review): presumably the bounds of the region to
                   retrieve -- confirm against the EZRetrieve form.
    retrieve_by    Kind of identifier: "GenBank" (default), "UniGene",
                   "LocusLink" or "IMAGE".  Matched case-insensitively.
    organism       "Hs" (default), "Mm" or "Rn".  Matched case-sensitively.
    parse_results  If true (default), return the output of parse_single();
                   otherwise return the raw page exactly as read from the
                   server.

    Raises AssertionError if organism or retrieve_by is not one of the
    recognised values, or (when parsing) if parse_single() reports a
    problem with the returned page.

    """
    # Python 3 split urllib into submodules; fall back to the flat
    # Python 2 module when they are not available.
    try:
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlencode, urlopen

    CGI = "http://siriusb.umdnj.edu:18080/EZRetrieve/single_r_run.jsp"

    # Form values are kept as strings throughout; urlencode() would
    # stringify integers anyway, so the request on the wire is unchanged.
    org2value = {"Hs": "0", "Mm": "1", "Rn": "2"}
    acctype2value = {
        "genbank": "0",
        "unigene": "1",
        "locuslink": "2",
        "image": "3",
    }

    # Validate with explicit raises rather than `assert` statements so the
    # checks survive `python -O`.  AssertionError is kept as the exception
    # type for backward compatibility with existing callers.
    organism = organism or "Hs"
    if organism not in org2value:
        raise AssertionError(
            "Unknown organism %r; expected one of %s"
            % (organism, ", ".join(sorted(org2value))))

    retrieve_by = (retrieve_by or "GenBank").lower()
    if retrieve_by not in acctype2value:
        raise AssertionError(
            "Unknown retrieve_by %r; expected one of %s"
            % (retrieve_by, ", ".join(sorted(acctype2value))))

    params = {
        "input": str(id),
        "from": str(from_),
        "to": str(to),
        "org": org2value[organism],
        "AccType": acctype2value[retrieve_by],
    }
    # urlencode() percent-escapes everything outside ASCII, so encoding to
    # ASCII cannot fail; Python 3's urlopen() requires the POST body as bytes.
    options = urlencode(params).encode("ascii")

    handle = urlopen(CGI, options)
    try:
        if parse_results:
            return parse_single(handle)
        return handle.read()
    finally:
        # Always release the connection, even if parsing raises.
        handle.close()
37
def parse_single(handle):
    """Return a FASTA-formatted string for the sequence read from handle.

    handle is any object whose read() method returns the HTML page
    produced by EZRetrieve.  If read() returns bytes (as Python 3 network
    handles do), the page is decoded as ISO-8859-1 before parsing.

    The sequence is taken from the first "<b>>" marker up to the next
    "<br><br>", or to the end of the page if that terminator is missing.
    HTML tags are removed and every run of whitespace is collapsed to a
    single newline; the result always ends with exactly one newline.

    Raises AssertionError if the page contains an "Error: " message (the
    message text is used as the exception message) or if no sequence
    marker can be found.

    """
    import re

    results = handle.read()
    # On Python 3 a network handle yields bytes; on Python 2 bytes is str,
    # so this branch is skipped and the return type stays unchanged there.
    # ISO-8859-1 maps every byte to one character, so decoding never fails.
    if isinstance(results, bytes) and not isinstance(results, str):
        results = results.decode("iso-8859-1")
    # Lower-cased copy used only to locate tags case-insensitively; offsets
    # found in it are applied to the original text.
    lresults = results.lower()

    i = results.find("Error: ")
    if i >= 0:
        # The message normally ends at the next <br>; if there is none,
        # report everything up to the end of the page instead of failing
        # with an unrelated ValueError.
        j = lresults.find("<br>", i)
        if j < 0:
            j = len(results)
        raise AssertionError(results[i:j].strip())

    i = lresults.find("<b>>")
    # Explicit raise (not `assert`) so the check survives `python -O`.
    if i < 0:
        raise AssertionError("Couldn't find sequence.")

    j = lresults.find("<br><br>", i)
    if j < 0:
        # Missing terminator: take the rest of the page.  Slicing with -1
        # would silently drop the final character.
        j = len(results)

    seqdata = results[i:j]
    seqdata = re.sub(r"<[^>]*>", "", seqdata)   # strip HTML tags
    seqdata = re.sub(r"\s+", "\n", seqdata)     # one token per line
    return seqdata.strip() + "\n"
62