1
2
3
4
5
6
7
8
9 """Connect with a BioSQL database and load Biopython-like objects from it.
10
11 This provides interfaces for loading biological objects from a relational
12 database, and is compatible with the BioSQL standards.
13 """
14 import BioSeq
15 import Loader
16 import DBUtils
17
19 """Main interface for loading an existing BioSQL-style database.
20
21 This function is the easiest way to retrieve a connection to a
22 database, doing something like:
23
24 >>> from BioSeq import BioSeqDatabase
25 >>> server = BioSeqDatabase.open_database(user = "root", db="minidb")
26
27 the various options are:
28 driver -> The name of the database driver to use for connecting. The
29 driver should implement the python DB API. By default, the MySQLdb
30 driver is used.
31 user -> the username to connect to the database with.
32 password, passwd -> the password to connect with
33 host -> the hostname of the database
34 database or db -> the name of the database
35 """
36 module = __import__(driver)
37 connect = getattr(module, "connect")
38
39
40 kw = kwargs.copy()
41 if driver == "MySQLdb":
42 if "database" in kw:
43 kw["db"] = kw["database"]
44 del kw["database"]
45 if "password" in kw:
46 kw["passwd"] = kw["password"]
47 del kw["password"]
48 else:
49
50 if "db" in kw:
51 kw["database"] = kw["db"]
52 del kw["db"]
53 if "passwd" in kw:
54 kw["password"] = kw["passwd"]
55 del kw["passwd"]
56 if driver in ["psycopg", "psycopg2"] and not kw.get("database"):
57 kw["database"] = "template1"
58 try:
59 conn = connect(**kw)
60 except module.InterfaceError:
61
62
63 if "database" in kw:
64 kw["dbname"] = kw["database"]
65 del kw["database"]
66 elif "db" in kw:
67 kw["dbname"] = kw["db"]
68 del kw["db"]
69
70 dsn = ' '.join(['='.join(i) for i in kw.items()])
71 conn = connect(dsn)
72
73 return DBServer(conn, module)
74
def __init__(self, conn, module, module_name=None):
    """Wrap an open database connection together with its driver module.

    conn        -> the open connection object; it is handed to Adaptor.
    module      -> the imported database driver module.
    module_name -> optional driver name; when None, module.__name__ is
                   used. It selects the DBUtils helper passed to the
                   Adaptor and is kept on the instance as module_name.
    """
    self.module = module
    if module_name is None:
        module_name = module.__name__
    self.adaptor = Adaptor(conn, DBUtils.get_dbutils(module_name))
    self.module_name = module_name
82
84 return self.__class__.__name__ + "(%r)" % self.adaptor.conn
93
100
def new_database(self, db_name, authority=None, description=None):
    """Add a new database to the server and return it.

    Inserts a (name, authority, description) row into the biodatabase
    table through the adaptor, then returns a BioSeqDatabase built from
    the same adaptor and db_name.
    """
    # The values are passed separately as parameters rather than being
    # formatted into the SQL text.
    sql = ("INSERT INTO biodatabase (name, authority, description)"
           " VALUES (%s, %s, %s)")
    self.adaptor.execute(sql, (db_name, authority, description))
    return BioSeqDatabase(self.adaptor, db_name)
109
111 """Load a database schema into the given database.
112
113 This is used to create tables, etc when a database is first created.
114 sql_file should specify the complete path to a file containing
115 SQL entries for building the tables.
116 """
117
118
119
120
121
122 sql_handle = open(sql_file, "rb")
123 sql = r""
124 for line in sql_handle.xreadlines():
125 if line.find("--") == 0:
126 pass
127 elif line.find("#") == 0:
128 pass
129 elif line.strip():
130 sql += line.strip()
131 sql += ' '
132
133
134
135
136
137 if self.module_name in ["psycopg", "psycopg2"]:
138 self.adaptor.cursor.execute(sql)
139
140
141 elif self.module_name in ["MySQLdb"]:
142 sql_parts = sql.split(";")
143 for sql_line in sql_parts[:-1]:
144 self.adaptor.cursor.execute(sql_line)
145 else:
146 raise ValueError("Module %s not supported by the loader." %
147 (self.module_name))
148
151 self.conn = conn
152 self.cursor = conn.cursor()
153 self.dbutils = dbutils
154
157
159 """Set the autocommit mode. True values enable; False values disable."""
160 return self.dbutils.autocommit(self.conn, y)
161
163 """Commits the current transaction."""
164 return self.conn.commit()
165
167 """Rolls back the current transaction."""
168 return self.conn.rollback()
169
171 """Close the connection. No further activity possible."""
172 return self.conn.close()
173
175 self.cursor.execute(
176 r"select biodatabase_id from biodatabase where name = %s",
177 (dbname,))
178 rv = self.cursor.fetchall()
179 if not rv:
180 raise KeyError("Cannot find biodatabase with name %r" % dbname)
181
182
183 return rv[0][0]
184
186 sql = r"select bioentry_id from bioentry where name = %s"
187 fields = [name]
188 if dbid:
189 sql += " and biodatabase_id = %s"
190 fields.append(dbid)
191 self.cursor.execute(sql, fields)
192 rv = self.cursor.fetchall()
193 if not rv:
194 raise IndexError("Cannot find display id %r" % name)
195 if len(rv) > 1:
196 raise IndexError("More than one entry with display id %r" % name)
197 return rv[0][0]
198
200 sql = r"select bioentry_id from bioentry where accession = %s"
201 fields = [name]
202 if dbid:
203 sql += " and biodatabase_id = %s"
204 fields.append(dbid)
205 self.cursor.execute(sql, fields)
206 rv = self.cursor.fetchall()
207 if not rv:
208 raise IndexError("Cannot find accession %r" % name)
209 if len(rv) > 1:
210 raise IndexError("More than one entry with accession %r" % name)
211 return rv[0][0]
212
214 sql = r"select bioentry_id from bioentry where accession = %s"
215 fields = [name]
216 if dbid:
217 sql += " and biodatabase_id = %s"
218 fields.append(dbid)
219 return self.execute_and_fetch_col0(sql, fields)
220
222 acc_version = name.split(".")
223 if len(acc_version) > 2:
224 raise IndexError("Bad version %r" % name)
225 acc = acc_version[0]
226 if len(acc_version) == 2:
227 version = acc_version[1]
228 else:
229 version = "0"
230 sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \
231 r" AND version = %s"
232 fields = [acc, version]
233 if dbid:
234 sql += " and biodatabase_id = %s"
235 fields.append(dbid)
236 self.cursor.execute(sql, fields)
237 rv = self.cursor.fetchall()
238 if not rv:
239 raise IndexError("Cannot find version %r" % name)
240 if len(rv) > 1:
241 raise IndexError("More than one entry with version %r" % name)
242 return rv[0][0]
243
245
246 sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s"
247 fields = [identifier]
248 if dbid:
249 sql += " and biodatabase_id = %s"
250 fields.append(dbid)
251 self.cursor.execute(sql, fields)
252 rv = self.cursor.fetchall()
253 if not rv:
254 raise IndexError("Cannot find display id %r" % identifier)
255 return rv[0][0]
256
260
def list_bioentry_ids(self, dbid):
    """Return the bioentry_id of every bioentry row in one biodatabase.

    dbid -> the biodatabase_id to filter on.

    The query is delegated to execute_and_fetch_col0, whose result is
    returned unchanged.
    """
    return self.execute_and_fetch_col0(
        "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s",
        (dbid,))
265
267 return self.execute_and_fetch_col0(
268 "SELECT name FROM bioentry WHERE biodatabase_id = %s",
269 (dbid,))
270
272 """Return ids given a SQL statement to select for them.
273
274 This assumes that the given SQL does a SELECT statement that
275 returns a list of items. This parses them out of the 2D list
276 they come as and just returns them in a list.
277 """
278 return self.cursor.execute_and_fetch_col0(sql, args)
279
281 self.cursor.execute(sql, args or ())
282 rv = self.cursor.fetchall()
283 assert len(rv) == 1, "Expected 1 response, got %d" % len(rv)
284 return rv[0]
285
def execute(self, sql, args=None):
    """Just execute an sql command.

    sql  -> the statement to run on this object's cursor.
    args -> parameters for the statement; any false value (None, an
            empty sequence) is replaced by an empty tuple.

    Nothing is fetched and nothing is returned.
    """
    self.cursor.execute(sql, args or ())
290
297
299 self.cursor.execute(sql, args or ())
300 return [field[0] for field in self.cursor.fetchall()]
301
305
# Maps each keyword accepted by lookup() to the name of the adaptor method
# that is fetched with getattr() and called to resolve it.
_allowed_lookups = {
    'primary_id': "fetch_seqid_by_identifier",
    'gi': "fetch_seqid_by_identifier",
    'display_id': "fetch_seqid_by_display_id",
    'name': "fetch_seqid_by_display_id",
    'accession': "fetch_seqid_by_accession",
    'version': "fetch_seqid_by_version",
}
315
322 return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)
323
332
341
350
352 """Gets a *list* of Bio::Seq objects by accession number
353
354 Example: seqs = db.get_Seq_by_acc('X77802')
355
356 """
357 seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
358 return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]
359
361
362
363
364
365 raise NotImplementedError("waiting for Python 2.2's iter")
366
368 """Array of all the primary_ids of the sequences in the database.
369
370 These may be ids (display style) or accession numbers or
371 something else completely different - they *are not*
372 meaningful outside of this database implementation.
373 """
374 return self.adaptor.list_bioentry_ids(self.dbid)
375
384
386 if len(kwargs) != 1:
387 raise TypeError("single key/value parameter expected")
388 k, v = kwargs.items()[0]
389 if k not in _allowed_lookups:
390 raise TypeError("lookup() expects one of %s, not %r" % \
391 (repr(_allowed_lookups.keys())[1:-1], repr(k)))
392 lookup_name = _allowed_lookups[k]
393 lookup_func = getattr(self.adaptor, lookup_name)
394 seqid = lookup_func(self.dbid, v)
395 return BioSeq.DBSeqRecord(self.adaptor, seqid)
396
398 """Gets a Bio::Seq object by the primary (internal) id.
399
400 The primary id in these cases has to come from
401 $db->get_all_primary_ids. There is no other way to get (or
402 guess) the primary_ids in a database.
403 """
404 return self[seqid]
405
def load(self, record_iterator, fetch_NCBI_taxonomy=False):
    """Load a set of SeqRecords into the BioSQL database.

    record_iterator is either a list of SeqRecord objects, or an
    Iterator object that returns SeqRecord objects (such as the
    output from the Bio.SeqIO.parse() function), which will be
    used to populate the database.

    fetch_NCBI_taxonomy is a boolean flag allowing or preventing
    connection to the taxonomic database on the NCBI server
    (via Bio.Entrez) to fetch a detailed taxonomy for each
    SeqRecord.

    Example:
    from Bio import SeqIO
    count = db.load(SeqIO.parse(open(filename), format))

    Returns the number of records loaded.
    """
    db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                      fetch_NCBI_taxonomy)
    # Count by hand while iterating: record_iterator may be a one-shot
    # iterator without a length, so it is walked exactly once.
    num_records = 0
    for cur_record in record_iterator:
        num_records += 1
        db_loader.load_seqrecord(cur_record)
    return num_records
432