/[clarity]/alignment-functions.lisp.lisp
ViewVC logotype

Contents of /alignment-functions.lisp.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations)
Wed Aug 16 20:18:39 2006 UTC (7 years, 8 months ago) by skleinberg
File size: 25686 byte(s)
Initial import
1 ;;; -*- Mode: Lisp -*-
2
3 #|CLARITY: Common Lisp Data Alignment Repository
4 Copyright (c) 2006 Samantha Kleinberg
5 All rights reserved.
6
7 This library is free software; you can redistribute it and/or modify it under the terms of the GNU
8 Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the
9 License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
12 the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
13 General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License along with this library;
16 if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
18 contact: Samantha AT Bioinformatics DOT nyu DOT edu
19 715 Broadway, 10th floor
20 New York, NY 10003|#
21
22 (in-package "CLARITY")
23
24 (eval-when (:compile-toplevel :load-toplevel :execute)
25 (require "sql")
26 (require "odbc"))
27
28 #.(sql:enable-sql-reader-syntax)
29
30 (defparameter *gap* #\-)
31 (defparameter *true* 1)
32 (defparameter *false* 0)
33
34 (defstruct (alignment
35 (:constructor new-alignment))
36 (score)
37 (items)) ;;type is pair of alignment items
38
39 (defstruct (alignment-item
40 (:constructor new-alignment-item))
41 (dataset-id)
42 (chart-id)
43 (term-name)
44 (string))
45
46
47 (defgeneric align-charts (clh id_1 id_2)
48 (:documentation "Aligns two datasets, given their timecourse data ids.
49 arguments: clarity-handle timecourse-id-1 timecourse-id-2
50 usage: (clarity:align-charts clarity-handle timecourse-id-1 timecourse-id-2)
51 notes: Returns hash table of alignments hashed by score"
52 ))
53
54 (defgeneric score-pair (clh id_1 id_2)
55 (:documentation "Given two timecourse data ids, computes their alignment and
56 returns score computed from hash table
57 arguments: clarity-handle timecourse-data-id-1 timecourse-data-id-2
58 usage: (clarity:score-pair clarity-handle timecourse-data-id-1 timecourse-data-id-2)
59 notes: Returns single numerical value"
60 ))
61
62 (defgeneric align-to-consensus (clh timecourse-id consensus-id)
63 (:documentation "Aligns timecourse data to consensus data"
64 ))
65
66 (defgeneric string-alignment (string1 string2)
67 (:documentation "Given two strings, performs dynamic programming alignment and returns the alignment score
68 arguments: string1 string2
69 usage: (clarity:string-alignment ""UUDDUI"" ""IUDDDI"")"
70 ))
71
72 (defgeneric score (char1 char2)
73 (:documentation "Given two characters, this function returns the numerical value associated with alignign one to the other
74 arguments: character1 character2
75 usage (clarity:score U D)"
76 ))
77
78 (defgeneric get-root (clh)
79 (:documentation "Returns root of phylogenetic tree.
80 arguments: clarity-handle
81 usage: (clarity:get-root clarity-handle)
82 notes: returns id number of root node."
83 ))
84
85 ;;right now, choose charts based on data ids
86 (defmethod align-charts ((clh clarity-handle) id_1 id_2)
87 ;;get intersection of datasets
88 (let* ((alignment-hash (make-hash-table :test #'equal))
89 (intersection-list (sql:select [fir.term_name] [fir.timecourse_data_id] [fir.id] [fir.sequence]
90 [sec.timecourse_data_id] [sec.id] [sec.sequence]
91 :from '(#.(sql::make-db-identifier :val "gantt_chart FIR")
92 #.(sql::make-db-identifier :val "gantt_chart SEC"))
93 :where [and [= ["FIR" timecourse_data_id] id_1]
94 [= ["SEC" timecourse_data_id] id_2]
95 [= ["FIR" term_name] ["SEC" term_name]]]
96 :database (connection clh)
97 ))
98 (alignment-items (loop for (term time_id_1 first_id first_sequence
99 time_id_2 second_id second_sequence) in intersection-list
100 collecting (list (new-alignment-item :dataset-id time_id_1
101 :chart-id first_id
102 :term-name term
103 :string first_sequence)
104 (new-alignment-item :dataset-id time_id_2
105 :chart-id second_id
106 :term-name term
107 :string second_sequence))
108 )))
109
110 (loop for value in (loop for (left right) in alignment-items
111 collecting (new-alignment :score (string-alignment
112 (alignment-item-string left)
113 (alignment-item-string right))
114 :items (list left right))
115 )
116 do (push value (gethash (alignment-score value) alignment-hash))
117 finally (return alignment-hash))
118 ))
119
120
121 (defmethod score-pair ((clh clarity-handle) id_1 id_2)
122 (let ((hash (align-charts clh
123 (first (first (sql:select [timecourse_data_id]
124 :from [|tree|]
125 :where [= [id] id_1]
126 :database (connection clh))))
127 (first (first (sql:select [timecourse_data_id]
128 :from [|tree|]
129 :where [= [id] id_2]
130 :database (connection clh)))))))
131 (loop for key being the hash-keys of hash
132 with alignmentSum=0
133 summing (* key (length (gethash key hash))) into alignmentSum
134 finally (return alignmentSum)
135 ))
136 )
137
138
139 (defmethod align-to-consensus ((clh clarity-handle) timecourse-id consensus-id)
140 ;;get intersection of datasets
141 (let* ((alignment-hash (make-hash-table :test #'equal))
142 (intersection-list (sql:select [fir.term_name] [fir.timecourse_data_id] [fir.id] [fir.sequence]
143 [sec.tree_id] [sec.id] [sec.sequence]
144 :from '(#.(sql::make-db-identifier :val "gantt_chart FIR")
145 #.(sql::make-db-identifier :val "consensus SEC"))
146 :where [and [= ["FIR" timecourse_data_id] timecourse-id]
147 [= ["SEC" tree_id] consensus-id]
148 [= ["FIR" term_name] ["SEC" term_name]]]
149 :database (connection clh)
150 ))
151
152 (alignment-items (loop for (term time_id first_id first_sequence
153 tree_id second_id second_sequence) in intersection-list
154 collecting (list (new-alignment-item :dataset-id time_id
155 :chart-id first_id
156 :term-name term
157 :string first_sequence)
158 (new-alignment-item :dataset-id tree_id
159 :chart-id second_id
160 :term-name term
161 :string second_sequence))
162 )))
163
164 (loop for value in (loop for (left right) in alignment-items
165 collecting (new-alignment :score (string-alignment
166 (alignment-item-string left)
167 (alignment-item-string right))
168 :items (list left right))
169 )
170 do (push value (gethash (alignment-score value) alignment-hash))
171 finally (return alignment-hash))
172 ))
173
174 (defmethod score-pair-consensus ((clh clarity-handle) timecourse-id consensus-id)
175 (let ((hash (align-to-consensus clh
176 (first (first (sql:select [timecourse_data_id]
177 :from [|tree|]
178 :where [= [id] timecourse-id]
179 :database (connection clh))))
180 consensus-id)))
181
182
183 (loop for key being the hash-keys of hash
184 with alignmentSum=0
185 summing (* key (length (gethash key hash))) into alignmentSum
186 finally (return alignmentSum)
187 )
188 )
189 )
190
191
192 (defmethod score (char1 char2)
193 (declare (type character char1 char2))
194 (cond ((char= char1 char2) 3)
195 ;((char= char1 *gap*) -1)
196 ;((char= char2 *gap*) -1)
197 ((and (char= char1 #\U) (char= char2 #\D)) -2)
198 ((and (char= char1 #\D) (char= char2 #\U)) -2)
199 (t -1)
200 ))
201
202 (defmethod string-alignment (string1 string2)
203 (let* ((l1 (length string1))
204 (l2 (length string2))
205 (table (make-array (list (+ l1 1) (+ l2 1))
206 :initial-element 0)))
207
208 ;;initialize table
209 (loop for i from 1 to l1
210 do (setf (aref table i 0) (+ (aref table (- i 1) 0) (score (char string1 (- i 1)) *gap*)))
211 )
212 (loop for k from 1 to l2
213 do (setf (aref table 0 k) (+ (aref table 0 (- k 1)) (score *gap* (char string2 (- k 1))))))
214
215 ;;fill in rest of table and return score in lower right corner
216 (loop for k from 1 to l2
217 do (loop for i from 1 to l1
218 do (setf (aref table i k)
219 (max (+ (aref table (- i 1) k) (score (char string1 (- i 1)) *gap*))
220 (+ (aref table (- i 1) (- k 1)) (score (char string1 (- i 1))
221 (char string2 (- k 1))))
222 (+ (aref table i (- k 1)) (score *gap* (char string2 (- k 1)))))))
223 finally (return (aref table l1 l2 )))
224
225 ))
226
227
228
229
230 (defmethod get-root ((clh clarity-handle))
231 ;;returns tree id of root node
232 (first (sql:with-transaction
233 (sql:select [id]
234 :from [|tree|]
235 :where [= [is_root] 1]
236 :database (connection clh)
237 :flatp t))))
238
239
240
241 (defmethod tree-insert ((clh clarity-handle) timecourse_data_id)
242 (let ((root (get-root clh)))
243 (if root
244 ;there is a root
245 (let ((new-node-id))
246 ;;make node for new-node
247 (sql:with-transaction
248 (sql:insert-records :into [|tree|]
249 :attributes '([|timecourse_data_id|] [|node_left|] [|node_right|])
250 :values (list timecourse_data_id 0 0)
251 :database (connection clh)))
252 (setf new-node-id (lastid clh :table "tree"))
253 (setf *new-best-node* 0)
254 (setf *new-best-score* 0)
255
256 (multiple-value-bind (best-node best-score)
257 (tree-insert-recursive clh new-node-id root)
258 ;;if there is no best node, insert as sibling of root
259 (insert-node clh new-node-id
260 (cond ((= best-node 0) root)
261 (t best-node)))))
262
263 ;no root, data to insert becomes root. this is the base case
264 (sql:with-transaction
265 (sql:insert-records :into [|tree|]
266 :attributes '([|timecourse_data_id|]
267 [|is_root|]
268 [|node_left|]
269 [|node_right|])
270 :values (list timecourse_data_id *true* 0 0)
271 :database (connection clh)))
272 )
273 ))
274
275
276 ;;node referred to by node-id. if node has no children, it has timecourse data.
277 ;; if it's an internal node, it has simply a consensus sequence"
278
279 (defvar *new-best-node*)
280 (defvar *new-best-score*)
281
282 (defmethod tree-insert-recursive ((clh clarity-handle) new-node current-node)
283 #| (if (eq depth 0)
284 (score-pair clh new-node current-node)
285 )|#
286 (destructuring-bind (left right)
287 (first (sql:select [node_left] [node_right]
288 :from [|tree|]
289 :where [= [id] current-node]
290 :database (connection clh)))
291 (let ((score (cond ((and (= 0 left) (= 0 right)) ;;if leaf node, use reg-score-pair
292 (score-pair clh new-node current-node))
293 (t (score-pair-consensus clh new-node current-node))))
294 ; (new-best-node best-node)
295 ; (new-best-score best-score))
296 )
297
298 (when (> score *new-best-score*)
299 (setf *new-best-node* current-node)
300 (setf *new-best-score* score)
301 )
302
303 (if (> left 0)
304 (tree-insert-recursive clh new-node left))
305
306 (if (> right 0)
307 (tree-insert-recursive clh new-node right))
308
309 (values *new-best-node* *new-best-score*)
310 )))
311
312
313
314 (defmethod insert-node ((clh clarity-handle) new-node-id sibling-node-id)
315 (let ((new-parent-id)
316 (new-node-data-id (first (first (sql:select [timecourse_data_id]
317 :from [|tree|]
318 :where [= [id] new-node-id]
319 :database (connection clh)))))
320 (new-consensus))
321
322 (destructuring-bind (sib-left sib-right sib-data-id is-root)
323 (first (sql:select [node_left] [node_right][timecourse_data_id][is_root]
324 :from [|tree|]
325 :where [= [|id|] sibling-node-id]
326 :database (connection clh)
327 ))
328
329 (when is-root
330 ;;if sibling was root, it no longer is, then parent becomes root
331 (sql:update-records [|tree|]
332 :where [= [|id|] sibling-node-id]
333 :attributes '([|is_root|])
334 :values '(0)
335 :database (connection clh))
336 )
337
338 ;;make new-node for parent
339 (sql:with-transaction
340 (sql:insert-records :into [|tree|]
341 :attributes '([|timecourse_data_id|] [|node_left|] [|node_right|][is_root])
342 :values (list 0 sibling-node-id new-node-id is-root)
343 :database (connection clh)))
344
345 (setf new-parent-id (lastid clh :table "tree"))
346 ;;update parent's consensus seq with it's id
347 ;;actualyl consensus id IS node id!
348
349 ;;update sibling's original parent
350 ;;could have been a left child or a right child. try both.
351 ;;get id to update old parent's consensus
352 (let ((old-parent-id
353 (or (first (sql:with-transaction
354 (sql:select [|id|] :from [|tree|]
355 :where [and [= [node_left] sibling-node-id]
356 [not [= [id] new-parent-id]]]
357 :database (connection clh) :flatp t)))
358 (first (sql:with-transaction
359 (sql:select [|id|] :from [|tree|]
360 :where [and [= [node_right] sibling-node-id]
361 [not [= [id] new-parent-id]]]
362 :database (connection clh) :flatp t))))))
363 (sql:with-transaction
364 (sql:update-records [|tree|]
365 :where [and [= [node_left] sibling-node-id]
366 [= [id] old-parent-id]]
367 :attributes '([|node_left|])
368 :values (list new-parent-id)
369 :database (connection clh)))
370
371 (sql:with-transaction
372 (sql:update-records [|tree|]
373 :where [and [= [node_right] sibling-node-id]
374 [= [id] old-parent-id]]
375 :attributes '([|node_right|])
376 :values (list new-parent-id)
377 :database (connection clh)))
378
379
380
381 ;;build consensus sequence
382 ;;when buildng consensus: right is always real data, since new. left may or may not be consensus
383 (build-consensus clh new-parent-id sibling-node-id new-node-data-id)
384
385
386 (when old-parent-id
387 ;;after making consensus sequence, for new node, need to update parent of new node's consensus!
388 ;;delete old consensus from table
389 (sql:with-transaction
390 (sql:delete-records :from [|consensus|]
391 :where [= [tree_id] old-parent-id]
392 :database (connection clh)))
393
394 ;;build consensus to replace old
395 (build-internal-consensus clh old-parent-id))
396
397
398 ))))
399
400
401 (defvar *test*)
402 (defvar *test-list*)
403
404 (defmethod build-consensus ((clh clarity-handle) node-id data-left data-right)
405 ;;right is always timecourse, left may be either
406
407 (destructuring-bind (data-id)
408 (first (sql:select [timecourse_data_id]
409 :from [|tree|]
410 :where [= [id] data-left]
411 :database (connection clh)))
412 (let ((term-seq-list
413 (cond ((> data-id 0)
414 ;;if left is timecourse_data
415 (sql:select [lchild.term_name] [lchild.sequence]
416 :from '(#.(sql::make-db-identifier :val "gantt_chart LCHILD")
417 #.(sql::make-db-identifier :val "gantt_chart RCHILD"))
418 :where [and [= ["LCHILD" timecourse_data_id] data-id]
419 [= ["RCHILD" timecourse_data_id] data-right]
420 [= ["LCHILD" term_name] ["RCHILD" term_name]]
421 [= ["LCHILD" sequence] ["RCHILD" sequence]]]
422 :database (connection clh)))
423 ; ((> consensus-id 0)
424 ;;left is consensus data
425 (t
426 (sql:select [lchild.term_name] [lchild.sequence]
427 :from '(#.(sql::make-db-identifier :val "consensus LCHILD")
428 #.(sql::make-db-identifier :val "gantt_chart RCHILD"))
429 :where [and [= ["LCHILD" tree_id] data-left]
430 [= ["RCHILD" timecourse_data_id] data-right]
431 [= ["LCHILD" term_name] ["RCHILD" term_name]]
432 [= ["LCHILD" sequence] ["RCHILD" sequence]]]
433 :database (connection clh)))
434 #|(t
435 (print "error!!!"))|#
436 )))
437
438 (setf *test-list* term-seq-list)
439
440 (when term-seq-list
441 (loop for (term seq) in term-seq-list
442 do(store-consensus-sequence clh node-id term seq)
443 ))
444
445 )))
446
447 (defmethod build-internal-consensus ((clh clarity-handle) node-id)
448 ;;at least one node is consensus, other may be either
449 (destructuring-bind (data-left data-right)
450 (first (sql:select [|node_left|] [|node_right|]
451 :from [|tree|]
452 :where [= [|id|] node-id]
453 :database (connection clh)))
454
455 (let ((left-data-id
456 (first (sql:select [timecourse_data_id]
457 :from [|tree|]
458 :where [= [id] data-left]
459 :database (connection clh)
460 :flatp t)))
461 (right-data-id
462 (first (sql:select [timecourse_data_id]
463 :from [|tree|]
464 :where [= [id] data-right]
465 :database (connection clh)
466 :flatp t))))
467 (let ((term-seq-list
468 (cond ((and (> left-data-id 0)
469 (> right-data-id 0))
470 ;;if left is timecourse_data
471 (sql:select [lchild.term_name] [lchild.sequence]
472 :from '(#.(sql::make-db-identifier :val "gantt_chart LCHILD")
473 #.(sql::make-db-identifier :val "gantt_chart RCHILD"))
474 :where [and [= ["LCHILD" timecourse_data_id] left-data-id]
475 [= ["RCHILD" timecourse_data_id] right-data-id]
476 [= ["LCHILD" term_name] ["RCHILD" term_name]]
477 [= ["LCHILD" sequence] ["RCHILD" sequence]]]
478 :database (connection clh)))
479 ((and (> left-data-id 0)
480 (= right-data-id 0))
481 ;;if left is timecourse_data
482 (sql:select [lchild.term_name] [lchild.sequence]
483 :from '(#.(sql::make-db-identifier :val "gantt_chart LCHILD")
484 #.(sql::make-db-identifier :val "consensus RCHILD"))
485 :where [and [= ["LCHILD" timecourse_data_id] left-data-id]
486 [= ["RCHILD" tree_id] data-right]
487 [= ["LCHILD" term_name] ["RCHILD" term_name]]
488 [= ["LCHILD" sequence] ["RCHILD" sequence]]]
489 :database (connection clh)))
490
491 ((and (= left-data-id 0)
492 (= right-data-id 0))
493 ;;if left is timecourse_data
494 (sql:select [lchild.term_name] [lchild.sequence]
495 :from '(#.(sql::make-db-identifier :val "consensus LCHILD")
496 #.(sql::make-db-identifier :val "consensus RCHILD"))
497 :where [and [= ["LCHILD" tree_id] data-left]
498 [= ["RCHILD" tree_id] data-right]
499 [= ["LCHILD" term_name] ["RCHILD" term_name]]
500 [= ["LCHILD" sequence] ["RCHILD" sequence]]]
501 :database (connection clh)))
502
503 (t
504 (sql:select [lchild.term_name] [lchild.sequence]
505 :from '(#.(sql::make-db-identifier :val "consensus LCHILD")
506 #.(sql::make-db-identifier :val "gantt_chart RCHILD"))
507 :where [and [= ["LCHILD" tree_id] data-left]
508 [= ["RCHILD" timecourse_data_id] right-data-id]
509 [= ["LCHILD" term_name] ["RCHILD" term_name]]
510 [= ["LCHILD" sequence] ["RCHILD" sequence]]]
511 :database (connection clh)))
512 )))
513
514 term-seq-list
515 (when term-seq-list
516 (loop for (term seq) in term-seq-list
517 do(store-consensus-sequence clh node-id term seq)
518 ))
519
520 ))))
521
522 (defmethod store-consensus-sequence ((clh clarity-handle) node-id term-name sequence)
523 (sql:with-transaction
524 (sql:insert-records :into [|consensus|]
525 :attributes '([|tree_id|] [|term_name|] [|sequence|])
526 :values (list node-id term-name sequence)
527 :database (connection clh)))
528 )
529
530 (defmethod get-consensus-terms ((clh clarity-handle) node-id)
531 ;;node-id is tree id
532 (sql:with-transaction
533 (sql:select [term_name]
534 :from [|consensus|]
535 :where [= [tree_id] node-id]
536 :database (connection clh)))
537 )
538
539 (defmethod get-data-terms ((clh clarity-handle) data-id)
540 (sql:with-transaction
541 (sql:select [term_name]
542 :from [|gantt_chart|]
543 :where [= [timecourse_data_id] data-id]
544 :database (connection clh)))
545
546 )
547
548 #.(sql:disable-sql-reader-syntax)
549
550
551 ;;; end of file -- alignment-functions.lisp --

  ViewVC Help
Powered by ViewVC 1.1.5