/[meta-cvs]/meta-cvs/F-26D2A2DCE1CF180F943606C59DC63A51.lisp
ViewVC logotype

Contents of /meta-cvs/F-26D2A2DCE1CF180F943606C59DC63A51.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations)
Fri Jun 28 21:52:12 2002 UTC (11 years, 9 months ago) by kaz
Branch: MAIN
Changes since 1.4: +1 -1 lines
Change similarity threshold to 30%.
1 ;;; This source file is part of the Meta-CVS program,
2 ;;; which is distributed under the GNU license.
3 ;;; Copyright 2002 Kaz Kylheku
4
5 (require "dirwalk")
6 (require "system")
7 (require "split")
8 (require "seqfuncs")
9 (require "mapping")
10 (require "types")
11 (require "chatter")
12 (require "options")
13 (require "checkout")
14 (require "remove")
15 (require "add")
16 (provide "grab")
17
18 (defun read-word-hash (&optional (byte-stream t))
19 (let ((word-hash (make-hash-table :test #'equalp))
20 token
21 (state :junk))
22 (labels ((new-token ()
23 (if token
24 (setf (fill-pointer token) 0)
25 (setf token (make-array '(8)
26 :element-type 'character
27 :adjustable t
28 :fill-pointer 0))))
29 (save-token ()
30 (unless (gethash token word-hash)
31 (setf (gethash token word-hash) token)
32 (setf token nil))))
33 (do ((byte (read-byte byte-stream nil)
34 (read-byte byte-stream nil)))
35 ((null byte) (if (eq state :word) (save-token)) word-hash)
36 (let ((ch (int-char byte)))
37 (ecase state
38 ((:junk)
39 (when (or (alpha-char-p ch) (digit-char-p ch)
40 (char= ch #\_))
41 (new-token)
42 (vector-push-extend ch token)
43 (setf state :word)))
44 ((:word)
45 (cond
46 ((or (alpha-char-p ch) (digit-char-p ch)
47 (char= ch #\_) (char= ch #\-))
48 (vector-push-extend ch token))
49 (t (setf state :junk)
50 (save-token))))))))))
51
52 (defun word-hash-file (name)
53 (with-open-file (s (parse-posix-namestring name)
54 :direction :input
55 :element-type 'unsigned-byte)
56 (read-word-hash s)))
57
58 (defun correlate-word-hashes (hash-1 hash-2)
59 (let ((hc-1 (hash-table-count hash-1))
60 (hc-2 (hash-table-count hash-2)))
61 (when (> hc-1 hc-2)
62 (psetf hash-1 hash-2 hash-2 hash-1
63 hc-1 hc-2 hc-2 hc-1))
64 (let ((common-count 0))
65 (maphash #'(lambda (key element)
66 (declare (ignore key))
67 (when (gethash element hash-2)
68 (incf common-count)))
69 hash-1)
70 (let ((total-count (- (+ hc-1 hc-2) common-count)))
71 (if (zerop total-count)
72 0
73 (/ common-count total-count))))))
74
75
76 (defun correlate-paths (path-1 path-2)
77 (let* ((split-1 (split-fields path-1 *path-sep*))
78 (split-2 (split-fields path-2 *path-sep*))
79 (longer (max (length split-1) (length split-2)))
80 (lcs-len (length (longest-common-subsequence split-1 split-2
81 :test #'string=))))
82 (case (- longer lcs-len)
83 ((0 1) 1)
84 ((2) 95/100)
85 ((3) 90/100)
86 ((4) 85/100)
87 (otherwise 80/100))))
88
89 (defun added-removed (old-paths new-paths
90 &key (key-old #'values) (key-new #'values))
91 (let (added-files removed-files stable-files)
92 (let ((old-iter old-paths)
93 (new-iter new-paths))
94 (symbol-macrolet ((old-key (funcall key-old (first old-iter)))
95 (new-key (funcall key-new (first new-iter))))
96 (flet ((inc-old ()
97 (push (first old-iter) removed-files)
98 (setf old-iter (rest old-iter)))
99 (inc-new ()
100 (push (first new-iter) added-files)
101 (setf new-iter (rest new-iter))))
102 (loop
103 (cond
104 ((and (null old-iter) (null new-iter))
105 (return))
106 ((null old-iter)
107 (inc-new))
108 ((null new-iter)
109 (inc-old))
110 ((string< old-key new-key)
111 (inc-old))
112 ((string= new-key old-key)
113 (push (first old-iter) stable-files)
114 (setf old-iter (rest old-iter))
115 (setf new-iter (rest new-iter)))
116 (t (inc-new)))))))
117 (values added-files removed-files stable-files)))
118
119 (defun determine-common-words (common-hash added-or-removed-files)
120 (dolist (file added-or-removed-files common-hash)
121 (maphash #'(lambda (key value)
122 (declare (ignore value))
123 (let ((existing (gethash key common-hash)))
124 (if existing
125 (setf (gethash key common-hash)
126 (1+ existing))
127 (setf (gethash key common-hash)
128 1))))
129 (second file))))
130
131 (defun eliminate-common-words (common-hash files threshold)
132 (dolist (file files)
133 (maphash #'(lambda (key value)
134 (declare (ignore value))
135 (let ((count (gethash key common-hash)))
136 (if (and count (>= count threshold))
137 (remhash key (second file)))))
138 (second file))))
139
140 (defun move-candidates (added-files removed-files)
141 (let (pairs candidates taken-added taken-removed)
142 (dolist (added added-files)
143 (dolist (removed removed-files)
144 (let ((correlation (correlate-word-hashes (second added)
145 (second removed))))
146 (when (>= correlation 30/100)
147 (push (list added removed
148 (* correlation (correlate-paths (first added)
149 (first removed))))
150 pairs)))))
151 (setf pairs (sort pairs #'> :key #'third))
152 (dolist (pair pairs)
153 (unless (or (member (first pair) taken-added :test #'eq)
154 (member (second pair) taken-removed :test #'eq))
155 (push (first pair) taken-added)
156 (push (second pair) taken-removed)
157 (push pair candidates)))
158 (values candidates
159 (set-difference added-files taken-added :test #'eq)
160 (set-difference removed-files taken-removed :test #'eq))))
161
162 (defun mcvs-grab (global-options command-options module branch)
163 (declare (ignore command-options))
164 (mcvs-checkout module global-options
165 `(("d" ,*this-dir*) ("r" ,branch))
166 :no-generate t)
167 (in-sandbox-root-dir
168 (let ((old-paths (sort (mapping-read *mcvs-map*)
169 #'string< :key #'second))
170 new-paths added-files removed-files stable-files)
171 (chatter-info "Scanning directory structure.~%")
172
173 (for-each-file-info (fi *this-dir*)
174 (let ((path (canonicalize-path (file-name fi))))
175 (cond
176 ((regular-p fi)
177 (push path new-paths))
178 ((directory-p fi)
179 (when (path-equal path *mcvs-dir*)
180 (skip))))))
181
182 (setf new-paths (sort new-paths #'string<))
183
184 (multiple-value-setq (added-files removed-files stable-files)
185 (added-removed old-paths new-paths
186 :key-old #'second))
187
188 (chatter-terse "Analyzing ~a added files.~%" (length added-files))
189 (setf added-files (mapcar #'(lambda (name)
190 (list name (word-hash-file name)))
191 added-files))
192
193 (chatter-terse "Analyzing ~a removed files.~%" (length removed-files))
194 (setf removed-files (mapcar #'(lambda (map-entry)
195 (destructuring-bind (f-file path)
196 map-entry
197 (list path
198 (word-hash-file f-file)
199 f-file)))
200 removed-files))
201 (let ((common-word-hash (make-hash-table :test #'equalp)))
202 (determine-common-words common-word-hash added-files)
203 (determine-common-words common-word-hash removed-files)
204 (let ((threshold (min 5 (* 1/5 (+ (length added-files)
205 (length removed-files))))))
206 (eliminate-common-words common-word-hash added-files threshold)
207 (eliminate-common-words common-word-hash removed-files threshold)))
208 (chatter-terse "Determining move candidates.~%")
209 (multiple-value-bind (moved-files added-files removed-files)
210 (move-candidates added-files removed-files)
211 (let ((mapping (mapping-read *mcvs-map*))
212 (f-hash (make-hash-table :test #'equal)))
213 (when moved-files
214 (dolist (pair moved-files)
215 (destructuring-bind ((target-name hash-2)
216 (source-name hash-1 f-file) confidence)
217 pair
218 (declare (ignore hash-1 hash-2))
219 (chatter-terse "moving ~a -> ~a (confidence ~a%)~%"
220 source-name target-name (round (* confidence 100)))
221 (setf (gethash f-file f-hash) target-name)))
222
223 (setf mapping
224 (mapcar #'(lambda (entry)
225 (let ((replacement (gethash (first entry)
226 f-hash)))
227 (if replacement
228 (list (first entry) replacement)
229 entry)))
230 mapping))
231 (mapping-write mapping *mcvs-map*))
232 (mapping-write mapping *mcvs-map-local*)
233 (dolist (entry mapping)
234 (when (gethash (first entry) f-hash)
235 (unlink (first entry))
236 (link (second entry) (first entry))))
237 (dolist (entry stable-files)
238 (unlink (first entry))
239 (link (second entry) (first entry))))
240 (when removed-files
241 (mcvs-remove nil (mapcar #'first removed-files)))
242 (when added-files
243 (mcvs-add nil global-options nil (mapcar #'first added-files)))))))
244
245 (defun mcvs-grab-wrapper (global-options command-options args)
246 (when (/= (length args) 2)
247 (error "mcvs-grab: specify module name and branch."))
248 (destructuring-bind (module branch) args
249 (mcvs-grab global-options command-options module branch)))

  ViewVC Help
Powered by ViewVC 1.1.5