/[meta-cvs]/meta-cvs/F-26D2A2DCE1CF180F943606C59DC63A51.lisp
ViewVC logotype

Contents of /meta-cvs/F-26D2A2DCE1CF180F943606C59DC63A51.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations)
Wed Jun 26 22:21:44 2002 UTC (11 years, 10 months ago) by kaz
Branch: MAIN
Changes since 1.1: +37 -14 lines
* grab.lisp (read-word-hash): Memory use optimizations: use smaller
initial size for tokens; re-use the same token object when the
hash already contains a duplicate.
(correlate): Renamed to correlate-word-hashes.
(correlate-paths): New function; computes a confidence-reducing
factor based on the differences between two paths.
(move-candidates): Use correlate-paths to lower the confidence
for distant moves. This could help sort out ambiguities when
projects contain very similar or duplicate files which are subject
to parallel moves.

* seqfuncs.lisp (lcs-list, lcs-vector,
longest-common-subsequence): New functions.
1 kaz 1.1 (require "dirwalk")
2     (require "system")
3 kaz 1.2 (require "split")
4     (require "seqfuncs")
5 kaz 1.1 (require "mapping")
6     (require "types")
7     (require "chatter")
8     (require "options")
9     (require "checkout")
10     (require "remove")
11     (require "add")
12     (provide "grab")
13    
14     (defun read-word-hash (&optional (byte-stream t))
15     (let ((word-hash (make-hash-table :test #'equal))
16     token
17     (state :junk))
18     (labels ((new-token ()
19 kaz 1.2 (if token
20     (setf (fill-pointer token) 0)
21     (setf token (make-array '(8)
22     :element-type 'character
23     :adjustable t
24     :fill-pointer 0))))
25     (save-token ()
26     (unless (gethash token word-hash)
27     (setf (gethash token word-hash) token)
28     (setf token nil))))
29 kaz 1.1 (do ((byte (read-byte byte-stream nil)
30     (read-byte byte-stream nil)))
31 kaz 1.2 ((null byte) (if (eq state :word) (save-token)) word-hash)
32 kaz 1.1 (let ((ch (int-char byte)))
33     (ecase state
34     ((:junk)
35     (when (or (alpha-char-p ch) (digit-char-p ch)
36     (char= ch #\_))
37 kaz 1.2 (new-token)
38 kaz 1.1 (vector-push-extend ch token)
39     (setf state :word)))
40     ((:word)
41     (cond
42     ((or (alpha-char-p ch) (digit-char-p ch)
43     (char= ch #\_) (char= ch #\-))
44     (vector-push-extend ch token))
45     (t (setf state :junk)
46 kaz 1.2 (save-token))))))))))
47 kaz 1.1
48     (defun word-hash-file (name)
49     (with-open-file (s (parse-posix-namestring name)
50     :direction :input
51     :element-type 'unsigned-byte)
52     (read-word-hash s)))
53    
54 kaz 1.2 (defun correlate-word-hashes (hash-1 hash-2)
55 kaz 1.1 (let ((hc-1 (hash-table-count hash-1))
56     (hc-2 (hash-table-count hash-2)))
57     (when (> hc-1 hc-2)
58     (psetf hash-1 hash-2 hash-2 hash-1
59     hc-1 hc-2 hc-2 hc-1))
60     (let ((common-count 0))
61     (maphash #'(lambda (key element)
62     (declare (ignore key))
63     (when (gethash element hash-2)
64     (incf common-count)))
65     hash-1)
66     (let ((total-count (- (+ hc-1 hc-2) common-count)))
67     (if (zerop total-count)
68     0
69     (/ common-count total-count))))))
70    
71 kaz 1.2
72     (defun correlate-paths (path-1 path-2)
73     (let* ((split-1 (split-fields path-1 *path-sep*))
74     (split-2 (split-fields path-2 *path-sep*))
75     (longer (max (length split-1) (length split-2)))
76     (lcs-len (length (longest-common-subsequence split-1 split-2
77     :test #'string=))))
78     (case (- longer lcs-len)
79     ((0 1) 1)
80     ((2) 95/100)
81     ((3) 90/100)
82     ((4) 85/100)
83     (otherwise 80/100))))
84    
85 kaz 1.1 (defun added-removed (old-paths new-paths
86     &key (key-old #'values) (key-new #'values))
87     (let (added-files removed-files stable-files)
88     (let ((old-iter old-paths)
89     (new-iter new-paths))
90     (symbol-macrolet ((old-key (funcall key-old (first old-iter)))
91     (new-key (funcall key-new (first new-iter))))
92     (flet ((inc-old ()
93     (push (first old-iter) removed-files)
94     (setf old-iter (rest old-iter)))
95     (inc-new ()
96     (push (first new-iter) added-files)
97     (setf new-iter (rest new-iter))))
98     (loop
99     (cond
100     ((and (null old-iter) (null new-iter))
101     (return))
102     ((null old-iter)
103     (inc-new))
104     ((null new-iter)
105     (inc-old))
106     ((string-lessp old-key new-key)
107     (inc-old))
108     ((string= new-key old-key)
109     (push (first old-iter) stable-files)
110     (setf old-iter (rest old-iter))
111     (setf new-iter (rest new-iter)))
112     (t (inc-new)))))))
113     (values added-files removed-files stable-files)))
114    
115     (defun move-candidates (added-files removed-files)
116     (let (pairs candidates taken-added taken-removed)
117     (dolist (added added-files)
118     (dolist (removed removed-files)
119 kaz 1.2 (let ((correlation (correlate-word-hashes (second added)
120     (second removed))))
121     (when (>= correlation 7/10)
122     (push (list added removed
123     (* correlation (correlate-paths (first added)
124     (first removed))))
125     pairs)))))
126 kaz 1.1 (setf pairs (sort pairs #'> :key #'third))
127     (dolist (pair pairs)
128     (unless (or (member (first pair) taken-added :test #'eq)
129     (member (second pair) taken-removed :test #'eq))
130     (push (first pair) taken-added)
131     (push (second pair) taken-removed)
132     (push pair candidates)))
133     (values candidates
134     (set-difference added-files taken-added :test #'eq)
135     (set-difference removed-files taken-removed :test #'eq))))
136    
137     (defun mcvs-grab (global-options command-options module branch)
138     (declare (ignore command-options))
139     (mcvs-checkout module global-options
140     `(("d" ,*this-dir*) ("r" ,branch))
141     :no-generate t)
142     (in-sandbox-root-dir
143     (let ((old-paths (sort (mapping-read *mcvs-map*)
144     #'string-lessp :key #'second))
145     new-paths added-files removed-files stable-files)
146     (chatter-info "Scanning directory structure.~%")
147    
148     (for-each-file-info (fi *this-dir*)
149     (let ((path (canonicalize-path (file-name fi))))
150     (cond
151     ((regular-p fi)
152     (push path new-paths))
153     ((directory-p fi)
154     (when (path-equal path *mcvs-dir*)
155     (skip))))))
156    
157     (setf new-paths (sort new-paths #'string-lessp))
158    
159     (multiple-value-setq (added-files removed-files stable-files)
160     (added-removed old-paths new-paths
161     :key-old #'second))
162    
163     (chatter-terse "Analyzing ~a added files.~%" (length added-files))
164     (setf added-files (mapcar #'(lambda (name)
165     (list name (word-hash-file name)))
166     added-files))
167    
168     (chatter-terse "Analyzing ~a removed files.~%" (length removed-files))
169     (setf removed-files (mapcar #'(lambda (map-entry)
170     (destructuring-bind (f-file path)
171     map-entry
172     (list path
173     (word-hash-file f-file)
174     f-file)))
175     removed-files))
176     (chatter-terse "Determining move candidates.~%")
177     (multiple-value-bind (moved-files added-files removed-files)
178     (move-candidates added-files removed-files)
179     (let ((mapping (mapping-read *mcvs-map*))
180     (f-hash (make-hash-table :test #'equal)))
181     (when moved-files
182     (dolist (pair moved-files)
183     (destructuring-bind ((target-name hash-2)
184     (source-name hash-1 f-file) confidence)
185     pair
186     (declare (ignore hash-1 hash-2))
187     (chatter-terse "moving ~a -> ~a (confidence ~a%)~%"
188     source-name target-name (round (* confidence 100)))
189     (setf (gethash f-file f-hash) target-name)))
190    
191     (setf mapping
192     (mapcar #'(lambda (entry)
193     (let ((replacement (gethash (first entry)
194     f-hash)))
195     (if replacement
196     (list (first entry) replacement)
197     entry)))
198     mapping))
199     (mapping-write mapping *mcvs-map*))
200     (mapping-write mapping *mcvs-map-local*)
201     (dolist (entry mapping)
202     (when (gethash (first entry) f-hash)
203     (unlink (first entry))
204     (link (second entry) (first entry))))
205     (dolist (entry stable-files)
206     (unlink (first entry))
207     (link (second entry) (first entry))))
208     (when removed-files
209     (mcvs-remove nil (mapcar #'first removed-files)))
210     (when added-files
211     (mcvs-add nil global-options nil (mapcar #'first added-files)))))))
212    
213     (defun mcvs-grab-wrapper (global-options command-options args)
214     (when (/= (length args) 2)
215     (error "mcvs-grab: specify module name and branch."))
216     (destructuring-bind (module branch) args
217     (mcvs-grab global-options command-options module branch)))

  ViewVC Help
Powered by ViewVC 1.1.5