/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (show annotations)
Fri Mar 11 10:25:58 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.9: +42 -2 lines
recognize the <a> and </a> tags
1 ;;; -*- Mode: Lisp; Package: COMMON-LISP-USER -*-
2
3 ;;; (c) copyright 2005 by
4 ;;; Robert Strandh (strandh@labri.fr)
5
6 ;;; This library is free software; you can redistribute it and/or
7 ;;; modify it under the terms of the GNU Library General Public
8 ;;; License as published by the Free Software Foundation; either
9 ;;; version 2 of the License, or (at your option) any later version.
10 ;;;
11 ;;; This library is distributed in the hope that it will be useful,
12 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;;; Library General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU Library General Public
17 ;;; License along with this library; if not, write to the
18 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 ;;; Boston, MA 02111-1307 USA.
20
21 ;;; Syntax for analysing HTML
22
23 (in-package :climacs-html-syntax)
24
25 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 ;;;
27 ;;; this should really go in syntax.lisp
28
29 (defclass parse-tree ()
30 ((start-mark :initarg :start-mark :reader start-mark)
31 (size :initarg :size)))
32
33 (defgeneric start-offset (parse-tree))
34
35 (defmethod start-offset ((tree parse-tree))
36 (offset (start-mark tree)))
37
38 (defgeneric end-offset (parse-tree))
39
40 (defmethod end-offset ((tree parse-tree))
41 (with-slots (start-mark size) tree
42 (+ (offset start-mark) size)))
43
44 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45 ;;;
46 ;;; grammar classes
47
48 (defclass html-sym (parse-tree)
49 ((badness :initform 0 :initarg :badness :reader badness)
50 (message :initform "" :initarg :message :reader message)))
51
52 (defmethod parse-tree-better ((t1 html-sym) (t2 html-sym))
53 (and (eq (class-of t1) (class-of t2))
54 (< (badness t1) (badness t2))))
55
56 (defclass html-nonterminal (html-sym) ())
57
58 (defclass words (html-nonterminal) ())
59
60 (defclass empty-words (words) ())
61
62 (defclass nonempty-words (words)
63 ((words :initarg :words)
64 (word :initarg :word)))
65
66 (defclass html-balanced (html-nonterminal)
67 ((start :initarg :start)
68 (end :initarg :end)))
69
70 (defclass html (html-balanced)
71 ((head :initarg :head)
72 (body :initarg :body)))
73
74 (defclass head (html-balanced)
75 ((title :initarg :title)))
76
77 (defclass html-words (html-balanced)
78 ((words :initarg :words)))
79
80 (defclass title (html-words) ())
81 (defclass body (html-words) ())
82 (defclass h1 (html-words) ())
83 (defclass h2 (html-words) ())
84 (defclass h3 (html-words) ())
85 (defclass a (html-words) ())
86 (defclass para (html-words) ())
87
88 (defclass html-token (html-sym)
89 ((ink) (face)))
90
91 (defclass html-tag (html-token) ())
92
93 (defclass <html> (html-tag) () (:default-initargs :size 6))
94 (defclass </html> (html-tag) ()(:default-initargs :size 7))
95 (defclass <head> (html-tag) () (:default-initargs :size 6))
96 (defclass </head> (html-tag) () (:default-initargs :size 7))
97 (defclass <title> (html-tag) () (:default-initargs :size 7))
98 (defclass </title> (html-tag) () (:default-initargs :size 8))
99 (defclass <body> (html-tag) () (:default-initargs :size 6))
100 (defclass </body> (html-tag) () (:default-initargs :size 7))
101 (defclass <h1> (html-tag) () (:default-initargs :size 4))
102 (defclass </h1> (html-tag) () (:default-initargs :size 5))
103 (defclass <h2> (html-tag) () (:default-initargs :size 4))
104 (defclass </h2> (html-tag) () (:default-initargs :size 5))
105 (defclass <h3> (html-tag) () (:default-initargs :size 4))
106 (defclass </h3> (html-tag) () (:default-initargs :size 5))
107 (defclass <p> (html-tag) () (:default-initargs :size 3))
108 (defclass </p> (html-tag) () (:default-initargs :size 4))
109 (defclass <ul> (html-tag) () (:default-initargs :size 4))
110 (defclass </ul> (html-tag) () (:default-initargs :size 5))
111 (defclass <li> (html-tag) () (:default-initargs :size 4))
112 (defclass </li> (html-tag) () (:default-initargs :size 5))
113 (defclass <a> (html-tag)
114 ((start :initarg :start)
115 (word :initarg :word)
116 (words :initarg :words)
117 (end :initarg :end)))
118 (defclass </a> (html-tag) () (:default-initargs :size 4))
119
120
121 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
122 ;;;
123 ;;; lexer
124
125 (defclass html-element (html-token)
126 ((state :initarg :state)))
127
128 (defclass start-element (html-element) ())
129 (defclass tag-start (html-element) ())
130 (defclass tag-end (html-element) ())
131 (defclass slash (html-element) ())
132 (defclass word (html-element) ())
133 (defclass delimiter (html-element) ())
134
135 (defun next-token (scan)
136 (let ((start-mark (clone-mark scan)))
137 (flet ((fo () (forward-object scan)))
138 (macrolet ((make-entry (type)
139 `(return-from next-token
140 (make-instance ,type :start-mark start-mark
141 :size (- (offset scan) (offset start-mark))))))
142 (loop with object = (object-after scan)
143 until (end-of-buffer-p scan)
144 do (case object
145 (#\< (fo) (make-entry 'tag-start))
146 (#\> (fo) (make-entry 'tag-end))
147 (#\/ (fo) (make-entry 'slash))
148 (t (cond ((alphanumericp object)
149 (loop until (end-of-buffer-p scan)
150 while (alphanumericp (object-after scan))
151 do (fo))
152 (make-entry 'word))
153 (t
154 (fo) (make-entry 'delimiter))))))))))
155
156 (define-syntax html-syntax ("HTML" (basic-syntax))
157 ((tokens :initform (make-instance 'standard-flexichain))
158 (guess-pos :initform 1)
159 (valid-parse :initform 1)
160 (parser)))
161
162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
163 ;;;
164 ;;; parser
165
166 (defun word-is (word string)
167 (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
168 string))
169
170 (defparameter *html-grammar*
171 (grammar
172 (<html> -> (tag-start
173 (word (and (= (end-offset tag-start) (start-offset word))
174 (word-is word "html")))
175 (tag-end (= (end-offset word) (start-offset tag-end))))
176 :start-mark (start-mark tag-start))
177 (</html> -> (tag-start
178 (slash (= (end-offset tag-start) (start-offset slash)))
179 (word (and (= (end-offset slash) (start-offset word))
180 (word-is word "html")))
181 (tag-end (= (end-offset word) (start-offset tag-end))))
182 :start-mark (start-mark tag-start))
183 (<head> -> (tag-start
184 (word (and (= (end-offset tag-start) (start-offset word))
185 (word-is word "head")))
186 (tag-end (= (end-offset word) (start-offset tag-end))))
187 :start-mark (start-mark tag-start))
188 (</head> -> (tag-start
189 (slash (= (end-offset tag-start) (start-offset slash)))
190 (word (and (= (end-offset slash) (start-offset word))
191 (word-is word "head")))
192 (tag-end (= (end-offset word) (start-offset tag-end))))
193 :start-mark (start-mark tag-start))
194 (<title> -> (tag-start
195 (word (and (= (end-offset tag-start) (start-offset word))
196 (word-is word "title")))
197 (tag-end (= (end-offset word) (start-offset tag-end))))
198 :start-mark (start-mark tag-start))
199 (</title> -> (tag-start
200 (slash (= (end-offset tag-start) (start-offset slash)))
201 (word (and (= (end-offset slash) (start-offset word))
202 (word-is word "title")))
203 (tag-end (= (end-offset word) (start-offset tag-end))))
204 :start-mark (start-mark tag-start))
205 (<body> -> (tag-start
206 (word (and (= (end-offset tag-start) (start-offset word))
207 (word-is word "body")))
208 (tag-end (= (end-offset word) (start-offset tag-end))))
209 :start-mark (start-mark tag-start))
210 (</body> -> (tag-start
211 (slash (= (end-offset tag-start) (start-offset slash)))
212 (word (and (= (end-offset slash) (start-offset word))
213 (word-is word "body")))
214 (tag-end (= (end-offset word) (start-offset tag-end))))
215 :start-mark (start-mark tag-start))
216 (<a> -> (tag-start
217 (word (and (= (end-offset tag-start) (start-offset word))
218 (word-is word "a")))
219 words
220 tag-end)
221 :start-mark (start-mark tag-start)
222 :size (- (end-offset tag-end) (start-offset tag-start))
223 :start tag-start :word word :words words :end tag-end)
224 (</a> -> (tag-start
225 (slash (= (end-offset tag-start) (start-offset slash)))
226 (word (and (= (end-offset slash) (start-offset word))
227 (word-is word "a")))
228 (tag-end (= (end-offset word) (start-offset tag-end))))
229 :start-mark (start-mark tag-start))
230 (html -> (<html> head body </html>)
231 :start-mark (start-mark <html>)
232 :size (- (end-offset </html>) (start-offset <html>))
233 :start <html> :head head :body body :end </html>)
234 (head -> (<head> title </head>)
235 :start-mark (start-mark <head>)
236 :size (- (end-offset </head>) (start-offset <head>))
237 :start <head> :title title :end </head>)
238 (title -> (<title> words </title>)
239 :start-mark (start-mark <title>)
240 :size (- (end-offset </title>) (start-offset <title>))
241 :start <title> :words words :end </title>)
242 (body -> (<body> words </body>)
243 :start-mark (start-mark <body>)
244 :size (- (end-offset </body>) (start-offset <body>))
245 :start <body> :words words :end </body>)
246 (a -> (<a> words </a>)
247 :start-mark (start-mark <a>)
248 :size (- (end-offset </a>) (start-offset <a>))
249 :start <a> :words words :end </a>)
250 (words -> ()
251 (make-instance 'empty-words :start-mark nil))
252 (words -> (words word)
253 (make-instance 'nonempty-words
254 :start-mark (or (start-mark words) (start-mark word))
255 :size (- (end-offset word) (offset (or (start-mark words) (start-mark word))))
256 :words words :word word))
257 (word -> (a)
258 :start-mark (start-mark a)
259 :size (- (end-offset a) (start-offset a)))
260 (word -> (delimiter)
261 :start-mark (start-mark delimiter)
262 :size (- (end-offset delimiter) (start-offset delimiter)))))
263
264
265 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
266 (declare (ignore args))
267 (with-slots (parser tokens buffer) syntax
268 (setf parser (make-instance 'parser
269 :grammar *html-grammar*
270 :target 'html))
271 (insert* tokens 0 (make-instance 'start-element
272 :start-mark (make-instance 'standard-left-sticky-mark
273 :buffer buffer
274 :offset 0)
275 :size 0
276 :state (initial-state parser)))))
277
278 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
279 ;;;
280 ;;; update syntax
281
282 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
283 (with-slots (parser tokens valid-parse) syntax
284 (loop until (= valid-parse (nb-elements tokens))
285 while (mark<= (end-offset (element* tokens valid-parse)) bot)
286 do (let ((current-token (element* tokens (1- valid-parse)))
287 (next-token (element* tokens valid-parse)))
288 (setf (slot-value next-token 'state)
289 (advance-parse parser (list next-token) (slot-value current-token 'state))))
290 (incf valid-parse))))
291
292 (defmethod update-syntax (buffer (syntax html-syntax))
293 (let ((low-mark (low-mark buffer))
294 (high-mark (high-mark buffer))
295 (scan))
296 (with-slots (tokens guess-pos valid-parse) syntax
297 (when (mark<= low-mark high-mark)
298 ;; go back to a position before low-mark
299 (loop until (or (= guess-pos 1)
300 (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
301 do (decf guess-pos))
302 ;; go forward to the last position before low-mark
303 (loop with nb-elements = (nb-elements tokens)
304 until (or (= guess-pos nb-elements)
305 (mark>= (end-offset (element* tokens guess-pos)) low-mark))
306 do (incf guess-pos))
307 ;; mark valid parse
308 (setf valid-parse guess-pos)
309 ;; delete entries that must be reparsed
310 (loop until (or (= guess-pos (nb-elements tokens))
311 (mark> (start-mark (element* tokens guess-pos)) high-mark))
312 do (delete* tokens guess-pos))
313 (setf scan (make-instance 'standard-left-sticky-mark
314 :buffer buffer
315 :offset (if (zerop guess-pos)
316 0
317 (end-offset (element* tokens (1- guess-pos))))))
318 ;; scan
319 (loop with start-mark = nil
320 do (loop until (end-of-buffer-p scan)
321 while (whitespacep (object-after scan))
322 do (forward-object scan))
323 until (if (end-of-buffer-p high-mark)
324 (end-of-buffer-p scan)
325 (mark> scan high-mark))
326 do (setf start-mark (clone-mark scan))
327 (insert* tokens guess-pos (next-token scan))
328 (incf guess-pos))))))
329
330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
331 ;;;
332 ;;; display
333
334 (defvar *white-space-start* nil)
335
336 (defvar *cursor-positions* nil)
337 (defvar *current-line* 0)
338
339 (defun handle-whitespace (pane buffer start end)
340 (let ((space-width (space-width pane))
341 (tab-width (tab-width pane)))
342 (loop while (< start end)
343 do (ecase (buffer-object buffer start)
344 (#\Newline (terpri pane)
345 (setf (aref *cursor-positions* (incf *current-line*))
346 (multiple-value-bind (x y) (stream-cursor-position pane)
347 (declare (ignore x))
348 y)))
349 (#\Space (stream-increment-cursor-position
350 pane space-width 0))
351 (#\Tab (let ((x (stream-cursor-position pane)))
352 (stream-increment-cursor-position
353 pane (- tab-width (mod x tab-width)) 0))))
354 (incf start))))
355
356 (defmethod display-parse-tree :around ((entity html-sym) syntax pane)
357 (with-slots (top bot) pane
358 (when (mark> (end-offset entity) top)
359 (call-next-method))))
360
361 (defmethod display-parse-tree :around ((entity empty-words) syntax pane)
362 (declare (ignore syntax pane))
363 nil)
364
365 (defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
366 (flet ((cache-test (t1 t2)
367 (and (eq t1 t2)
368 (eq (slot-value t1 'ink)
369 (medium-ink (sheet-medium pane)))
370 (eq (slot-value t1 'face)
371 (text-style-face (medium-text-style (sheet-medium pane)))))))
372 (updating-output (pane :unique-id entity
373 :id-test #'eq
374 :cache-value entity
375 :cache-test #'cache-test)
376 (with-slots (ink face) entity
377 (setf ink (medium-ink (sheet-medium pane))
378 face (text-style-face (medium-text-style (sheet-medium pane))))
379 (present (coerce (region-to-sequence (start-mark entity)
380 (end-offset entity))
381 'string)
382 'string
383 :stream pane)))))
384
385 (defmethod display-parse-tree :around ((entity html-tag) (syntax html-syntax) pane)
386 (with-drawing-options (pane :ink +green+)
387 (call-next-method)))
388
389 (defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane)
390 (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
391 (setf *white-space-start* (end-offset entity)))
392
393 (defmethod display-parse-tree :before ((entity html-balanced) (syntax html-syntax) pane)
394 (with-slots (start) entity
395 (display-parse-tree start syntax pane)))
396
397 (defmethod display-parse-tree :after ((entity html-balanced) (syntax html-syntax) pane)
398 (with-slots (end) entity
399 (display-parse-tree end syntax pane)))
400
401 (defmethod display-parse-tree :around ((entity title) (syntax html-syntax) pane)
402 (with-text-face (pane :bold)
403 (call-next-method)))
404
405 (defmethod display-parse-tree ((entity html-words) (syntax html-syntax) pane)
406 (with-slots (words) entity
407 (display-parse-tree words syntax pane)))
408
409 (defmethod display-parse-tree ((entity empty-words) (syntax html-syntax) pane)
410 (declare (ignore pane))
411 nil)
412
413 (defmethod display-parse-tree ((entity nonempty-words) (syntax html-syntax) pane)
414 (with-slots (words word) entity
415 (display-parse-tree words syntax pane)
416 (display-parse-tree word syntax pane)))
417
418 (defmethod display-parse-tree ((entity html) (syntax html-syntax) pane)
419 (with-slots (head body) entity
420 (display-parse-tree head syntax pane)
421 (display-parse-tree body syntax pane)))
422
423 (defmethod display-parse-tree ((entity head) (syntax html-syntax) pane)
424 (with-slots (title) entity
425 (display-parse-tree title syntax pane)))
426
427 (defmethod display-parse-tree ((entity <a>) (syntax html-syntax) pane)
428 (with-slots (start word words end) entity
429 (display-parse-tree start syntax pane)
430 (display-parse-tree word syntax pane)
431 (display-parse-tree words syntax pane)
432 (display-parse-tree end syntax pane)))
433
434 (defgeneric display-parse-stack (symbol stack syntax pane))
435
436 (defmethod display-parse-stack (symbol stack (syntax html-syntax) pane)
437 (let ((next (parse-stack-next stack)))
438 (unless (null next)
439 (display-parse-stack (parse-stack-symbol next) next syntax pane))
440 (loop for parse-tree in (reverse (parse-stack-parse-trees stack))
441 do (display-parse-tree parse-tree syntax pane))))
442
443 (defun display-parse-state (state syntax pane)
444 (let ((top (parse-stack-top state)))
445 (if (not (null top))
446 (display-parse-stack (parse-stack-symbol top) top syntax pane)
447 (display-parse-tree (target-parse-tree state) syntax pane))))
448
449 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax html-syntax) current-p)
450 (with-slots (top bot) pane
451 (setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
452 *current-line* 0
453 (aref *cursor-positions* 0) (stream-cursor-position pane))
454 (with-slots (tokens) syntax
455 (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-elements tokens)))
456 1.0)))
457 ;; find the last token before bot
458 (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
459 ;; go back to a token before bot
460 (loop until (mark<= (end-offset (element* tokens (1- end-token-index))) bot)
461 do (decf end-token-index))
462 ;; go forward to the last token before bot
463 (loop until (or (= end-token-index (nb-elements tokens))
464 (mark> (start-offset (element* tokens end-token-index)) bot))
465 do (incf end-token-index))
466 (let ((start-token-index end-token-index))
467 ;; go back to the first token after top, or until the previous token
468 ;; contains a valid parser state
469 (loop until (or (mark<= (end-offset (element* tokens (1- start-token-index))) top)
470 (not (parse-state-empty-p
471 (slot-value (element* tokens (1- start-token-index)) 'state))))
472 do (decf start-token-index))
473 (let ((*white-space-start* (offset top)))
474 ;; display the parse tree if any
475 (unless (parse-state-empty-p (slot-value (element* tokens (1- start-token-index)) 'state))
476 (display-parse-state (slot-value (element* tokens (1- start-token-index)) 'state)
477 syntax
478 pane))
479 ;; display the tokens
480 (with-drawing-options (pane :ink +red+)
481 (loop while (< start-token-index end-token-index)
482 do (let ((token (element* tokens start-token-index)))
483 (display-parse-tree token syntax pane))
484 (incf start-token-index))))))))
485 (let* ((cursor-line (number-of-lines-in-region top (point pane)))
486 (height (text-style-height (medium-text-style pane) pane))
487 (cursor-y (+ (* cursor-line (+ height (stream-vertical-spacing pane)))))
488 (cursor-column (column-number (point pane)))
489 (cursor-x (* cursor-column (text-style-width (medium-text-style pane) pane))))
490 (updating-output (pane :unique-id -1)
491 (draw-rectangle* pane
492 (1- cursor-x) (- cursor-y (* 0.2 height))
493 (+ cursor-x 2) (+ cursor-y (* 0.8 height))
494 :ink (if current-p +red+ +blue+))))))
495

  ViewVC Help
Powered by ViewVC 1.1.5