/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (show annotations)
Tue Mar 15 05:39:24 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.13: +2 -84 lines
The incremental lexer is now in the climacs-syntax package in the
syntax.lisp file.
1 ;;; -*- Mode: Lisp; Package: COMMON-LISP-USER -*-
2
3 ;;; (c) copyright 2005 by
4 ;;; Robert Strandh (strandh@labri.fr)
5
6 ;;; This library is free software; you can redistribute it and/or
7 ;;; modify it under the terms of the GNU Library General Public
8 ;;; License as published by the Free Software Foundation; either
9 ;;; version 2 of the License, or (at your option) any later version.
10 ;;;
11 ;;; This library is distributed in the hope that it will be useful,
12 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;;; Library General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU Library General Public
17 ;;; License along with this library; if not, write to the
18 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 ;;; Boston, MA 02111-1307 USA.
20
21 ;;; Syntax for analysing HTML
22
23 (in-package :climacs-html-syntax)
24
25 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 ;;;
27 ;;; grammar classes
28
29 (defclass html-sym (parse-tree)
30 ((badness :initform 0 :initarg :badness :reader badness)))
31
32 (defmethod parse-tree-better ((t1 html-sym) (t2 html-sym))
33 (and (eq (class-of t1) (class-of t2))
34 (< (badness t1) (badness t2))))
35
36 (defclass html-nonterminal (html-sym) ())
37
38 (defclass words (html-nonterminal) ())
39
40 (defclass empty-words (words) ())
41
42 (defclass nonempty-words (words)
43 ((words :initarg :words)
44 (word :initarg :word)))
45
46 (defclass html-balanced (html-nonterminal)
47 ((start :initarg :start)
48 (end :initarg :end)))
49
50 (defclass html (html-balanced)
51 ((head :initarg :head)
52 (body :initarg :body)))
53
54 (defclass head (html-balanced)
55 ((title :initarg :title)))
56
57 (defclass html-words (html-balanced)
58 ((words :initarg :words)))
59
60 (defclass title (html-words) ())
61 (defclass body (html-words) ())
62 (defclass h1 (html-words) ())
63 (defclass h2 (html-words) ())
64 (defclass h3 (html-words) ())
65 (defclass a (html-words) ())
66 (defclass para (html-words) ())
67
68 (defclass html-token (html-sym)
69 ((ink) (face)))
70
71 (defclass html-tag (html-token) ())
72
73 (defclass <html> (html-tag) () (:default-initargs :size 6))
74 (defclass </html> (html-tag) ()(:default-initargs :size 7))
75 (defclass <head> (html-tag) () (:default-initargs :size 6))
76 (defclass </head> (html-tag) () (:default-initargs :size 7))
77 (defclass <title> (html-tag) () (:default-initargs :size 7))
78 (defclass </title> (html-tag) () (:default-initargs :size 8))
79 (defclass <body> (html-tag) () (:default-initargs :size 6))
80 (defclass </body> (html-tag) () (:default-initargs :size 7))
81 (defclass <h1> (html-tag) () (:default-initargs :size 4))
82 (defclass </h1> (html-tag) () (:default-initargs :size 5))
83 (defclass <h2> (html-tag) () (:default-initargs :size 4))
84 (defclass </h2> (html-tag) () (:default-initargs :size 5))
85 (defclass <h3> (html-tag) () (:default-initargs :size 4))
86 (defclass </h3> (html-tag) () (:default-initargs :size 5))
87 (defclass <p> (html-tag) () (:default-initargs :size 3))
88 (defclass </p> (html-tag) () (:default-initargs :size 4))
89 (defclass <ul> (html-tag) () (:default-initargs :size 4))
90 (defclass </ul> (html-tag) () (:default-initargs :size 5))
91 (defclass <li> (html-tag) () (:default-initargs :size 4))
92 (defclass </li> (html-tag) () (:default-initargs :size 5))
93 (defclass <a> (html-tag)
94 ((start :initarg :start)
95 (word :initarg :word)
96 (words :initarg :words)
97 (end :initarg :end)))
98 (defclass </a> (html-tag) () (:default-initargs :size 4))
99
100
101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
102 ;;;
103 ;;; lexer
104
105 (defclass html-element (html-token)
106 ((state :initarg :state)))
107
108 (defclass start-element (html-element) ())
109 (defclass tag-start (html-element) ())
110 (defclass tag-end (html-element) ())
111 (defclass slash (html-element) ())
112 (defclass word (html-element) ())
113 (defclass delimiter (html-element) ())
114
115 (defmethod next-lexeme ((lexer html-lexer) scan)
116 (flet ((fo () (forward-object scan)))
117 (let ((object (object-after scan)))
118 (case object
119 (#\< (fo) (make-instance 'tag-start))
120 (#\> (fo) (make-instance 'tag-end))
121 (#\/ (fo) (make-instance 'slash))
122 (t (cond ((alphanumericp object)
123 (loop until (end-of-buffer-p scan)
124 while (alphanumericp (object-after scan))
125 do (fo))
126 (make-instance 'word))
127 (t
128 (fo) (make-instance 'delimiter))))))))
129
130 (defclass html-lexer (incremental-lexer) ())
131
132 (define-syntax html-syntax ("HTML" (basic-syntax))
133 ((lexer :reader lexer)
134 (valid-parse :initform 1)
135 (parser)))
136
137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
138 ;;;
139 ;;; parser
140
141 (defun word-is (word string)
142 (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
143 string))
144
145 (defparameter *html-grammar*
146 (grammar
147 (<html> -> (tag-start
148 (word (and (= (end-offset tag-start) (start-offset word))
149 (word-is word "html")))
150 (tag-end (= (end-offset word) (start-offset tag-end))))
151 :start-mark (start-mark tag-start))
152 (</html> -> (tag-start
153 (slash (= (end-offset tag-start) (start-offset slash)))
154 (word (and (= (end-offset slash) (start-offset word))
155 (word-is word "html")))
156 (tag-end (= (end-offset word) (start-offset tag-end))))
157 :start-mark (start-mark tag-start))
158 (<head> -> (tag-start
159 (word (and (= (end-offset tag-start) (start-offset word))
160 (word-is word "head")))
161 (tag-end (= (end-offset word) (start-offset tag-end))))
162 :start-mark (start-mark tag-start))
163 (</head> -> (tag-start
164 (slash (= (end-offset tag-start) (start-offset slash)))
165 (word (and (= (end-offset slash) (start-offset word))
166 (word-is word "head")))
167 (tag-end (= (end-offset word) (start-offset tag-end))))
168 :start-mark (start-mark tag-start))
169 (<title> -> (tag-start
170 (word (and (= (end-offset tag-start) (start-offset word))
171 (word-is word "title")))
172 (tag-end (= (end-offset word) (start-offset tag-end))))
173 :start-mark (start-mark tag-start))
174 (</title> -> (tag-start
175 (slash (= (end-offset tag-start) (start-offset slash)))
176 (word (and (= (end-offset slash) (start-offset word))
177 (word-is word "title")))
178 (tag-end (= (end-offset word) (start-offset tag-end))))
179 :start-mark (start-mark tag-start))
180 (<body> -> (tag-start
181 (word (and (= (end-offset tag-start) (start-offset word))
182 (word-is word "body")))
183 (tag-end (= (end-offset word) (start-offset tag-end))))
184 :start-mark (start-mark tag-start))
185 (</body> -> (tag-start
186 (slash (= (end-offset tag-start) (start-offset slash)))
187 (word (and (= (end-offset slash) (start-offset word))
188 (word-is word "body")))
189 (tag-end (= (end-offset word) (start-offset tag-end))))
190 :start-mark (start-mark tag-start))
191 (<a> -> (tag-start
192 (word (and (= (end-offset tag-start) (start-offset word))
193 (word-is word "a")))
194 words
195 tag-end)
196 :start-mark (start-mark tag-start)
197 :size (- (end-offset tag-end) (start-offset tag-start))
198 :start tag-start :word word :words words :end tag-end)
199 (</a> -> (tag-start
200 (slash (= (end-offset tag-start) (start-offset slash)))
201 (word (and (= (end-offset slash) (start-offset word))
202 (word-is word "a")))
203 (tag-end (= (end-offset word) (start-offset tag-end))))
204 :start-mark (start-mark tag-start))
205 (html -> (<html> head body </html>)
206 :start-mark (start-mark <html>)
207 :size (- (end-offset </html>) (start-offset <html>))
208 :start <html> :head head :body body :end </html>)
209 (head -> (<head> title </head>)
210 :start-mark (start-mark <head>)
211 :size (- (end-offset </head>) (start-offset <head>))
212 :start <head> :title title :end </head>)
213 (title -> (<title> words </title>)
214 :start-mark (start-mark <title>)
215 :size (- (end-offset </title>) (start-offset <title>))
216 :start <title> :words words :end </title>)
217 (body -> (<body> words </body>)
218 :start-mark (start-mark <body>)
219 :size (- (end-offset </body>) (start-offset <body>))
220 :start <body> :words words :end </body>)
221 (a -> (<a> words </a>)
222 :start-mark (start-mark <a>)
223 :size (- (end-offset </a>) (start-offset <a>))
224 :start <a> :words words :end </a>)
225 (words -> ()
226 (make-instance 'empty-words :start-mark nil))
227 (words -> (words word)
228 (make-instance 'nonempty-words
229 :start-mark (or (start-mark words) (start-mark word))
230 :size (- (end-offset word) (offset (or (start-mark words) (start-mark word))))
231 :words words :word word))
232 (word -> (a)
233 :start-mark (start-mark a)
234 :size (- (end-offset a) (start-offset a)))
235 (word -> (delimiter)
236 :start-mark (start-mark delimiter)
237 :size (- (end-offset delimiter) (start-offset delimiter)))))
238
239
240 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
241 (declare (ignore args))
242 (with-slots (parser lexer buffer) syntax
243 (setf parser (make-instance 'parser
244 :grammar *html-grammar*
245 :target 'html))
246 (setf lexer (make-instance 'html-lexer :buffer (buffer syntax)))
247 (let ((m (clone-mark (low-mark buffer) :left)))
248 (setf (offset m) 0)
249 (insert-lexeme lexer 0 (make-instance 'start-element
250 :start-mark m
251 :size 0
252 :state (initial-state parser))))))
253
254 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
255 ;;;
256 ;;; update syntax
257
258
259 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
260 (with-slots (parser lexer valid-parse) syntax
261 (loop until (= valid-parse (nb-lexemes lexer))
262 while (mark<= (end-offset (lexeme lexer valid-parse)) bot)
263 do (let ((current-token (lexeme lexer (1- valid-parse)))
264 (next-lexeme (lexeme lexer valid-parse)))
265 (setf (slot-value next-lexeme 'state)
266 (advance-parse parser (list next-lexeme) (slot-value current-token 'state))))
267 (incf valid-parse))))
268
269 (defmethod inter-lexeme-object-p ((lexer html-lexer) object)
270 (whitespacep object))
271
272 (defmethod update-syntax (buffer (syntax html-syntax))
273 (with-slots (lexer valid-parse) syntax
274 (let* ((low-mark (low-mark buffer))
275 (high-mark (high-mark buffer))
276 (first-invalid-position (delete-invalid-lexemes lexer low-mark high-mark)))
277 (setf valid-parse first-invalid-position)
278 (update-lex lexer first-invalid-position high-mark))))
279
280 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
281 ;;;
282 ;;; display
283
284 (defvar *white-space-start* nil)
285
286 (defvar *cursor-positions* nil)
287 (defvar *current-line* 0)
288
289 (defun handle-whitespace (pane buffer start end)
290 (let ((space-width (space-width pane))
291 (tab-width (tab-width pane)))
292 (loop while (< start end)
293 do (ecase (buffer-object buffer start)
294 (#\Newline (terpri pane)
295 (setf (aref *cursor-positions* (incf *current-line*))
296 (multiple-value-bind (x y) (stream-cursor-position pane)
297 (declare (ignore x))
298 y)))
299 (#\Space (stream-increment-cursor-position
300 pane space-width 0))
301 (#\Tab (let ((x (stream-cursor-position pane)))
302 (stream-increment-cursor-position
303 pane (- tab-width (mod x tab-width)) 0))))
304 (incf start))))
305
306 (defmethod display-parse-tree :around ((entity html-sym) syntax pane)
307 (with-slots (top bot) pane
308 (when (mark> (end-offset entity) top)
309 (call-next-method))))
310
311 (defmethod display-parse-tree :around ((entity empty-words) syntax pane)
312 (declare (ignore syntax pane))
313 nil)
314
315 (defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
316 (flet ((cache-test (t1 t2)
317 (and (eq t1 t2)
318 (eq (slot-value t1 'ink)
319 (medium-ink (sheet-medium pane)))
320 (eq (slot-value t1 'face)
321 (text-style-face (medium-text-style (sheet-medium pane)))))))
322 (updating-output (pane :unique-id entity
323 :id-test #'eq
324 :cache-value entity
325 :cache-test #'cache-test)
326 (with-slots (ink face) entity
327 (setf ink (medium-ink (sheet-medium pane))
328 face (text-style-face (medium-text-style (sheet-medium pane))))
329 (present (coerce (region-to-sequence (start-mark entity)
330 (end-offset entity))
331 'string)
332 'string
333 :stream pane)))))
334
335 (defmethod display-parse-tree :around ((entity html-tag) (syntax html-syntax) pane)
336 (with-drawing-options (pane :ink +green+)
337 (call-next-method)))
338
339 (defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane)
340 (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
341 (setf *white-space-start* (end-offset entity)))
342
343 (defmethod display-parse-tree :before ((entity html-balanced) (syntax html-syntax) pane)
344 (with-slots (start) entity
345 (display-parse-tree start syntax pane)))
346
347 (defmethod display-parse-tree :after ((entity html-balanced) (syntax html-syntax) pane)
348 (with-slots (end) entity
349 (display-parse-tree end syntax pane)))
350
351 (defmethod display-parse-tree :around ((entity title) (syntax html-syntax) pane)
352 (with-text-face (pane :bold)
353 (call-next-method)))
354
355 (defmethod display-parse-tree ((entity html-words) (syntax html-syntax) pane)
356 (with-slots (words) entity
357 (display-parse-tree words syntax pane)))
358
359 (defmethod display-parse-tree ((entity empty-words) (syntax html-syntax) pane)
360 (declare (ignore pane))
361 nil)
362
363 (defmethod display-parse-tree ((entity nonempty-words) (syntax html-syntax) pane)
364 (with-slots (words word) entity
365 (display-parse-tree words syntax pane)
366 (display-parse-tree word syntax pane)))
367
368 (defmethod display-parse-tree ((entity html) (syntax html-syntax) pane)
369 (with-slots (head body) entity
370 (display-parse-tree head syntax pane)
371 (display-parse-tree body syntax pane)))
372
373 (defmethod display-parse-tree ((entity head) (syntax html-syntax) pane)
374 (with-slots (title) entity
375 (display-parse-tree title syntax pane)))
376
377 (defmethod display-parse-tree ((entity <a>) (syntax html-syntax) pane)
378 (with-slots (start word words end) entity
379 (display-parse-tree start syntax pane)
380 (display-parse-tree word syntax pane)
381 (display-parse-tree words syntax pane)
382 (display-parse-tree end syntax pane)))
383
384 (defgeneric display-parse-stack (symbol stack syntax pane))
385
386 (defmethod display-parse-stack (symbol stack (syntax html-syntax) pane)
387 (let ((next (parse-stack-next stack)))
388 (unless (null next)
389 (display-parse-stack (parse-stack-symbol next) next syntax pane))
390 (loop for parse-tree in (reverse (parse-stack-parse-trees stack))
391 do (display-parse-tree parse-tree syntax pane))))
392
393 (defun display-parse-state (state syntax pane)
394 (let ((top (parse-stack-top state)))
395 (if (not (null top))
396 (display-parse-stack (parse-stack-symbol top) top syntax pane)
397 (display-parse-tree (target-parse-tree state) syntax pane))))
398
399 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax html-syntax) current-p)
400 (with-slots (top bot) pane
401 (setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
402 *current-line* 0
403 (aref *cursor-positions* 0) (stream-cursor-position pane))
404 (with-slots (lexer) syntax
405 (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-lexemes lexer)))
406 1.0)))
407 ;; find the last token before bot
408 (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
409 ;; go back to a token before bot
410 (loop until (mark<= (end-offset (lexeme lexer (1- end-token-index))) bot)
411 do (decf end-token-index))
412 ;; go forward to the last token before bot
413 (loop until (or (= end-token-index (nb-lexemes lexer))
414 (mark> (start-offset (lexeme lexer end-token-index)) bot))
415 do (incf end-token-index))
416 (let ((start-token-index end-token-index))
417 ;; go back to the first token after top, or until the previous token
418 ;; contains a valid parser state
419 (loop until (or (mark<= (end-offset (lexeme lexer (1- start-token-index))) top)
420 (not (parse-state-empty-p
421 (slot-value (lexeme lexer (1- start-token-index)) 'state))))
422 do (decf start-token-index))
423 (let ((*white-space-start* (offset top)))
424 ;; display the parse tree if any
425 (unless (parse-state-empty-p (slot-value (lexeme lexer (1- start-token-index)) 'state))
426 (display-parse-state (slot-value (lexeme lexer (1- start-token-index)) 'state)
427 syntax
428 pane))
429 ;; display the lexemes
430 (with-drawing-options (pane :ink +red+)
431 (loop while (< start-token-index end-token-index)
432 do (let ((token (lexeme lexer start-token-index)))
433 (display-parse-tree token syntax pane))
434 (incf start-token-index))))))))
435 (let* ((cursor-line (number-of-lines-in-region top (point pane)))
436 (height (text-style-height (medium-text-style pane) pane))
437 (cursor-y (+ (* cursor-line (+ height (stream-vertical-spacing pane)))))
438 (cursor-column (column-number (point pane)))
439 (cursor-x (* cursor-column (text-style-width (medium-text-style pane) pane))))
440 (updating-output (pane :unique-id -1)
441 (draw-rectangle* pane
442 (1- cursor-x) (- cursor-y (* 0.2 height))
443 (+ cursor-x 2) (+ cursor-y (* 0.8 height))
444 :ink (if current-p +red+ +blue+))))))
445

  ViewVC Help
Powered by ViewVC 1.1.5