/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (show annotations)
Thu Mar 10 06:37:40 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.7: +77 -4 lines
More progress on html-syntax, which may eventually become a model
for many different language syntax modules.

The display function now traverses the parse tree up as long as a
valid parse tree exists.  The rest of the display is done from the
token sequence.  It is likely that all of this can be abstracted out
and put into syntax.lisp so that html-syntax would just become a
client among others for this traversal.

Not only is the cursor still not displayed, whitespace is not handled
during the traversal of the parse tree.  This will likely be fixed in
the next few day.
1 ;;; -*- Mode: Lisp; Package: COMMON-LISP-USER -*-
2
3 ;;; (c) copyright 2005 by
4 ;;; Robert Strandh (strandh@labri.fr)
5
6 ;;; This library is free software; you can redistribute it and/or
7 ;;; modify it under the terms of the GNU Library General Public
8 ;;; License as published by the Free Software Foundation; either
9 ;;; version 2 of the License, or (at your option) any later version.
10 ;;;
11 ;;; This library is distributed in the hope that it will be useful,
12 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;;; Library General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU Library General Public
17 ;;; License along with this library; if not, write to the
18 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 ;;; Boston, MA 02111-1307 USA.
20
21 ;;; Syntax for analysing HTML
22
23 (in-package :climacs-html-syntax)
24
25 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 ;;;
27 ;;; grammar classes
28
29 (defclass html-sym ()
30 ((badness :initform 0 :initarg :badness :reader badness)
31 (message :initform "" :initarg :message :reader message)))
32
33 (defmethod parse-tree-better ((t1 html-sym) (t2 html-sym))
34 (and (eq (class-of t1) (class-of t2))
35 (< (badness t1) (badness t2))))
36
37 (defclass html-nonterminal (html-sym)
38 ((start-offset :initarg :start-offset :reader start-offset)
39 (end-offset :initarg :end-offset :reader end-offset)))
40
41 (defclass words (html-nonterminal) ())
42
43 (defclass empty-words (words) ())
44
45 (defclass nonempty-words (words)
46 ((words :initarg :words)
47 (word :initarg :word)))
48
49 (defclass html-balanced (html-nonterminal)
50 ((start :initarg :start)
51 (end :initarg :end)))
52
53 (defclass html (html-balanced)
54 ((head :initarg :head)
55 (body :initarg :body)))
56
57 (defclass head (html-balanced)
58 ((title :initarg :title)))
59
60 (defclass html-words (html-balanced)
61 ((words :initarg :words)))
62
63 (defclass title (html-words) ())
64 (defclass body (html-words) ())
65 (defclass h1 (html-words) ())
66 (defclass h2 (html-words) ())
67 (defclass h3 (html-words) ())
68 (defclass para (html-words) ())
69
70 (defclass html-token (html-sym)
71 ((start-mark :initarg :start-mark :reader start-mark)
72 (size :initarg :size)))
73
74 (defgeneric end-offset (html-token))
75
76 (defmethod end-offset ((token html-token))
77 (with-slots (start-mark size) token
78 (+ (offset start-mark) size)))
79
80 (defgeneric start-offset (html-token))
81
82 (defmethod start-offset ((token html-token))
83 (offset (start-mark token)))
84
85 (defclass <html> (html-token) () (:default-initargs :size 6))
86 (defclass </html> (html-token) ()(:default-initargs :size 7))
87 (defclass <head> (html-token) () (:default-initargs :size 6))
88 (defclass </head> (html-token) () (:default-initargs :size 7))
89 (defclass <title> (html-token) () (:default-initargs :size 7))
90 (defclass </title> (html-token) () (:default-initargs :size 8))
91 (defclass <body> (html-token) () (:default-initargs :size 6))
92 (defclass </body> (html-token) () (:default-initargs :size 7))
93 (defclass <h1> (html-token) () (:default-initargs :size 4))
94 (defclass </h1> (html-token) () (:default-initargs :size 5))
95 (defclass <h2> (html-token) () (:default-initargs :size 4))
96 (defclass </h2> (html-token) () (:default-initargs :size 5))
97 (defclass <h3> (html-token) () (:default-initargs :size 4))
98 (defclass </h3> (html-token) () (:default-initargs :size 5))
99 (defclass <p> (html-token) () (:default-initargs :size 3))
100 (defclass </p> (html-token) () (:default-initargs :size 4))
101 (defclass <ul> (html-token) () (:default-initargs :size 4))
102 (defclass </ul> (html-token) () (:default-initargs :size 5))
103 (defclass <li> (html-token) () (:default-initargs :size 4))
104 (defclass </li> (html-token) () (:default-initargs :size 5))
105
106 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
107 ;;;
108 ;;; lexer
109
110 (defclass html-element (html-token)
111 ((state :initarg :state)))
112
113 (defclass start-element (html-element) ())
114 (defclass tag-start (html-element) ())
115 (defclass tag-end (html-element) ())
116 (defclass slash (html-element) ())
117 (defclass word (html-element) ())
118 (defclass delimiter (html-element) ())
119
120 (defun next-token (scan)
121 (let ((start-mark (clone-mark scan)))
122 (flet ((fo () (forward-object scan)))
123 (macrolet ((make-entry (type)
124 `(return-from next-token
125 (make-instance ,type :start-mark start-mark
126 :size (- (offset scan) (offset start-mark))))))
127 (loop with object = (object-after scan)
128 until (end-of-buffer-p scan)
129 do (case object
130 (#\< (fo) (make-entry 'tag-start))
131 (#\> (fo) (make-entry 'tag-end))
132 (#\/ (fo) (make-entry 'slash))
133 (t (cond ((alphanumericp object)
134 (loop until (end-of-buffer-p scan)
135 while (alphanumericp (object-after scan))
136 do (fo))
137 (make-entry 'word))
138 (t
139 (fo) (make-entry 'delimiter))))))))))
140
141 (define-syntax html-syntax ("HTML" (basic-syntax))
142 ((tokens :initform (make-instance 'standard-flexichain))
143 (guess-pos :initform 1)
144 (valid-parse :initform 1)
145 (parser)))
146
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
148 ;;;
149 ;;; parser
150
151 (defun word-is (word string)
152 (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
153 string))
154
155 (defparameter *html-grammar*
156 (grammar
157 (<html> -> (tag-start
158 (word (and (= (end-offset tag-start) (start-offset word))
159 (word-is word "html")))
160 (tag-end (= (end-offset word) (start-offset tag-end))))
161 :start-mark (start-mark tag-start))
162 (</html> -> (tag-start
163 (slash (= (end-offset tag-start) (start-offset slash)))
164 (word (and (= (end-offset slash) (start-offset word))
165 (word-is word "html")))
166 (tag-end (= (end-offset word) (start-offset tag-end))))
167 :start-mark (start-mark tag-start))
168 (<head> -> (tag-start
169 (word (and (= (end-offset tag-start) (start-offset word))
170 (word-is word "head")))
171 (tag-end (= (end-offset word) (start-offset tag-end))))
172 :start-mark (start-mark tag-start))
173 (</head> -> (tag-start
174 (slash (= (end-offset tag-start) (start-offset slash)))
175 (word (and (= (end-offset slash) (start-offset word))
176 (word-is word "head")))
177 (tag-end (= (end-offset word) (start-offset tag-end))))
178 :start-mark (start-mark tag-start))
179 (<title> -> (tag-start
180 (word (and (= (end-offset tag-start) (start-offset word))
181 (word-is word "title")))
182 (tag-end (= (end-offset word) (start-offset tag-end))))
183 :start-mark (start-mark tag-start))
184 (</title> -> (tag-start
185 (slash (= (end-offset tag-start) (start-offset slash)))
186 (word (and (= (end-offset slash) (start-offset word))
187 (word-is word "title")))
188 (tag-end (= (end-offset word) (start-offset tag-end))))
189 :start-mark (start-mark tag-start))
190 (<body> -> (tag-start
191 (word (and (= (end-offset tag-start) (start-offset word))
192 (word-is word "body")))
193 (tag-end (= (end-offset word) (start-offset tag-end))))
194 :start-mark (start-mark tag-start))
195 (</body> -> (tag-start
196 (slash (= (end-offset tag-start) (start-offset slash)))
197 (word (and (= (end-offset slash) (start-offset word))
198 (word-is word "body")))
199 (tag-end (= (end-offset word) (start-offset tag-end))))
200 :start-mark (start-mark tag-start))
201 (html -> (<html> head body </html>)
202 :start-offset (start-offset <html>) :end-offset (end-offset </html>)
203 :start <html> :head head :body body :end </html>)
204 (head -> (<head> title </head>)
205 :start-offset (start-offset <head>) :end-offset (end-offset </head>)
206 :start <head> :title title :end </head>)
207 (title -> (<title> words </title>)
208 :start-offset (start-offset <title>) :end-offset (end-offset </title>)
209 :start <title> :words words :end </title>)
210 (body -> (<body> words </body>)
211 :start-offset (start-offset <body>) :end-offset (end-offset </body>)
212 :start <body> :words words :end </body>)
213 (words -> ()
214 (make-instance 'empty-words :start-offset nil))
215 (words -> (words word)
216 (make-instance 'nonempty-words
217 :start-offset (or (start-offset words) (start-offset word))
218 :end-offset (end-offset word)
219 :words words :word word))))
220
221 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
222 (declare (ignore args))
223 (with-slots (parser tokens buffer) syntax
224 (setf parser (make-instance 'parser
225 :grammar *html-grammar*
226 :target 'html))
227 (insert* tokens 0 (make-instance 'start-element
228 :start-mark (make-instance 'standard-left-sticky-mark
229 :buffer buffer
230 :offset 0)
231 :size 0
232 :state (initial-state parser)))))
233
234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
235 ;;;
236 ;;; update syntax
237
238 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
239 (with-slots (parser tokens valid-parse) syntax
240 (loop until (= valid-parse (nb-elements tokens))
241 while (mark<= (end-offset (element* tokens valid-parse)) bot)
242 do (let ((current-token (element* tokens (1- valid-parse)))
243 (next-token (element* tokens valid-parse)))
244 (setf (slot-value next-token 'state)
245 (advance-parse parser (list next-token) (slot-value current-token 'state))))
246 (incf valid-parse))))
247
248 (defmethod update-syntax (buffer (syntax html-syntax))
249 (let ((low-mark (low-mark buffer))
250 (high-mark (high-mark buffer))
251 (scan))
252 (with-slots (tokens guess-pos valid-parse) syntax
253 (when (mark<= low-mark high-mark)
254 ;; go back to a position before low-mark
255 (loop until (or (= guess-pos 1)
256 (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
257 do (decf guess-pos))
258 ;; go forward to the last position before low-mark
259 (loop with nb-elements = (nb-elements tokens)
260 until (or (= guess-pos nb-elements)
261 (mark>= (end-offset (element* tokens guess-pos)) low-mark))
262 do (incf guess-pos))
263 ;; mark valid parse
264 (setf valid-parse guess-pos)
265 ;; delete entries that must be reparsed
266 (loop until (or (= guess-pos (nb-elements tokens))
267 (mark> (start-mark (element* tokens guess-pos)) high-mark))
268 do (delete* tokens guess-pos))
269 (setf scan (make-instance 'standard-left-sticky-mark
270 :buffer buffer
271 :offset (if (zerop guess-pos)
272 0
273 (end-offset (element* tokens (1- guess-pos))))))
274 ;; scan
275 (loop with start-mark = nil
276 do (loop until (end-of-buffer-p scan)
277 while (whitespacep (object-after scan))
278 do (forward-object scan))
279 until (if (end-of-buffer-p high-mark)
280 (end-of-buffer-p scan)
281 (mark> scan high-mark))
282 do (setf start-mark (clone-mark scan))
283 (insert* tokens guess-pos (next-token scan))
284 (incf guess-pos))))))
285
286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
287 ;;;
288 ;;; display
289
290 (defun handle-whitespace (pane buffer start end)
291 (let ((space-width (space-width pane))
292 (tab-width (tab-width pane)))
293 (loop while (< start end)
294 do (ecase (buffer-object buffer start)
295 (#\Newline (terpri pane))
296 (#\Space (stream-increment-cursor-position
297 pane space-width 0))
298 (#\Tab (let ((x (stream-cursor-position pane)))
299 (stream-increment-cursor-position
300 pane (- tab-width (mod x tab-width)) 0))))
301 (incf start))))
302
303 (defmethod display-parse-tree :around ((entity html-sym) syntax pane)
304 (with-slots (top bot) pane
305 (when (mark> (end-offset entity) top)
306 (call-next-method))))
307
308 (defmethod display-parse-tree :around ((entity empty-words) syntax pane)
309 (declare (ignore syntax pane))
310 nil)
311
312 (defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
313 (updating-output (pane :unique-id entity
314 :id-test #'eq
315 :cache-value entity
316 :cache-test #'eq)
317 (present (coerce (region-to-sequence (start-mark entity)
318 (end-offset entity))
319 'string)
320 'string
321 :stream pane)))
322
323 (defmethod display-parse-tree :before ((entity html-balanced) (syntax html-syntax) pane)
324 (with-slots (start) entity
325 (display-parse-tree start syntax pane)))
326
327 (defmethod display-parse-tree :after ((entity html-balanced) (syntax html-syntax) pane)
328 (with-slots (end) entity
329 (display-parse-tree end syntax pane)))
330
331 (defmethod display-parse-tree ((entity html-words) (syntax html-syntax) pane)
332 (with-slots (words) entity
333 (display-parse-tree words syntax pane)))
334
335 (defmethod display-parse-tree ((entity empty-words) (syntax html-syntax) pane)
336 (declare (ignore pane))
337 nil)
338
339 (defmethod display-parse-tree ((entity nonempty-words) (syntax html-syntax) pane)
340 (with-slots (words word) entity
341 (display-parse-tree words syntax pane)
342 (display-parse-tree word syntax pane)))
343
344 (defmethod display-parse-tree ((entity html) (syntax html-syntax) pane)
345 (with-slots (head body) entity
346 (display-parse-tree head syntax pane)
347 (display-parse-tree body syntax pane)))
348
349 (defmethod display-parse-tree ((entity head) (syntax html-syntax) pane)
350 (with-slots (title) entity
351 (display-parse-tree title syntax pane)))
352
353 (defgeneric display-parse-stack (symbol stack syntax pane))
354
355 (defmethod display-parse-stack (symbol stack (syntax html-syntax) pane)
356 (let ((next (parse-stack-next stack)))
357 (unless (null next)
358 (display-parse-stack (parse-stack-symbol next) next syntax pane))
359 (loop for parse-tree in (reverse (parse-stack-parse-trees stack))
360 do (display-parse-tree parse-tree syntax pane))))
361
362 (defun display-parse-state (state syntax pane)
363 (let ((top (parse-stack-top state)))
364 (if (not (null top))
365 (display-parse-stack (parse-stack-symbol top) top syntax pane)
366 (display-parse-tree (target-parse-tree state) syntax pane))))
367
368 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax html-syntax) current-p)
369 (with-slots (top bot) pane
370 (with-slots (tokens) syntax
371 (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-elements tokens)))
372 1.0)))
373 ;; find the last token before bot
374 (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
375 ;; go back to a token before bot
376 (loop until (mark<= (end-offset (element* tokens (1- end-token-index))) bot)
377 do (decf end-token-index))
378 ;; go forward to the last token before bot
379 (loop until (or (= end-token-index (nb-elements tokens))
380 (mark> (start-offset (element* tokens end-token-index)) bot))
381 do (incf end-token-index))
382 (let ((start-token-index end-token-index))
383 ;; go back to the first token after top, or until the previous token
384 ;; contains a valid parser state
385 (loop until (or (mark<= (end-offset (element* tokens (1- start-token-index))) top)
386 (not (null (parse-stack-top
387 (slot-value (element* tokens (1- start-token-index)) 'state)))))
388 do (decf start-token-index))
389 ;; display the parse tree if any
390 (unless (parse-state-empty-p (slot-value (element* tokens (1- start-token-index)) 'state))
391 (display-parse-state (slot-value (element* tokens (1- start-token-index)) 'state)
392 syntax
393 pane))
394 ;; display the tokens
395 (loop with prev-offset = (end-offset (element* tokens (1- start-token-index)))
396 while (< start-token-index end-token-index)
397 do (let ((token (element* tokens start-token-index)))
398 (handle-whitespace pane (buffer pane) prev-offset (start-offset token))
399 (updating-output (pane :unique-id token
400 :id-test #'eq
401 :cache-value token
402 :cache-test #'eq)
403 (present (coerce (region-to-sequence (start-mark token)
404 (end-offset token))
405 'string)
406 'string
407 :stream pane))
408 (setf prev-offset (end-offset token)))
409 (incf start-token-index))))))))
410
411

  ViewVC Help
Powered by ViewVC 1.1.5