/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.20 - (hide annotations)
Sun Mar 20 08:25:21 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.19: +9 -6 lines
Factored out the rules for `html' and `head' so that they now
use add-rule.
1 rstrandh 1.16 ;;; -*- Mode: Lisp; Package: CLIMACS-HTML-SYNTAX -*-
2 rstrandh 1.1
3     ;;; (c) copyright 2005 by
4     ;;; Robert Strandh (strandh@labri.fr)
5    
6     ;;; This library is free software; you can redistribute it and/or
7     ;;; modify it under the terms of the GNU Library General Public
8     ;;; License as published by the Free Software Foundation; either
9     ;;; version 2 of the License, or (at your option) any later version.
10     ;;;
11     ;;; This library is distributed in the hope that it will be useful,
12     ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13     ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14     ;;; Library General Public License for more details.
15     ;;;
16     ;;; You should have received a copy of the GNU Library General Public
17     ;;; License along with this library; if not, write to the
18     ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19     ;;; Boston, MA 02111-1307 USA.
20    
21     ;;; Syntax for analysing HTML
22    
23 rstrandh 1.6 (in-package :climacs-html-syntax)
24 rstrandh 1.1
25     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26     ;;;
27     ;;; grammar classes
28    
29 rstrandh 1.17 (defclass html-parse-tree (parse-tree)
30 rstrandh 1.14 ((badness :initform 0 :initarg :badness :reader badness)))
31 rstrandh 1.1
32 rstrandh 1.17 (defmethod parse-tree-better ((t1 html-parse-tree) (t2 html-parse-tree))
33 rstrandh 1.1 (and (eq (class-of t1) (class-of t2))
34     (< (badness t1) (badness t2))))
35    
36 rstrandh 1.17 (defclass html-nonterminal (html-parse-tree) ())
37 rstrandh 1.6
38 rstrandh 1.17 (defclass html-token (html-parse-tree)
39 rstrandh 1.9 ((ink) (face)))
40 rstrandh 1.4
41 rstrandh 1.9 (defclass html-tag (html-token) ())
42 rstrandh 1.4
43 rstrandh 1.1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
44     ;;;
45     ;;; lexer
46    
47 rstrandh 1.17 (defclass html-lexeme (html-token)
48 rstrandh 1.4 ((state :initarg :state)))
49    
50 rstrandh 1.17 (defclass start-lexeme (html-lexeme) ())
51     (defclass tag-start (html-lexeme) ())
52     (defclass tag-end (html-lexeme) ())
53     (defclass slash (html-lexeme) ())
54     (defclass word (html-lexeme) ())
55     (defclass delimiter (html-lexeme) ())
56 rstrandh 1.4
57 rstrandh 1.15 (defclass html-lexer (incremental-lexer) ())
58    
59 rstrandh 1.14 (defmethod next-lexeme ((lexer html-lexer) scan)
60 rstrandh 1.11 (flet ((fo () (forward-object scan)))
61     (let ((object (object-after scan)))
62     (case object
63     (#\< (fo) (make-instance 'tag-start))
64     (#\> (fo) (make-instance 'tag-end))
65     (#\/ (fo) (make-instance 'slash))
66     (t (cond ((alphanumericp object)
67     (loop until (end-of-buffer-p scan)
68     while (alphanumericp (object-after scan))
69     do (fo))
70     (make-instance 'word))
71     (t
72     (fo) (make-instance 'delimiter))))))))
73 rstrandh 1.13
74 rstrandh 1.4 (define-syntax html-syntax ("HTML" (basic-syntax))
75 rstrandh 1.13 ((lexer :reader lexer)
76 rstrandh 1.4 (valid-parse :initform 1)
77     (parser)))
78 rstrandh 1.1
79     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
80     ;;;
81     ;;; parser
82    
83 rstrandh 1.20 (defparameter *html-grammar* (grammar))
84 rstrandh 1.18
85     (defun word-is (word string)
86 rstrandh 1.19 (string-equal (coerce (buffer-sequence (buffer word) (start-offset word) (end-offset word)) 'string)
87 rstrandh 1.18 string))
88 rstrandh 1.1
89 rstrandh 1.17 (defmacro define-start-tag (name string)
90     `(progn
91     (defclass ,name (html-tag) ())
92    
93     (add-rule (grammar-rule
94     (,name -> (tag-start
95     (word (and (= (end-offset tag-start) (start-offset word))
96     (word-is word ,string)))
97     (tag-end (= (end-offset word) (start-offset tag-end))))))
98     *html-grammar*)))
99    
100     (defmacro define-end-tag (name string)
101     `(progn
102     (defclass ,name (html-tag) ())
103    
104     (add-rule (grammar-rule
105     (,name -> (tag-start
106     (slash (= (end-offset tag-start) (start-offset slash)))
107     (word (and (= (end-offset slash) (start-offset word))
108     (word-is word ,string)))
109     (tag-end (= (end-offset word) (start-offset tag-end))))))
110     *html-grammar*)))
111    
112     (defmacro define-tag-pair (start-name end-name string)
113     `(progn (define-start-tag ,start-name ,string)
114     (define-end-tag ,end-name ,string)))
115    
116     (define-tag-pair <html> </html> "html")
117     (define-tag-pair <head> </head> "head")
118     (define-tag-pair <title> </title> "title")
119     (define-tag-pair <body> </body> "body")
120     (define-tag-pair <h1> </h1> "h1")
121     (define-tag-pair <h2> </h2> "h2")
122     (define-tag-pair <h3> </h3> "h3")
123     (define-tag-pair <p> </p> "p")
124     (define-tag-pair <ul> </ul> "ul")
125     (define-tag-pair <li> </li> "li")
126    
127     (defmacro define-list (name empty-name nonempty-name item-name)
128     `(progn
129     (defclass ,name (html-nonterminal) ())
130     (defclass ,empty-name (,name) ())
131    
132     (defclass ,nonempty-name (,name)
133     ((items :initarg :items)
134     (item :initarg :item)))
135    
136     (add-rule (grammar-rule (,name -> ()
137     (make-instance ',empty-name)))
138     *html-grammar*)
139    
140     (add-rule (grammar-rule (,name -> (,name ,item-name)
141     (make-instance ',nonempty-name
142     :items ,name :item ,item-name)))
143     *html-grammar*)
144    
145     (defmethod display-parse-tree ((entity ,empty-name) (syntax html-syntax) pane)
146     (declare (ignore pane))
147     nil)
148    
149     (defmethod display-parse-tree ((entity ,nonempty-name) (syntax html-syntax) pane)
150     (with-slots (items item) entity
151     (display-parse-tree items syntax pane)
152     (display-parse-tree item syntax pane)))))
153    
154     ;;;;;;;;;;;;;;; title-item, title-items
155    
156 rstrandh 1.16 (defclass title-item (html-nonterminal)
157     ((item :initarg :item)))
158    
159     (add-rule (grammar-rule (title-item -> (word) :item word)) *html-grammar*)
160     (add-rule (grammar-rule (title-item -> (delimiter) :item delimiter)) *html-grammar*)
161    
162     (defmethod display-parse-tree ((entity title-item) (syntax html-syntax) pane)
163     (with-slots (item) entity
164     (display-parse-tree item syntax pane)))
165    
166 rstrandh 1.17 (define-list title-items empty-title-items nonempty-title-items title-item)
167 rstrandh 1.16
168     ;;;;;;;;;;;;;;; title
169    
170     (defclass title (html-nonterminal)
171     ((<title> :initarg :<title>)
172     (items :initarg :items)
173     (</title> :initarg :</title>)))
174    
175     (add-rule (grammar-rule (title -> (<title> title-items </title>)
176     :<title> <title> :items title-items :</title> </title>))
177     *html-grammar*)
178    
179     (defmethod display-parse-tree ((entity title) (syntax html-syntax) pane)
180     (with-slots (<title> items </title>) entity
181     (display-parse-tree <title> syntax pane)
182     (with-text-face (pane :bold)
183     (display-parse-tree items syntax pane))
184     (display-parse-tree </title> syntax pane)))
185    
186 rstrandh 1.17 ;;;;;;;;;;;;;;; body-item body-items
187 rstrandh 1.16
188     (defclass body-item (html-nonterminal)
189     ((item :initarg :item)))
190    
191     (add-rule (grammar-rule (body-item -> (word) :item word)) *html-grammar*)
192     (add-rule (grammar-rule (body-item -> (delimiter) :item delimiter)) *html-grammar*)
193     (add-rule (grammar-rule (body-item -> (a) :item a)) *html-grammar*)
194    
195     (defmethod display-parse-tree ((entity body-item) (syntax html-syntax) pane)
196     (with-slots (item) entity
197     (display-parse-tree item syntax pane)))
198    
199 rstrandh 1.17 (define-list body-items empty-body-items nonempty-body-items body-item)
200 rstrandh 1.16
201     ;;;;;;;;;;;;;;; body
202    
203     (defclass body (html-nonterminal)
204     ((<body> :initarg :<body>)
205     (items :initarg :items)
206     (</body> :initarg :</body>)))
207    
208     (add-rule (grammar-rule (body -> (<body> body-items </body>)
209     :<body> <body> :items body-items :</body> </body>))
210     *html-grammar*)
211    
212     (defmethod display-parse-tree ((entity body) (syntax html-syntax) pane)
213     (with-slots (<body> items </body>) entity
214     (display-parse-tree <body> syntax pane)
215     (display-parse-tree items syntax pane)
216     (display-parse-tree </body> syntax pane)))
217    
218 rstrandh 1.18 ;;;;;;;;;;;;;;; <a>-tag
219    
220     (defclass a-tag-item (html-nonterminal)
221     ((item :initarg :item)))
222    
223     (add-rule (grammar-rule (a-tag-item -> (word) :item word)) *html-grammar*)
224     (add-rule (grammar-rule (a-tag-item -> (delimiter) :item delimiter)) *html-grammar*)
225    
226     (defmethod display-parse-tree ((entity a-tag-item) (syntax html-syntax) pane)
227     (with-slots (item) entity
228     (display-parse-tree item syntax pane)))
229    
230     (define-list a-tag-items empty-a-tag-items nonempty-a-tag-items a-tag-item)
231    
232     (defclass <a> (html-tag)
233     ((start :initarg :start)
234     (name :initarg :name)
235     (items :initarg :items)
236     (end :initarg :end)))
237    
238     (add-rule (grammar-rule (<a> -> (tag-start
239     (word (and (= (end-offset tag-start) (start-offset word))
240     (word-is word "a")))
241     a-tag-items
242     tag-end)
243     :start tag-start :name word :items a-tag-items :end tag-end))
244     *html-grammar*)
245    
246     (defmethod display-parse-tree ((entity <a>) (syntax html-syntax) pane)
247     (with-slots (start name items end) entity
248     (display-parse-tree start syntax pane)
249     (display-parse-tree name syntax pane)
250     (display-parse-tree items syntax pane)
251     (display-parse-tree end syntax pane)))
252    
253     (define-end-tag </a> "a")
254    
255     (defclass a (html-nonterminal)
256     ((<a> :initarg :<a>)
257     (items :initarg :items)
258     (</a> :initarg :</a>)))
259    
260     (add-rule (grammar-rule (a -> (<a> body-items </a>)
261     :<a> <a> :items body-items :</a> </a>))
262     *html-grammar*)
263    
264     (defmethod display-parse-tree ((entity a) (syntax html-syntax) pane)
265     (with-slots (<a> items </a>) entity
266     (display-parse-tree <a> syntax pane)
267     (display-parse-tree items syntax pane)
268     (display-parse-tree </a> syntax pane)))
269    
270     ;;;;;;;;;;;;;;; head
271    
272     (defclass head (html-nonterminal)
273     ((<head> :initarg :<head>)
274     (title :initarg :title)
275     (</head> :initarg :</head>)))
276    
277 rstrandh 1.20 (add-rule (grammar-rule (head -> (<head> title </head>)
278     :<head> <head> :title title :</head> </head>))
279     *html-grammar*)
280    
281 rstrandh 1.18 (defmethod display-parse-tree ((entity head) (syntax html-syntax) pane)
282     (with-slots (<head> title </head>) entity
283     (display-parse-tree <head> syntax pane)
284     (display-parse-tree title syntax pane)
285     (display-parse-tree </head> syntax pane)))
286    
287     ;;;;;;;;;;;;;;; html
288    
289     (defclass html (html-nonterminal)
290     ((<html> :initarg :<html>)
291     (head :initarg :head)
292     (body :initarg :body)
293     (</html> :initarg :</html>)))
294 rstrandh 1.20
295     (add-rule (grammar-rule (html -> (<html> head body </html>)
296     :<html> <html> :head head :body body :</html> </html>))
297     *html-grammar*)
298 rstrandh 1.18
299     (defmethod display-parse-tree ((entity html) (syntax html-syntax) pane)
300     (with-slots (<html> head body </html>) entity
301     (display-parse-tree <html> syntax pane)
302     (display-parse-tree head syntax pane)
303     (display-parse-tree body syntax pane)
304     (display-parse-tree </html> syntax pane)))
305    
306     ;;;;;;;;;;;;;;;
307    
308 rstrandh 1.1 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
309     (declare (ignore args))
310 rstrandh 1.13 (with-slots (parser lexer buffer) syntax
311 rstrandh 1.1 (setf parser (make-instance 'parser
312     :grammar *html-grammar*
313     :target 'html))
314 rstrandh 1.13 (setf lexer (make-instance 'html-lexer :buffer (buffer syntax)))
315 rstrandh 1.19 (let ((m (clone-mark (low-mark buffer) :left))
316     (lexeme (make-instance 'start-lexeme :state (initial-state parser))))
317 abakic 1.12 (setf (offset m) 0)
318 rstrandh 1.19 (setf (start-offset lexeme) m
319     (end-offset lexeme) 0)
320     (insert-lexeme lexer 0 lexeme))))
321 rstrandh 1.4
322 rstrandh 1.6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
323     ;;;
324     ;;; update syntax
325    
326 rstrandh 1.13
327 rstrandh 1.4 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
328 rstrandh 1.13 (with-slots (parser lexer valid-parse) syntax
329     (loop until (= valid-parse (nb-lexemes lexer))
330     while (mark<= (end-offset (lexeme lexer valid-parse)) bot)
331     do (let ((current-token (lexeme lexer (1- valid-parse)))
332     (next-lexeme (lexeme lexer valid-parse)))
333 rstrandh 1.11 (setf (slot-value next-lexeme 'state)
334     (advance-parse parser (list next-lexeme) (slot-value current-token 'state))))
335 rstrandh 1.4 (incf valid-parse))))
336 rstrandh 1.1
337 rstrandh 1.13 (defmethod inter-lexeme-object-p ((lexer html-lexer) object)
338 rstrandh 1.11 (whitespacep object))
339    
340 rstrandh 1.1 (defmethod update-syntax (buffer (syntax html-syntax))
341 rstrandh 1.13 (with-slots (lexer valid-parse) syntax
342 rstrandh 1.11 (let* ((low-mark (low-mark buffer))
343 rstrandh 1.17 (high-mark (high-mark buffer)))
344     (when (mark<= low-mark high-mark)
345     (let ((first-invalid-position (delete-invalid-lexemes lexer low-mark high-mark)))
346     (setf valid-parse first-invalid-position)
347     (update-lex lexer first-invalid-position high-mark))))))
348 rstrandh 1.6
349     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
350     ;;;
351     ;;; display
352    
353 rstrandh 1.9 (defvar *white-space-start* nil)
354    
355     (defvar *cursor-positions* nil)
356     (defvar *current-line* 0)
357    
358 rstrandh 1.7 (defun handle-whitespace (pane buffer start end)
359     (let ((space-width (space-width pane))
360     (tab-width (tab-width pane)))
361     (loop while (< start end)
362     do (ecase (buffer-object buffer start)
363 rstrandh 1.9 (#\Newline (terpri pane)
364     (setf (aref *cursor-positions* (incf *current-line*))
365     (multiple-value-bind (x y) (stream-cursor-position pane)
366 rstrandh 1.10 (declare (ignore x))
367 rstrandh 1.9 y)))
368 rstrandh 1.7 (#\Space (stream-increment-cursor-position
369     pane space-width 0))
370     (#\Tab (let ((x (stream-cursor-position pane)))
371     (stream-increment-cursor-position
372     pane (- tab-width (mod x tab-width)) 0))))
373     (incf start))))
374 rstrandh 1.6
375 rstrandh 1.17 (defmethod display-parse-tree :around ((entity html-parse-tree) syntax pane)
376 rstrandh 1.8 (with-slots (top bot) pane
377 rstrandh 1.17 (when (and (end-offset entity) (mark> (end-offset entity) top))
378 rstrandh 1.8 (call-next-method))))
379    
380     (defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
381 rstrandh 1.9 (flet ((cache-test (t1 t2)
382     (and (eq t1 t2)
383     (eq (slot-value t1 'ink)
384     (medium-ink (sheet-medium pane)))
385     (eq (slot-value t1 'face)
386     (text-style-face (medium-text-style (sheet-medium pane)))))))
387     (updating-output (pane :unique-id entity
388     :id-test #'eq
389     :cache-value entity
390     :cache-test #'cache-test)
391     (with-slots (ink face) entity
392     (setf ink (medium-ink (sheet-medium pane))
393     face (text-style-face (medium-text-style (sheet-medium pane))))
394 rstrandh 1.19 (present (coerce (buffer-sequence (buffer syntax)
395     (start-offset entity)
396     (end-offset entity))
397 rstrandh 1.9 'string)
398     'string
399     :stream pane)))))
400    
401     (defmethod display-parse-tree :around ((entity html-tag) (syntax html-syntax) pane)
402     (with-drawing-options (pane :ink +green+)
403     (call-next-method)))
404    
405     (defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane)
406     (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
407     (setf *white-space-start* (end-offset entity)))
408 rstrandh 1.10
409 rstrandh 1.8 (defgeneric display-parse-stack (symbol stack syntax pane))
410    
411     (defmethod display-parse-stack (symbol stack (syntax html-syntax) pane)
412     (let ((next (parse-stack-next stack)))
413     (unless (null next)
414     (display-parse-stack (parse-stack-symbol next) next syntax pane))
415     (loop for parse-tree in (reverse (parse-stack-parse-trees stack))
416     do (display-parse-tree parse-tree syntax pane))))
417    
418     (defun display-parse-state (state syntax pane)
419     (let ((top (parse-stack-top state)))
420     (if (not (null top))
421     (display-parse-stack (parse-stack-symbol top) top syntax pane)
422     (display-parse-tree (target-parse-tree state) syntax pane))))
423    
424 rstrandh 1.7 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax html-syntax) current-p)
425     (with-slots (top bot) pane
426 rstrandh 1.9 (setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
427     *current-line* 0
428     (aref *cursor-positions* 0) (stream-cursor-position pane))
429 rstrandh 1.13 (with-slots (lexer) syntax
430     (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-lexemes lexer)))
431 rstrandh 1.7 1.0)))
432     ;; find the last token before bot
433     (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
434     ;; go back to a token before bot
435 rstrandh 1.13 (loop until (mark<= (end-offset (lexeme lexer (1- end-token-index))) bot)
436 rstrandh 1.7 do (decf end-token-index))
437 rstrandh 1.8 ;; go forward to the last token before bot
438 rstrandh 1.13 (loop until (or (= end-token-index (nb-lexemes lexer))
439     (mark> (start-offset (lexeme lexer end-token-index)) bot))
440 rstrandh 1.7 do (incf end-token-index))
441     (let ((start-token-index end-token-index))
442 rstrandh 1.8 ;; go back to the first token after top, or until the previous token
443     ;; contains a valid parser state
444 rstrandh 1.13 (loop until (or (mark<= (end-offset (lexeme lexer (1- start-token-index))) top)
445 rstrandh 1.9 (not (parse-state-empty-p
446 rstrandh 1.13 (slot-value (lexeme lexer (1- start-token-index)) 'state))))
447 rstrandh 1.7 do (decf start-token-index))
448 rstrandh 1.9 (let ((*white-space-start* (offset top)))
449     ;; display the parse tree if any
450 rstrandh 1.13 (unless (parse-state-empty-p (slot-value (lexeme lexer (1- start-token-index)) 'state))
451     (display-parse-state (slot-value (lexeme lexer (1- start-token-index)) 'state)
452 rstrandh 1.9 syntax
453     pane))
454 rstrandh 1.11 ;; display the lexemes
455 rstrandh 1.9 (with-drawing-options (pane :ink +red+)
456     (loop while (< start-token-index end-token-index)
457 rstrandh 1.13 do (let ((token (lexeme lexer start-token-index)))
458 rstrandh 1.9 (display-parse-tree token syntax pane))
459     (incf start-token-index))))))))
460     (let* ((cursor-line (number-of-lines-in-region top (point pane)))
461     (height (text-style-height (medium-text-style pane) pane))
462     (cursor-y (+ (* cursor-line (+ height (stream-vertical-spacing pane)))))
463     (cursor-column (column-number (point pane)))
464     (cursor-x (* cursor-column (text-style-width (medium-text-style pane) pane))))
465     (updating-output (pane :unique-id -1)
466     (draw-rectangle* pane
467     (1- cursor-x) (- cursor-y (* 0.2 height))
468     (+ cursor-x 2) (+ cursor-y (* 0.8 height))
469     :ink (if current-p +red+ +blue+))))))
470 rstrandh 1.7

  ViewVC Help
Powered by ViewVC 1.1.5