/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations)
Fri Mar 11 07:03:31 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.8: +120 -76 lines
HTML syntax now does syntax highlighting.

The current code is a mess, because I haven't figured out how much of
html-syntax.lisp can be factored out and put in syntax.lisp for use
with other syntax modules.

Also, the HTML syntax module is nowhere near complete.  It exists
merely as an illustration of what can be done with incremental
parsing.  It is definitely premature to try to turn it into something
more complete and truly useful for editing HTML.
1 rstrandh 1.1 ;;; -*- Mode: Lisp; Package: COMMON-LISP-USER -*-
2    
3     ;;; (c) copyright 2005 by
4     ;;; Robert Strandh (strandh@labri.fr)
5    
6     ;;; This library is free software; you can redistribute it and/or
7     ;;; modify it under the terms of the GNU Library General Public
8     ;;; License as published by the Free Software Foundation; either
9     ;;; version 2 of the License, or (at your option) any later version.
10     ;;;
11     ;;; This library is distributed in the hope that it will be useful,
12     ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13     ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14     ;;; Library General Public License for more details.
15     ;;;
16     ;;; You should have received a copy of the GNU Library General Public
17     ;;; License along with this library; if not, write to the
18     ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19     ;;; Boston, MA 02111-1307 USA.
20    
21     ;;; Syntax for analysing HTML
22    
23 rstrandh 1.6 (in-package :climacs-html-syntax)
24 rstrandh 1.1
25     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26     ;;;
27 rstrandh 1.9 ;;; this should really go in syntax.lisp
28    
29     (defclass parse-tree ()
30     ((start-mark :initarg :start-mark :reader start-mark)
31     (size :initarg :size)))
32    
33     (defgeneric start-offset (parse-tree))
34    
35     (defmethod start-offset ((tree parse-tree))
36     (offset (start-mark tree)))
37    
38     (defgeneric end-offset (parse-tree))
39    
40     (defmethod end-offset ((tree parse-tree))
41     (with-slots (start-mark size) tree
42     (+ (offset start-mark) size)))
43    
44     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
45     ;;;
46 rstrandh 1.1 ;;; grammar classes
47    
48 rstrandh 1.9 (defclass html-sym (parse-tree)
49 rstrandh 1.1 ((badness :initform 0 :initarg :badness :reader badness)
50     (message :initform "" :initarg :message :reader message)))
51    
52     (defmethod parse-tree-better ((t1 html-sym) (t2 html-sym))
53     (and (eq (class-of t1) (class-of t2))
54     (< (badness t1) (badness t2))))
55    
56 rstrandh 1.9 (defclass html-nonterminal (html-sym) ())
57 rstrandh 1.6
58     (defclass words (html-nonterminal) ())
59 rstrandh 1.4
60     (defclass empty-words (words) ())
61    
62     (defclass nonempty-words (words)
63     ((words :initarg :words)
64     (word :initarg :word)))
65    
66 rstrandh 1.6 (defclass html-balanced (html-nonterminal)
67 rstrandh 1.4 ((start :initarg :start)
68     (end :initarg :end)))
69    
70     (defclass html (html-balanced)
71     ((head :initarg :head)
72     (body :initarg :body)))
73    
74     (defclass head (html-balanced)
75     ((title :initarg :title)))
76    
77     (defclass html-words (html-balanced)
78     ((words :initarg :words)))
79    
80     (defclass title (html-words) ())
81     (defclass body (html-words) ())
82     (defclass h1 (html-words) ())
83     (defclass h2 (html-words) ())
84     (defclass h3 (html-words) ())
85     (defclass para (html-words) ())
86    
87     (defclass html-token (html-sym)
88 rstrandh 1.9 ((ink) (face)))
89 rstrandh 1.4
90 rstrandh 1.9 (defclass html-tag (html-token) ())
91 rstrandh 1.4
92 rstrandh 1.9 (defclass <html> (html-tag) () (:default-initargs :size 6))
93     (defclass </html> (html-tag) ()(:default-initargs :size 7))
94     (defclass <head> (html-tag) () (:default-initargs :size 6))
95     (defclass </head> (html-tag) () (:default-initargs :size 7))
96     (defclass <title> (html-tag) () (:default-initargs :size 7))
97     (defclass </title> (html-tag) () (:default-initargs :size 8))
98     (defclass <body> (html-tag) () (:default-initargs :size 6))
99     (defclass </body> (html-tag) () (:default-initargs :size 7))
100     (defclass <h1> (html-tag) () (:default-initargs :size 4))
101     (defclass </h1> (html-tag) () (:default-initargs :size 5))
102     (defclass <h2> (html-tag) () (:default-initargs :size 4))
103     (defclass </h2> (html-tag) () (:default-initargs :size 5))
104     (defclass <h3> (html-tag) () (:default-initargs :size 4))
105     (defclass </h3> (html-tag) () (:default-initargs :size 5))
106     (defclass <p> (html-tag) () (:default-initargs :size 3))
107     (defclass </p> (html-tag) () (:default-initargs :size 4))
108     (defclass <ul> (html-tag) () (:default-initargs :size 4))
109     (defclass </ul> (html-tag) () (:default-initargs :size 5))
110     (defclass <li> (html-tag) () (:default-initargs :size 4))
111     (defclass </li> (html-tag) () (:default-initargs :size 5))
112 rstrandh 1.1
113     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
114     ;;;
115     ;;; lexer
116    
117 rstrandh 1.4 (defclass html-element (html-token)
118     ((state :initarg :state)))
119    
120     (defclass start-element (html-element) ())
121     (defclass tag-start (html-element) ())
122     (defclass tag-end (html-element) ())
123     (defclass slash (html-element) ())
124     (defclass word (html-element) ())
125     (defclass delimiter (html-element) ())
126    
127     (defun next-token (scan)
128     (let ((start-mark (clone-mark scan)))
129     (flet ((fo () (forward-object scan)))
130     (macrolet ((make-entry (type)
131     `(return-from next-token
132     (make-instance ,type :start-mark start-mark
133     :size (- (offset scan) (offset start-mark))))))
134     (loop with object = (object-after scan)
135     until (end-of-buffer-p scan)
136     do (case object
137     (#\< (fo) (make-entry 'tag-start))
138     (#\> (fo) (make-entry 'tag-end))
139     (#\/ (fo) (make-entry 'slash))
140     (t (cond ((alphanumericp object)
141     (loop until (end-of-buffer-p scan)
142     while (alphanumericp (object-after scan))
143     do (fo))
144     (make-entry 'word))
145     (t
146     (fo) (make-entry 'delimiter))))))))))
147    
148     (define-syntax html-syntax ("HTML" (basic-syntax))
149     ((tokens :initform (make-instance 'standard-flexichain))
150     (guess-pos :initform 1)
151     (valid-parse :initform 1)
152     (parser)))
153 rstrandh 1.1
154     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
155     ;;;
156     ;;; parser
157    
158 rstrandh 1.4 (defun word-is (word string)
159     (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
160     string))
161    
162 rstrandh 1.1 (defparameter *html-grammar*
163     (grammar
164 rstrandh 1.4 (<html> -> (tag-start
165     (word (and (= (end-offset tag-start) (start-offset word))
166     (word-is word "html")))
167     (tag-end (= (end-offset word) (start-offset tag-end))))
168     :start-mark (start-mark tag-start))
169     (</html> -> (tag-start
170     (slash (= (end-offset tag-start) (start-offset slash)))
171     (word (and (= (end-offset slash) (start-offset word))
172     (word-is word "html")))
173     (tag-end (= (end-offset word) (start-offset tag-end))))
174     :start-mark (start-mark tag-start))
175     (<head> -> (tag-start
176     (word (and (= (end-offset tag-start) (start-offset word))
177     (word-is word "head")))
178     (tag-end (= (end-offset word) (start-offset tag-end))))
179     :start-mark (start-mark tag-start))
180     (</head> -> (tag-start
181     (slash (= (end-offset tag-start) (start-offset slash)))
182     (word (and (= (end-offset slash) (start-offset word))
183     (word-is word "head")))
184     (tag-end (= (end-offset word) (start-offset tag-end))))
185     :start-mark (start-mark tag-start))
186     (<title> -> (tag-start
187     (word (and (= (end-offset tag-start) (start-offset word))
188     (word-is word "title")))
189     (tag-end (= (end-offset word) (start-offset tag-end))))
190     :start-mark (start-mark tag-start))
191     (</title> -> (tag-start
192     (slash (= (end-offset tag-start) (start-offset slash)))
193     (word (and (= (end-offset slash) (start-offset word))
194     (word-is word "title")))
195     (tag-end (= (end-offset word) (start-offset tag-end))))
196     :start-mark (start-mark tag-start))
197     (<body> -> (tag-start
198     (word (and (= (end-offset tag-start) (start-offset word))
199     (word-is word "body")))
200     (tag-end (= (end-offset word) (start-offset tag-end))))
201     :start-mark (start-mark tag-start))
202     (</body> -> (tag-start
203     (slash (= (end-offset tag-start) (start-offset slash)))
204     (word (and (= (end-offset slash) (start-offset word))
205     (word-is word "body")))
206     (tag-end (= (end-offset word) (start-offset tag-end))))
207     :start-mark (start-mark tag-start))
208     (html -> (<html> head body </html>)
209 rstrandh 1.9 :start-mark (start-mark <html>)
210     :size (- (end-offset </html>) (start-offset <html>))
211 rstrandh 1.4 :start <html> :head head :body body :end </html>)
212     (head -> (<head> title </head>)
213 rstrandh 1.9 :start-mark (start-mark <head>)
214     :size (- (end-offset </head>) (start-offset <head>))
215 rstrandh 1.4 :start <head> :title title :end </head>)
216     (title -> (<title> words </title>)
217 rstrandh 1.9 :start-mark (start-mark <title>)
218     :size (- (end-offset </title>) (start-offset <title>))
219 rstrandh 1.4 :start <title> :words words :end </title>)
220     (body -> (<body> words </body>)
221 rstrandh 1.9 :start-mark (start-mark <body>)
222     :size (- (end-offset </body>) (start-offset <body>))
223 rstrandh 1.4 :start <body> :words words :end </body>)
224     (words -> ()
225 rstrandh 1.9 (make-instance 'empty-words :start-mark nil))
226 rstrandh 1.4 (words -> (words word)
227 rstrandh 1.6 (make-instance 'nonempty-words
228 rstrandh 1.9 :start-mark (or (start-mark words) (start-mark word))
229     :size (- (end-offset word) (offset (or (start-mark words) (start-mark word))))
230 rstrandh 1.6 :words words :word word))))
231 rstrandh 1.1
232     (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
233     (declare (ignore args))
234 rstrandh 1.4 (with-slots (parser tokens buffer) syntax
235 rstrandh 1.1 (setf parser (make-instance 'parser
236     :grammar *html-grammar*
237     :target 'html))
238 rstrandh 1.4 (insert* tokens 0 (make-instance 'start-element
239     :start-mark (make-instance 'standard-left-sticky-mark
240     :buffer buffer
241     :offset 0)
242     :size 0
243     :state (initial-state parser)))))
244    
245 rstrandh 1.6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
246     ;;;
247     ;;; update syntax
248    
249 rstrandh 1.4 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
250     (with-slots (parser tokens valid-parse) syntax
251     (loop until (= valid-parse (nb-elements tokens))
252 rstrandh 1.5 while (mark<= (end-offset (element* tokens valid-parse)) bot)
253 rstrandh 1.4 do (let ((current-token (element* tokens (1- valid-parse)))
254     (next-token (element* tokens valid-parse)))
255     (setf (slot-value next-token 'state)
256     (advance-parse parser (list next-token) (slot-value current-token 'state))))
257     (incf valid-parse))))
258 rstrandh 1.1
259     (defmethod update-syntax (buffer (syntax html-syntax))
260 rstrandh 1.4 (let ((low-mark (low-mark buffer))
261     (high-mark (high-mark buffer))
262     (scan))
263     (with-slots (tokens guess-pos valid-parse) syntax
264     (when (mark<= low-mark high-mark)
265     ;; go back to a position before low-mark
266     (loop until (or (= guess-pos 1)
267     (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
268     do (decf guess-pos))
269     ;; go forward to the last position before low-mark
270     (loop with nb-elements = (nb-elements tokens)
271     until (or (= guess-pos nb-elements)
272     (mark>= (end-offset (element* tokens guess-pos)) low-mark))
273     do (incf guess-pos))
274     ;; mark valid parse
275     (setf valid-parse guess-pos)
276     ;; delete entries that must be reparsed
277     (loop until (or (= guess-pos (nb-elements tokens))
278     (mark> (start-mark (element* tokens guess-pos)) high-mark))
279     do (delete* tokens guess-pos))
280     (setf scan (make-instance 'standard-left-sticky-mark
281     :buffer buffer
282     :offset (if (zerop guess-pos)
283     0
284     (end-offset (element* tokens (1- guess-pos))))))
285     ;; scan
286     (loop with start-mark = nil
287     do (loop until (end-of-buffer-p scan)
288     while (whitespacep (object-after scan))
289     do (forward-object scan))
290     until (if (end-of-buffer-p high-mark)
291     (end-of-buffer-p scan)
292     (mark> scan high-mark))
293     do (setf start-mark (clone-mark scan))
294     (insert* tokens guess-pos (next-token scan))
295     (incf guess-pos))))))
296 rstrandh 1.6
297     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
298     ;;;
299     ;;; display
300    
301 rstrandh 1.9 (defvar *white-space-start* nil)
302    
303     (defvar *cursor-positions* nil)
304     (defvar *current-line* 0)
305    
306 rstrandh 1.7 (defun handle-whitespace (pane buffer start end)
307     (let ((space-width (space-width pane))
308     (tab-width (tab-width pane)))
309     (loop while (< start end)
310     do (ecase (buffer-object buffer start)
311 rstrandh 1.9 (#\Newline (terpri pane)
312     (setf (aref *cursor-positions* (incf *current-line*))
313     (multiple-value-bind (x y) (stream-cursor-position pane)
314     y)))
315 rstrandh 1.7 (#\Space (stream-increment-cursor-position
316     pane space-width 0))
317     (#\Tab (let ((x (stream-cursor-position pane)))
318     (stream-increment-cursor-position
319     pane (- tab-width (mod x tab-width)) 0))))
320     (incf start))))
321 rstrandh 1.6
322 rstrandh 1.8 (defmethod display-parse-tree :around ((entity html-sym) syntax pane)
323     (with-slots (top bot) pane
324     (when (mark> (end-offset entity) top)
325     (call-next-method))))
326    
327     (defmethod display-parse-tree :around ((entity empty-words) syntax pane)
328     (declare (ignore syntax pane))
329     nil)
330    
331     (defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
332 rstrandh 1.9 (flet ((cache-test (t1 t2)
333     (and (eq t1 t2)
334     (eq (slot-value t1 'ink)
335     (medium-ink (sheet-medium pane)))
336     (eq (slot-value t1 'face)
337     (text-style-face (medium-text-style (sheet-medium pane)))))))
338     (updating-output (pane :unique-id entity
339     :id-test #'eq
340     :cache-value entity
341     :cache-test #'cache-test)
342     (with-slots (ink face) entity
343     (setf ink (medium-ink (sheet-medium pane))
344     face (text-style-face (medium-text-style (sheet-medium pane))))
345     (present (coerce (region-to-sequence (start-mark entity)
346     (end-offset entity))
347     'string)
348     'string
349     :stream pane)))))
350    
351     (defmethod display-parse-tree :around ((entity html-tag) (syntax html-syntax) pane)
352     (with-drawing-options (pane :ink +green+)
353     (call-next-method)))
354    
355     (defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane)
356     (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
357     (setf *white-space-start* (end-offset entity)))
358 rstrandh 1.8
359     (defmethod display-parse-tree :before ((entity html-balanced) (syntax html-syntax) pane)
360     (with-slots (start) entity
361     (display-parse-tree start syntax pane)))
362    
363     (defmethod display-parse-tree :after ((entity html-balanced) (syntax html-syntax) pane)
364     (with-slots (end) entity
365     (display-parse-tree end syntax pane)))
366    
367 rstrandh 1.9 (defmethod display-parse-tree :around ((entity title) (syntax html-syntax) pane)
368     (with-text-face (pane :bold)
369     (call-next-method)))
370    
371 rstrandh 1.8 (defmethod display-parse-tree ((entity html-words) (syntax html-syntax) pane)
372     (with-slots (words) entity
373     (display-parse-tree words syntax pane)))
374    
375     (defmethod display-parse-tree ((entity empty-words) (syntax html-syntax) pane)
376     (declare (ignore pane))
377     nil)
378    
379     (defmethod display-parse-tree ((entity nonempty-words) (syntax html-syntax) pane)
380     (with-slots (words word) entity
381     (display-parse-tree words syntax pane)
382     (display-parse-tree word syntax pane)))
383    
384     (defmethod display-parse-tree ((entity html) (syntax html-syntax) pane)
385     (with-slots (head body) entity
386     (display-parse-tree head syntax pane)
387     (display-parse-tree body syntax pane)))
388    
389     (defmethod display-parse-tree ((entity head) (syntax html-syntax) pane)
390     (with-slots (title) entity
391     (display-parse-tree title syntax pane)))
392    
393     (defgeneric display-parse-stack (symbol stack syntax pane))
394    
395     (defmethod display-parse-stack (symbol stack (syntax html-syntax) pane)
396     (let ((next (parse-stack-next stack)))
397     (unless (null next)
398     (display-parse-stack (parse-stack-symbol next) next syntax pane))
399     (loop for parse-tree in (reverse (parse-stack-parse-trees stack))
400     do (display-parse-tree parse-tree syntax pane))))
401    
402     (defun display-parse-state (state syntax pane)
403     (let ((top (parse-stack-top state)))
404     (if (not (null top))
405     (display-parse-stack (parse-stack-symbol top) top syntax pane)
406     (display-parse-tree (target-parse-tree state) syntax pane))))
407    
408 rstrandh 1.7 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax html-syntax) current-p)
409     (with-slots (top bot) pane
410 rstrandh 1.9 (setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
411     *current-line* 0
412     (aref *cursor-positions* 0) (stream-cursor-position pane))
413 rstrandh 1.7 (with-slots (tokens) syntax
414     (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-elements tokens)))
415     1.0)))
416     ;; find the last token before bot
417     (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
418     ;; go back to a token before bot
419     (loop until (mark<= (end-offset (element* tokens (1- end-token-index))) bot)
420     do (decf end-token-index))
421 rstrandh 1.8 ;; go forward to the last token before bot
422 rstrandh 1.7 (loop until (or (= end-token-index (nb-elements tokens))
423     (mark> (start-offset (element* tokens end-token-index)) bot))
424     do (incf end-token-index))
425     (let ((start-token-index end-token-index))
426 rstrandh 1.8 ;; go back to the first token after top, or until the previous token
427     ;; contains a valid parser state
428     (loop until (or (mark<= (end-offset (element* tokens (1- start-token-index))) top)
429 rstrandh 1.9 (not (parse-state-empty-p
430     (slot-value (element* tokens (1- start-token-index)) 'state))))
431 rstrandh 1.7 do (decf start-token-index))
432 rstrandh 1.9 (let ((*white-space-start* (offset top)))
433     ;; display the parse tree if any
434     (unless (parse-state-empty-p (slot-value (element* tokens (1- start-token-index)) 'state))
435     (display-parse-state (slot-value (element* tokens (1- start-token-index)) 'state)
436     syntax
437     pane))
438     ;; display the tokens
439     (with-drawing-options (pane :ink +red+)
440     (loop while (< start-token-index end-token-index)
441     do (let ((token (element* tokens start-token-index)))
442     (display-parse-tree token syntax pane))
443     (incf start-token-index))))))))
444     (let* ((cursor-line (number-of-lines-in-region top (point pane)))
445     (height (text-style-height (medium-text-style pane) pane))
446     (cursor-y (+ (* cursor-line (+ height (stream-vertical-spacing pane)))))
447     (cursor-column (column-number (point pane)))
448     (cursor-x (* cursor-column (text-style-width (medium-text-style pane) pane))))
449     (updating-output (pane :unique-id -1)
450     (draw-rectangle* pane
451     (1- cursor-x) (- cursor-y (* 0.2 height))
452     (+ cursor-x 2) (+ cursor-y (* 0.8 height))
453     :ink (if current-p +red+ +blue+))))))
454 rstrandh 1.7
455    

  ViewVC Help
Powered by ViewVC 1.1.5