/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations)
Mon Mar 7 06:51:02 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.6: +48 -1 lines
First attempt at a display function (for html syntax) that uses the
output of an incremental lexer and parser.  This code is not complete
yet:

  * right now, it uses only the lexer output, and not the parser

  * the cursor is not displayed yet

  * it is too slow, most likely because the output records are all in
    one big bag, as opposed to being tree structured, such as the
    lines of basic syntax.

The slowness is not a serious problem, because one day, McCLIM will
have tree-structured output records itself, and because most syntax
modules (including this one, very soon) will have some tree structure
itself.  It might be worthwhile to display the part of the buffer
beyond a parse error in some artificially structured way, such as by
lines as in the current basic syntax.
1 rstrandh 1.1 ;;; -*- Mode: Lisp; Package: COMMON-LISP-USER -*-
2    
3     ;;; (c) copyright 2005 by
4     ;;; Robert Strandh (strandh@labri.fr)
5    
6     ;;; This library is free software; you can redistribute it and/or
7     ;;; modify it under the terms of the GNU Library General Public
8     ;;; License as published by the Free Software Foundation; either
9     ;;; version 2 of the License, or (at your option) any later version.
10     ;;;
11     ;;; This library is distributed in the hope that it will be useful,
12     ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13     ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14     ;;; Library General Public License for more details.
15     ;;;
16     ;;; You should have received a copy of the GNU Library General Public
17     ;;; License along with this library; if not, write to the
18     ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19     ;;; Boston, MA 02111-1307 USA.
20    
21     ;;; Syntax for analysing HTML
22    
23 rstrandh 1.6 (in-package :climacs-html-syntax)
24 rstrandh 1.1
25     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26     ;;;
27     ;;; grammar classes
28    
29     (defclass html-sym ()
30     ((badness :initform 0 :initarg :badness :reader badness)
31     (message :initform "" :initarg :message :reader message)))
32    
33     (defmethod parse-tree-better ((t1 html-sym) (t2 html-sym))
34     (and (eq (class-of t1) (class-of t2))
35     (< (badness t1) (badness t2))))
36    
37 rstrandh 1.6 (defclass html-nonterminal (html-sym)
38     ((start-offset :initarg :start-offset :reader start-offset)
39     (end-offset :initarg :end-offset :reader end-offset)))
40    
41     (defclass words (html-nonterminal) ())
42 rstrandh 1.4
43     (defclass empty-words (words) ())
44    
45     (defclass nonempty-words (words)
46     ((words :initarg :words)
47     (word :initarg :word)))
48    
49 rstrandh 1.6 (defclass html-balanced (html-nonterminal)
50 rstrandh 1.4 ((start :initarg :start)
51     (end :initarg :end)))
52    
53     (defclass html (html-balanced)
54     ((head :initarg :head)
55     (body :initarg :body)))
56    
57     (defclass head (html-balanced)
58     ((title :initarg :title)))
59    
60     (defclass html-words (html-balanced)
61     ((words :initarg :words)))
62    
63     (defclass title (html-words) ())
64     (defclass body (html-words) ())
65     (defclass h1 (html-words) ())
66     (defclass h2 (html-words) ())
67     (defclass h3 (html-words) ())
68     (defclass para (html-words) ())
69    
70     (defclass html-token (html-sym)
71     ((start-mark :initarg :start-mark :reader start-mark)
72     (size :initarg :size)))
73    
74     (defgeneric end-offset (html-token))
75    
76     (defmethod end-offset ((token html-token))
77     (with-slots (start-mark size) token
78     (+ (offset start-mark) size)))
79    
80     (defgeneric start-offset (html-token))
81    
82     (defmethod start-offset ((token html-token))
83     (offset (start-mark token)))
84    
85     (defclass <html> (html-token) () (:default-initargs :size 6))
86     (defclass </html> (html-token) ()(:default-initargs :size 7))
87     (defclass <head> (html-token) () (:default-initargs :size 6))
88     (defclass </head> (html-token) () (:default-initargs :size 7))
89     (defclass <title> (html-token) () (:default-initargs :size 7))
90     (defclass </title> (html-token) () (:default-initargs :size 8))
91     (defclass <body> (html-token) () (:default-initargs :size 6))
92     (defclass </body> (html-token) () (:default-initargs :size 7))
93     (defclass <h1> (html-token) () (:default-initargs :size 4))
94     (defclass </h1> (html-token) () (:default-initargs :size 5))
95     (defclass <h2> (html-token) () (:default-initargs :size 4))
96     (defclass </h2> (html-token) () (:default-initargs :size 5))
97     (defclass <h3> (html-token) () (:default-initargs :size 4))
98     (defclass </h3> (html-token) () (:default-initargs :size 5))
99     (defclass <p> (html-token) () (:default-initargs :size 3))
100     (defclass </p> (html-token) () (:default-initargs :size 4))
101     (defclass <ul> (html-token) () (:default-initargs :size 4))
102     (defclass </ul> (html-token) () (:default-initargs :size 5))
103     (defclass <li> (html-token) () (:default-initargs :size 4))
104     (defclass </li> (html-token) () (:default-initargs :size 5))
105 rstrandh 1.1
106     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
107     ;;;
108     ;;; lexer
109    
110 rstrandh 1.4 (defclass html-element (html-token)
111     ((state :initarg :state)))
112    
113     (defclass start-element (html-element) ())
114     (defclass tag-start (html-element) ())
115     (defclass tag-end (html-element) ())
116     (defclass slash (html-element) ())
117     (defclass word (html-element) ())
118     (defclass delimiter (html-element) ())
119    
120     (defun next-token (scan)
121     (let ((start-mark (clone-mark scan)))
122     (flet ((fo () (forward-object scan)))
123     (macrolet ((make-entry (type)
124     `(return-from next-token
125     (make-instance ,type :start-mark start-mark
126     :size (- (offset scan) (offset start-mark))))))
127     (loop with object = (object-after scan)
128     until (end-of-buffer-p scan)
129     do (case object
130     (#\< (fo) (make-entry 'tag-start))
131     (#\> (fo) (make-entry 'tag-end))
132     (#\/ (fo) (make-entry 'slash))
133     (t (cond ((alphanumericp object)
134     (loop until (end-of-buffer-p scan)
135     while (alphanumericp (object-after scan))
136     do (fo))
137     (make-entry 'word))
138     (t
139     (fo) (make-entry 'delimiter))))))))))
140    
141     (define-syntax html-syntax ("HTML" (basic-syntax))
142     ((tokens :initform (make-instance 'standard-flexichain))
143     (guess-pos :initform 1)
144     (valid-parse :initform 1)
145     (parser)))
146 rstrandh 1.1
147     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
148     ;;;
149     ;;; parser
150    
151 rstrandh 1.4 (defun word-is (word string)
152     (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
153     string))
154    
155 rstrandh 1.1 (defparameter *html-grammar*
156     (grammar
157 rstrandh 1.4 (<html> -> (tag-start
158     (word (and (= (end-offset tag-start) (start-offset word))
159     (word-is word "html")))
160     (tag-end (= (end-offset word) (start-offset tag-end))))
161     :start-mark (start-mark tag-start))
162     (</html> -> (tag-start
163     (slash (= (end-offset tag-start) (start-offset slash)))
164     (word (and (= (end-offset slash) (start-offset word))
165     (word-is word "html")))
166     (tag-end (= (end-offset word) (start-offset tag-end))))
167     :start-mark (start-mark tag-start))
168     (<head> -> (tag-start
169     (word (and (= (end-offset tag-start) (start-offset word))
170     (word-is word "head")))
171     (tag-end (= (end-offset word) (start-offset tag-end))))
172     :start-mark (start-mark tag-start))
173     (</head> -> (tag-start
174     (slash (= (end-offset tag-start) (start-offset slash)))
175     (word (and (= (end-offset slash) (start-offset word))
176     (word-is word "head")))
177     (tag-end (= (end-offset word) (start-offset tag-end))))
178     :start-mark (start-mark tag-start))
179     (<title> -> (tag-start
180     (word (and (= (end-offset tag-start) (start-offset word))
181     (word-is word "title")))
182     (tag-end (= (end-offset word) (start-offset tag-end))))
183     :start-mark (start-mark tag-start))
184     (</title> -> (tag-start
185     (slash (= (end-offset tag-start) (start-offset slash)))
186     (word (and (= (end-offset slash) (start-offset word))
187     (word-is word "title")))
188     (tag-end (= (end-offset word) (start-offset tag-end))))
189     :start-mark (start-mark tag-start))
190     (<body> -> (tag-start
191     (word (and (= (end-offset tag-start) (start-offset word))
192     (word-is word "body")))
193     (tag-end (= (end-offset word) (start-offset tag-end))))
194     :start-mark (start-mark tag-start))
195     (</body> -> (tag-start
196     (slash (= (end-offset tag-start) (start-offset slash)))
197     (word (and (= (end-offset slash) (start-offset word))
198     (word-is word "body")))
199     (tag-end (= (end-offset word) (start-offset tag-end))))
200     :start-mark (start-mark tag-start))
201     (html -> (<html> head body </html>)
202 rstrandh 1.6 :start-offset (start-offset <html>) :end-offset (end-offset </html>)
203 rstrandh 1.4 :start <html> :head head :body body :end </html>)
204     (head -> (<head> title </head>)
205 rstrandh 1.6 :start-offset (start-offset <head>) :end-offset (end-offset </head>)
206 rstrandh 1.4 :start <head> :title title :end </head>)
207     (title -> (<title> words </title>)
208 rstrandh 1.6 :start-offset (start-offset <title>) :end-offset (end-offset </title>)
209 rstrandh 1.4 :start <title> :words words :end </title>)
210     (body -> (<body> words </body>)
211 rstrandh 1.6 :start-offset (start-offset <body>) :end-offset (end-offset </body>)
212 rstrandh 1.4 :start <body> :words words :end </body>)
213     (words -> ()
214 rstrandh 1.6 (make-instance 'empty-words :start-offset nil))
215 rstrandh 1.4 (words -> (words word)
216 rstrandh 1.6 (make-instance 'nonempty-words
217     :start-offset (or (start-offset words) (start-offset word))
218     :end-offset (end-offset word)
219     :words words :word word))))
220 rstrandh 1.1
221     (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
222     (declare (ignore args))
223 rstrandh 1.4 (with-slots (parser tokens buffer) syntax
224 rstrandh 1.1 (setf parser (make-instance 'parser
225     :grammar *html-grammar*
226     :target 'html))
227 rstrandh 1.4 (insert* tokens 0 (make-instance 'start-element
228     :start-mark (make-instance 'standard-left-sticky-mark
229     :buffer buffer
230     :offset 0)
231     :size 0
232     :state (initial-state parser)))))
233    
234 rstrandh 1.6 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
235     ;;;
236     ;;; update syntax
237    
238 rstrandh 1.4 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
239     (with-slots (parser tokens valid-parse) syntax
240     (loop until (= valid-parse (nb-elements tokens))
241 rstrandh 1.5 while (mark<= (end-offset (element* tokens valid-parse)) bot)
242 rstrandh 1.4 do (let ((current-token (element* tokens (1- valid-parse)))
243     (next-token (element* tokens valid-parse)))
244     (setf (slot-value next-token 'state)
245     (advance-parse parser (list next-token) (slot-value current-token 'state))))
246     (incf valid-parse))))
247 rstrandh 1.1
248     (defmethod update-syntax (buffer (syntax html-syntax))
249 rstrandh 1.4 (let ((low-mark (low-mark buffer))
250     (high-mark (high-mark buffer))
251     (scan))
252     (with-slots (tokens guess-pos valid-parse) syntax
253     (when (mark<= low-mark high-mark)
254     ;; go back to a position before low-mark
255     (loop until (or (= guess-pos 1)
256     (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
257     do (decf guess-pos))
258     ;; go forward to the last position before low-mark
259     (loop with nb-elements = (nb-elements tokens)
260     until (or (= guess-pos nb-elements)
261     (mark>= (end-offset (element* tokens guess-pos)) low-mark))
262     do (incf guess-pos))
263     ;; mark valid parse
264     (setf valid-parse guess-pos)
265     ;; delete entries that must be reparsed
266     (loop until (or (= guess-pos (nb-elements tokens))
267     (mark> (start-mark (element* tokens guess-pos)) high-mark))
268     do (delete* tokens guess-pos))
269     (setf scan (make-instance 'standard-left-sticky-mark
270     :buffer buffer
271     :offset (if (zerop guess-pos)
272     0
273     (end-offset (element* tokens (1- guess-pos))))))
274     ;; scan
275     (loop with start-mark = nil
276     do (loop until (end-of-buffer-p scan)
277     while (whitespacep (object-after scan))
278     do (forward-object scan))
279     until (if (end-of-buffer-p high-mark)
280     (end-of-buffer-p scan)
281     (mark> scan high-mark))
282     do (setf start-mark (clone-mark scan))
283     (insert* tokens guess-pos (next-token scan))
284     (incf guess-pos))))))
285 rstrandh 1.6
286     ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
287     ;;;
288     ;;; display
289    
290 rstrandh 1.7 (defun handle-whitespace (pane buffer start end)
291     (let ((space-width (space-width pane))
292     (tab-width (tab-width pane)))
293     (loop while (< start end)
294     do (ecase (buffer-object buffer start)
295     (#\Newline (terpri pane))
296     (#\Space (stream-increment-cursor-position
297     pane space-width 0))
298     (#\Tab (let ((x (stream-cursor-position pane)))
299     (stream-increment-cursor-position
300     pane (- tab-width (mod x tab-width)) 0))))
301     (incf start))))
302 rstrandh 1.6
303 rstrandh 1.7 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax html-syntax) current-p)
304     (with-slots (top bot) pane
305     (with-slots (tokens) syntax
306     (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-elements tokens)))
307     1.0)))
308     ;; find the last token before bot
309     (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
310     ;; go back to a token before bot
311     (loop until (mark<= (end-offset (element* tokens (1- end-token-index))) bot)
312     do (decf end-token-index))
313     ;; for forward to the last token before bot
314     (loop until (or (= end-token-index (nb-elements tokens))
315     (mark> (start-offset (element* tokens end-token-index)) bot))
316     do (incf end-token-index))
317     (let ((start-token-index end-token-index))
318     ;; go back to the first token after top
319     (loop until (mark<= (end-offset (element* tokens (1- start-token-index))) top)
320     do (decf start-token-index))
321     ;; display the tokens
322     (loop with prev-offset = (offset top)
323     while (< start-token-index end-token-index)
324     do (let ((token (element* tokens start-token-index)))
325     (handle-whitespace pane (buffer pane) prev-offset (start-offset token))
326     (updating-output (pane :unique-id token
327     :id-test #'eq
328     :cache-value token
329     :cache-test #'eq)
330     (present (coerce (region-to-sequence (start-mark token)
331     (end-offset token))
332     'string)
333     'string
334     :stream pane))
335     (setf prev-offset (end-offset token)))
336     (incf start-token-index))))))))
337    
338    

  ViewVC Help
Powered by ViewVC 1.1.5