/[climacs]/climacs/html-syntax.lisp
ViewVC logotype

Contents of /climacs/html-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations)
Sat Mar 5 07:03:53 2005 UTC (9 years, 1 month ago) by rstrandh
Branch: MAIN
Changes since 1.5: +26 -5 lines
Split off the climacs-html-syntax package from the climacs-syntax
package.  Exported some more symbols from the climacs-syntax package.
Implemented a few more functions in the climacs-syntax package that
can be used to travarse the parse stack.

The redisplay-pane function now calls a generic function
redisplay-pane-with-syntax that also takes a syntax object as argument.
1 ;;; -*- Mode: Lisp; Package: COMMON-LISP-USER -*-
2
3 ;;; (c) copyright 2005 by
4 ;;; Robert Strandh (strandh@labri.fr)
5
6 ;;; This library is free software; you can redistribute it and/or
7 ;;; modify it under the terms of the GNU Library General Public
8 ;;; License as published by the Free Software Foundation; either
9 ;;; version 2 of the License, or (at your option) any later version.
10 ;;;
11 ;;; This library is distributed in the hope that it will be useful,
12 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;;; Library General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU Library General Public
17 ;;; License along with this library; if not, write to the
18 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 ;;; Boston, MA 02111-1307 USA.
20
21 ;;; Syntax for analysing HTML
22
23 (in-package :climacs-html-syntax)
24
25 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 ;;;
27 ;;; grammar classes
28
29 (defclass html-sym ()
30 ((badness :initform 0 :initarg :badness :reader badness)
31 (message :initform "" :initarg :message :reader message)))
32
33 (defmethod parse-tree-better ((t1 html-sym) (t2 html-sym))
34 (and (eq (class-of t1) (class-of t2))
35 (< (badness t1) (badness t2))))
36
37 (defclass html-nonterminal (html-sym)
38 ((start-offset :initarg :start-offset :reader start-offset)
39 (end-offset :initarg :end-offset :reader end-offset)))
40
41 (defclass words (html-nonterminal) ())
42
43 (defclass empty-words (words) ())
44
45 (defclass nonempty-words (words)
46 ((words :initarg :words)
47 (word :initarg :word)))
48
49 (defclass html-balanced (html-nonterminal)
50 ((start :initarg :start)
51 (end :initarg :end)))
52
53 (defclass html (html-balanced)
54 ((head :initarg :head)
55 (body :initarg :body)))
56
57 (defclass head (html-balanced)
58 ((title :initarg :title)))
59
60 (defclass html-words (html-balanced)
61 ((words :initarg :words)))
62
63 (defclass title (html-words) ())
64 (defclass body (html-words) ())
65 (defclass h1 (html-words) ())
66 (defclass h2 (html-words) ())
67 (defclass h3 (html-words) ())
68 (defclass para (html-words) ())
69
70 (defclass html-token (html-sym)
71 ((start-mark :initarg :start-mark :reader start-mark)
72 (size :initarg :size)))
73
74 (defgeneric end-offset (html-token))
75
76 (defmethod end-offset ((token html-token))
77 (with-slots (start-mark size) token
78 (+ (offset start-mark) size)))
79
80 (defgeneric start-offset (html-token))
81
82 (defmethod start-offset ((token html-token))
83 (offset (start-mark token)))
84
85 (defclass <html> (html-token) () (:default-initargs :size 6))
86 (defclass </html> (html-token) ()(:default-initargs :size 7))
87 (defclass <head> (html-token) () (:default-initargs :size 6))
88 (defclass </head> (html-token) () (:default-initargs :size 7))
89 (defclass <title> (html-token) () (:default-initargs :size 7))
90 (defclass </title> (html-token) () (:default-initargs :size 8))
91 (defclass <body> (html-token) () (:default-initargs :size 6))
92 (defclass </body> (html-token) () (:default-initargs :size 7))
93 (defclass <h1> (html-token) () (:default-initargs :size 4))
94 (defclass </h1> (html-token) () (:default-initargs :size 5))
95 (defclass <h2> (html-token) () (:default-initargs :size 4))
96 (defclass </h2> (html-token) () (:default-initargs :size 5))
97 (defclass <h3> (html-token) () (:default-initargs :size 4))
98 (defclass </h3> (html-token) () (:default-initargs :size 5))
99 (defclass <p> (html-token) () (:default-initargs :size 3))
100 (defclass </p> (html-token) () (:default-initargs :size 4))
101 (defclass <ul> (html-token) () (:default-initargs :size 4))
102 (defclass </ul> (html-token) () (:default-initargs :size 5))
103 (defclass <li> (html-token) () (:default-initargs :size 4))
104 (defclass </li> (html-token) () (:default-initargs :size 5))
105
106 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
107 ;;;
108 ;;; lexer
109
110 (defclass html-element (html-token)
111 ((state :initarg :state)))
112
113 (defclass start-element (html-element) ())
114 (defclass tag-start (html-element) ())
115 (defclass tag-end (html-element) ())
116 (defclass slash (html-element) ())
117 (defclass word (html-element) ())
118 (defclass delimiter (html-element) ())
119
120 (defun next-token (scan)
121 (let ((start-mark (clone-mark scan)))
122 (flet ((fo () (forward-object scan)))
123 (macrolet ((make-entry (type)
124 `(return-from next-token
125 (make-instance ,type :start-mark start-mark
126 :size (- (offset scan) (offset start-mark))))))
127 (loop with object = (object-after scan)
128 until (end-of-buffer-p scan)
129 do (case object
130 (#\< (fo) (make-entry 'tag-start))
131 (#\> (fo) (make-entry 'tag-end))
132 (#\/ (fo) (make-entry 'slash))
133 (t (cond ((alphanumericp object)
134 (loop until (end-of-buffer-p scan)
135 while (alphanumericp (object-after scan))
136 do (fo))
137 (make-entry 'word))
138 (t
139 (fo) (make-entry 'delimiter))))))))))
140
141 (define-syntax html-syntax ("HTML" (basic-syntax))
142 ((tokens :initform (make-instance 'standard-flexichain))
143 (guess-pos :initform 1)
144 (valid-parse :initform 1)
145 (parser)))
146
147 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
148 ;;;
149 ;;; parser
150
151 (defun word-is (word string)
152 (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
153 string))
154
155 (defparameter *html-grammar*
156 (grammar
157 (<html> -> (tag-start
158 (word (and (= (end-offset tag-start) (start-offset word))
159 (word-is word "html")))
160 (tag-end (= (end-offset word) (start-offset tag-end))))
161 :start-mark (start-mark tag-start))
162 (</html> -> (tag-start
163 (slash (= (end-offset tag-start) (start-offset slash)))
164 (word (and (= (end-offset slash) (start-offset word))
165 (word-is word "html")))
166 (tag-end (= (end-offset word) (start-offset tag-end))))
167 :start-mark (start-mark tag-start))
168 (<head> -> (tag-start
169 (word (and (= (end-offset tag-start) (start-offset word))
170 (word-is word "head")))
171 (tag-end (= (end-offset word) (start-offset tag-end))))
172 :start-mark (start-mark tag-start))
173 (</head> -> (tag-start
174 (slash (= (end-offset tag-start) (start-offset slash)))
175 (word (and (= (end-offset slash) (start-offset word))
176 (word-is word "head")))
177 (tag-end (= (end-offset word) (start-offset tag-end))))
178 :start-mark (start-mark tag-start))
179 (<title> -> (tag-start
180 (word (and (= (end-offset tag-start) (start-offset word))
181 (word-is word "title")))
182 (tag-end (= (end-offset word) (start-offset tag-end))))
183 :start-mark (start-mark tag-start))
184 (</title> -> (tag-start
185 (slash (= (end-offset tag-start) (start-offset slash)))
186 (word (and (= (end-offset slash) (start-offset word))
187 (word-is word "title")))
188 (tag-end (= (end-offset word) (start-offset tag-end))))
189 :start-mark (start-mark tag-start))
190 (<body> -> (tag-start
191 (word (and (= (end-offset tag-start) (start-offset word))
192 (word-is word "body")))
193 (tag-end (= (end-offset word) (start-offset tag-end))))
194 :start-mark (start-mark tag-start))
195 (</body> -> (tag-start
196 (slash (= (end-offset tag-start) (start-offset slash)))
197 (word (and (= (end-offset slash) (start-offset word))
198 (word-is word "body")))
199 (tag-end (= (end-offset word) (start-offset tag-end))))
200 :start-mark (start-mark tag-start))
201 (html -> (<html> head body </html>)
202 :start-offset (start-offset <html>) :end-offset (end-offset </html>)
203 :start <html> :head head :body body :end </html>)
204 (head -> (<head> title </head>)
205 :start-offset (start-offset <head>) :end-offset (end-offset </head>)
206 :start <head> :title title :end </head>)
207 (title -> (<title> words </title>)
208 :start-offset (start-offset <title>) :end-offset (end-offset </title>)
209 :start <title> :words words :end </title>)
210 (body -> (<body> words </body>)
211 :start-offset (start-offset <body>) :end-offset (end-offset </body>)
212 :start <body> :words words :end </body>)
213 (words -> ()
214 (make-instance 'empty-words :start-offset nil))
215 (words -> (words word)
216 (make-instance 'nonempty-words
217 :start-offset (or (start-offset words) (start-offset word))
218 :end-offset (end-offset word)
219 :words words :word word))))
220
221 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
222 (declare (ignore args))
223 (with-slots (parser tokens buffer) syntax
224 (setf parser (make-instance 'parser
225 :grammar *html-grammar*
226 :target 'html))
227 (insert* tokens 0 (make-instance 'start-element
228 :start-mark (make-instance 'standard-left-sticky-mark
229 :buffer buffer
230 :offset 0)
231 :size 0
232 :state (initial-state parser)))))
233
234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
235 ;;;
236 ;;; update syntax
237
238 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
239 (with-slots (parser tokens valid-parse) syntax
240 (loop until (= valid-parse (nb-elements tokens))
241 while (mark<= (end-offset (element* tokens valid-parse)) bot)
242 do (let ((current-token (element* tokens (1- valid-parse)))
243 (next-token (element* tokens valid-parse)))
244 (setf (slot-value next-token 'state)
245 (advance-parse parser (list next-token) (slot-value current-token 'state))))
246 (incf valid-parse))))
247
248 (defmethod update-syntax (buffer (syntax html-syntax))
249 (let ((low-mark (low-mark buffer))
250 (high-mark (high-mark buffer))
251 (scan))
252 (with-slots (tokens guess-pos valid-parse) syntax
253 (when (mark<= low-mark high-mark)
254 ;; go back to a position before low-mark
255 (loop until (or (= guess-pos 1)
256 (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
257 do (decf guess-pos))
258 ;; go forward to the last position before low-mark
259 (loop with nb-elements = (nb-elements tokens)
260 until (or (= guess-pos nb-elements)
261 (mark>= (end-offset (element* tokens guess-pos)) low-mark))
262 do (incf guess-pos))
263 ;; mark valid parse
264 (setf valid-parse guess-pos)
265 ;; delete entries that must be reparsed
266 (loop until (or (= guess-pos (nb-elements tokens))
267 (mark> (start-mark (element* tokens guess-pos)) high-mark))
268 do (delete* tokens guess-pos))
269 (setf scan (make-instance 'standard-left-sticky-mark
270 :buffer buffer
271 :offset (if (zerop guess-pos)
272 0
273 (end-offset (element* tokens (1- guess-pos))))))
274 ;; scan
275 (loop with start-mark = nil
276 do (loop until (end-of-buffer-p scan)
277 while (whitespacep (object-after scan))
278 do (forward-object scan))
279 until (if (end-of-buffer-p high-mark)
280 (end-of-buffer-p scan)
281 (mark> scan high-mark))
282 do (setf start-mark (clone-mark scan))
283 (insert* tokens guess-pos (next-token scan))
284 (incf guess-pos))))))
285
286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
287 ;;;
288 ;;; display
289
290
291

  ViewVC Help
Powered by ViewVC 1.1.5