/[climacs]/climacs/java-syntax.lisp
ViewVC logotype

Contents of /climacs/java-syntax.lisp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (show annotations)
Wed Jan 9 13:03:28 2008 UTC (6 years, 3 months ago) by thenriksen
Branch: MAIN
CVS Tags: HEAD
Changes since 1.6: +1 -1 lines
Fixed silly typo.
1 ;; -*- Mode: Lisp; Package: CLIMACS-JAVA-SYNTAX -*-
2
3 ;;; (c) copyright 2005 by
4 ;;; Robert Strandh (strandh@labri.fr)
5 ;;; (c) copyright 2006 by
6 ;;; Troels Henriksen (athas@sigkill.dk)
7 ;;; (c) copyright 2007 by
8 ;;; John Q Splittist (splittist@gmail.com)
9 ;;;
10 ;;; This library is free software; you can redistribute it and/or
11 ;;; modify it under the terms of the GNU Library General Public
12 ;;; License as published by the Free Software Foundation; either
13 ;;; version 2 of the License, or (at your option) any later version.
14 ;;;
15 ;;; This library is distributed in the hope that it will be useful,
16 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;;; Library General Public License for more details.
19 ;;;
20 ;;; You should have received a copy of the GNU Library General Public
21 ;;; License along with this library; if not, write to the
22 ;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;;; Boston, MA 02111-1307 USA.
24
25 ;;;# Syntax module for analysing Java(TM)
26
27 (in-package :climacs-java-syntax)
28
29 ;;;# The command table.
30
31 (define-syntax-command-table java-table
32 :errorp nil)
33
34 ;;;# The syntax object.
35 ;;;
36 ;;; We could add options here.
37
38 (define-syntax java-syntax (lr-syntax-mixin fundamental-syntax)
39 ((package :accessor package-of
40 :documentation "A list of strings being the components of
41 the `package' definition, if any."))
42 (:name "Java")
43 (:pathname-types "java" "jav")
44 (:command-table java-table)
45 (:default-initargs :initial-state |initial-state |))
46
47 ;;; Now some ways to indicate what the syntax is. Extra details could be
48 ;;; added. For now we'll show the package, if any.
49
50 (defmethod name-for-info-pane ((syntax java-syntax) &key pane)
51 (declare (ignore pane))
52 (update-parse syntax)
53 (format nil "Java~@[:~{~A~^.~}~]"
54 (package-of syntax)))
55
56 ;;;# Lexing.
57 ;;;
58 ;;; First we define the different states the lexer can be in (as triggered
59 ;;; by the parser.)
60
61 (define-lexer-state lexer-string-state ()
62 ()
63 (:documentation "In this state, the lexer is working inside a string
64 delimited by double quote characters."))
65
66 (define-lexer-state lexer-line-comment-state ()
67 ()
68 (:documentation "In this state, the lexer is working inside a line
69 comment starting with //."))
70
71 (define-lexer-state lexer-long-comment-state ()
72 ()
73 (:documentation "In this state, the lexer is working inside a long
74 comment delimited by /* and */."))
75
76 ;;; And then we define the various elements of the language.
77 ;;;
78 ;;; First, some high-level concepts:
79
80 (defclass java-nonterminal (nonterminal) ())
81
82 (defclass form (java-nonterminal) ())
83
84 ;;; Since we're dealing with things that might not be finished,
85 ;;; we allow for incomplete forms at the end of the buffer.
86
87 (defclass complete-form-mixin () ())
88 (defclass incomplete-form-mixin () ())
89
90 (defclass comment (java-nonterminal) ())
91 (defclass line-comment (java-comment) ())
92 (defclass long-comment (java-comment) ())
93
94 ;;; Of course, sometimes people type things that don't (yet) comply
95 ;;; with the language specification.
96
97 (defclass error-symbol (java-nonterminal) ())
98
99 ;;; Finally, we define the relevant lexeme. We will check the `ink' and
100 ;;; and the `face' later during redisplay.
101
102 (defclass java-lexeme (lexeme)
103 ((ink)
104 (face)))
105
106 (defclass form-lexeme (form java-lexeme) ())
107
108 ;;; Keywords come in various flavours.
109
110 (defclass keyword-lexeme (form-lexeme) ())
111
112 (defclass basic-type () ())
113 (defclass modifier () ())
114 (defclass operator () ())
115
116 (eval-when (:compile-toplevel :load-toplevel :execute)
117 (defun spelling-to-symbol (name)
118 (intern (concatenate 'string name "-LEXEME") #.*package*)))
119
120 (defmacro define-keywords (&rest keyword-names)
121 `(progn
122 ,@(loop for (name . supers) in keyword-names
123 for real-name = (spelling-to-symbol name)
124 collecting `(defclass ,real-name (,@ supers keyword-lexeme) ())
125 into defclasses
126 collecting name into names
127 finally (return (cons `(defparameter *keyword-spellings* ',names)
128 defclasses)))))
129
130 (define-keywords
131 ("abstract" modifier)
132 ("assert" operator)
133 ("boolean" basic-type)
134 ("break" operator)
135 ("byte" basic-type)
136 ("case" operator)
137 ("catch" operator)
138 ("char" basic-type)
139 ("class" operator)
140 ("const") ; reserved but not used
141 ("continue" operator)
142 ("default" operator)
143 ("do" operator)
144 ("double" basic-type)
145 ("else" operator)
146 ("enum" operator)
147 ("extends" operator)
148 ("final" modifier)
149 ("finally" operator)
150 ("float" basic-type)
151 ("for" operator)
152 ("if" operator)
153 ("int" basic-type)
154 ("goto") ; reserved but not used
155 ("implements" operator)
156 ("import" operator)
157 ("instanceof" operator)
158 ("interface" operator)
159 ("long" basic-type)
160 ("native" basic-type)
161 ("new" operator)
162 ("package" operator)
163 ("private" operator)
164 ("package" operator)
165 ("private" modifier)
166 ("protected" modifier)
167 ("public" modifier)
168 ("return" operator)
169 ("short" basic-type)
170 ("static" modifier)
171 ("striftfp" modifier)
172 ("super" operator)
173 ("switch" operator)
174 ("synchronized" modifier)
175 ("this" operator)
176 ("throw" operator)
177 ("throws" operator)
178 ("transient" modifier)
179 ("try" operator)
180 ("void" operator)
181 ("volatile" modifier)
182 ("while" operator))
183
184 (defclass identifier-lexeme (form-lexeme) ())
185 (defclass literal-lexeme (form-lexeme) ())
186 (defclass integer-literal-lexeme (literal-lexeme) ())
187 (defclass decimal-integer-literal-lexeme (integer-literal-lexeme) ())
188 (defclass octal-integer-literal-lexeme (integer-literal-lexeme) ())
189 (defclass hex-integer-literal-lexeme (integer-literal-lexeme) ())
190 (defclass floating-point-literal-lexeme (literal-lexeme) ())
191 (defclass decimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ())
192 (defclass hexidecimal-floating-point-literal-lexeme (floating-point-literal-lexeme) ())
193 ;;; A badly formed, or perhaps unfinished, number.
194 (defclass bad-number-literal-lexeme (literal-lexeme) ())
195 (defclass boolean-literal-lexeme (literal-lexeme) ())
196 (defclass character-literal-lexeme (literal-lexeme) ())
197 (defclass incomplete-character-literal-lexeme (literal-lexeme incomplete-form-mixin) ())
198 (defclass string-literal-lexeme (literal-lexeme) ())
199 (defclass null-literal-lexeme (literal-lexeme) ())
200 (defclass separator-lexeme (form-lexeme) ())
201 (defclass punctuator-lexeme (form-lexeme) ())
202
203 ;;; Separators: ( ) { } [ ] ; , .
204
205 (defclass semi-colon-lexeme (separator-lexeme) ())
206 (defclass comma-lexeme (separator-lexeme) ())
207 (defclass dot-lexeme (separator-lexeme) ())
208 (defclass delimiter-mixin () ())
209 (defclass opening-delimiter-mixin (delimiter-mixin) ())
210 (defclass closing-delimiter-mixin (delimiter-mixin) ())
211
212 (defclass left-bracket-lexeme (separator-lexeme opening-delimiter-mixin) ())
213 (defclass right-bracket-lexeme (separator-lexeme closing-delimiter-mixin) ())
214 (defclass left-parenthesis-lexeme (separator-lexeme opening-delimiter-mixin) ())
215 (defclass right-parenthesis-lexeme (separator-lexeme closing-delimiter-mixin) ())
216 (defclass left-brace-lexeme (separator-lexeme opening-delimiter-mixin) ())
217 (defclass right-brace-lexeme (separator-lexeme closing-delimiter-mixin) ())
218
219 ;;; Operators:
220 ;;; = < > ! ~ ? :
221 ;;; == <= >= != && || ++ --
222 ;;; + - * / & | ^ % << >> >>>
223 ;;; += -= *= /= &= |= ^= %= <<= >>= >>>=
224
225 (defmacro define-operators (&rest punctuator-names)
226 `(progn
227 ,@(loop for name in punctuator-names
228 for real-name = (intern (concatenate 'string
229 (string name) "-LEXEME")
230 #.*package*)
231 collecting `(defclass ,real-name (punctuator-lexeme) ()))))
232
233 (define-operators
234 equal left-angle-bracket right-angle-bracket exclamation tilde question
235 colon
236 eq leq geq neq and-and or-or increment decrement
237 plus minus asterisk slash ampersand pipe circumflex percent
238 left-shift right-shift unsigned-right-shift
239 plus-equal minus-equal asterisk-equal slash-equal ampersand-equal pipe-equal
240 circumflex-equal percent-equal left-shift-equal right-shift-equal
241 unsigned-right-shift-equal)
242
243 ;;; This for annotated interfaces.
244 (defclass ampersand-lexeme (punctuator-lexeme) ())
245
246 ;;; And something for when we come across something completely wrong.
247
248 (defclass error-lexeme (java-lexeme) ())
249
250 ;;; Some lexemes that will drive the parser and lexer.
251
252 (defclass line-comment-start-lexeme (java-lexeme) ())
253 (defclass long-comment-start-lexeme (java-lexeme) ())
254 (defclass comment-end-lexeme (java-lexeme) ())
255 (defclass string-start-lexeme (java-lexeme) ())
256 (defclass string-end-lexeme (java-lexeme) ())
257
258 ;;; And some lexemes used inside strings and comments.
259
260 (defclass word-lexeme (java-lexeme) ())
261 (defclass delimiter-lexeme (java-lexeme) ())
262 (defclass text-lexeme (java-lexeme) ())
263
264 ;;; Some predicates for recognizing the constituents of identifiers.
265 ;;; "The $ character should be used only in mechanically generated
266 ;;; source code or, rarely, to access preexisting names on legacy
267 ;;; systems."
268
269 (defun java-letter-p (ch)
270 (and (characterp ch)
271 (or (alpha-char-p ch)
272 (char= ch #\_)
273 (char= ch #\$))))
274
275 (defun java-letter-or-digit-p (ch)
276 (and (characterp ch)
277 (or (alphanumericp ch)
278 (char= ch #\_)
279 (char= ch #\$))))
280
281 ;;; Something to recognise escapes, including unicode escapes (which may
282 ;;; have multiple #\u characters).
283
284 (defun eat-escape (scan)
285 "Advance over an escape (after the #\\), returning T if valid so far, or NIL."
286 (macrolet ((fo () `(forward-object scan)))
287 (case (object-after scan)
288 ((#\b #\t #\n #\f #\r #\" #\' #\\)
289 (fo) t)
290 (#\u
291 (loop until (end-of-buffer-p scan)
292 while (eql (object-after scan) #\u)
293 do (fo))
294 (loop until (end-of-buffer-p scan)
295 for char = (object-after scan)
296 with count = 0
297 while (and (characterp char)
298 (digit-char-p char 16))
299 do (fo) (incf count)
300 finally (return (or (and (end-of-buffer-p scan)
301 (< count 4))
302 (= count 4)))))
303 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
304 (loop repeat 3
305 until (end-of-buffer-p scan)
306 for char = (object-after scan)
307 while (and (characterp char)
308 (digit-char-p char 8))
309 do (fo))
310 t)
311 (t nil))))
312
313 ;;; The default method for skipping whitespace.
314
315 (defmethod skip-inter ((syntax java-syntax) state scan)
316 (macrolet ((fo () `(forward-object scan)))
317 (loop when (end-of-buffer-p scan)
318 do (return nil)
319 until (not (whitespacep syntax (object-after scan)))
320 do (fo)
321 finally (return t))))
322
323 ;;; The lexing procedure used at the toplevel. Dispatches to lex-token
324 ;;; at the appropriate time - except for standalone dots (where the lexer
325 ;;; doesn't know whether it's looking at a potential number or the
326 ;;; separator in a QualifiedIdentifier).
327
328 (defmethod lex ((syntax java-syntax) (state lexer-toplevel-state) scan)
329 (macrolet ((fo () `(forward-object scan)))
330 (let ((object (object-after scan)))
331 (case object
332 (#\" (fo) (make-instance 'string-start-lexeme))
333 (#\' (fo)
334 (cond ((end-of-buffer-p scan)
335 (make-instance 'incomplete-character-literal-lexeme))
336 (t (cond ((eql (object-after scan) #\\)
337 (fo)
338 (if (not (end-of-buffer-p scan))
339 (unless (eat-escape scan)
340 (return-from lex
341 (make-instance 'error-lexeme)))))
342 (t (fo)))
343 (cond ((end-of-buffer-p scan)
344 (make-instance 'incomplete-character-literal-lexeme))
345 ((eql (object-after scan) #\')
346 (fo)
347 (make-instance 'character-literal-lexeme))
348 (t (make-instance 'error-lexeme))))))
349 (#\[ (fo) (make-instance 'left-bracket-lexeme))
350 (#\] (fo) (make-instance 'right-bracket-lexeme))
351 (#\( (fo) (make-instance 'left-parenthesis-lexeme))
352 (#\) (fo) (make-instance 'right-parenthesis-lexeme))
353 (#\{ (fo) (make-instance 'left-brace-lexeme))
354 (#\} (fo) (make-instance 'right-brace-lexeme))
355 (#\@ (fo) (make-instance 'ampersand-lexeme))
356 (#\. (fo) (if (end-of-buffer-p scan)
357 (make-instance 'dot-lexeme)
358 (cond ((and (characterp (object-after scan))
359 (digit-char-p (object-after scan)))
360 (backward-object scan)
361 (lex-token syntax scan))
362 (t (make-instance 'dot-lexeme)))))
363 (#\- (fo) (if (end-of-buffer-p scan)
364 (make-instance 'minus-lexeme)
365 (case (object-after scan)
366 (#\- (fo) (make-instance 'decrement-lexeme))
367 (#\= (fo) (make-instance 'minus-equal-lexeme))
368 (t (make-instance 'minus-lexeme)))))
369 (#\+ (fo) (if (end-of-buffer-p scan)
370 (make-instance 'plus-lexeme)
371 (case (object-after scan)
372 (#\+ (fo) (make-instance 'increment-lexeme))
373 (#\= (fo) (make-instance 'plus-equal-lexeme))
374 (t (make-instance 'plus-lexeme)))))
375 (#\& (fo) (if (end-of-buffer-p scan)
376 (make-instance 'ampersand-lexeme)
377 (case (object-after scan)
378 (#\& (fo) (make-instance 'and-and-lexeme))
379 (#\= (fo) (make-instance 'ampersand-equal-lexeme))
380 (t (make-instance 'ampersand-lexeme)))))
381 (#\* (fo) (if (end-of-buffer-p scan)
382 (make-instance 'asterisk-lexeme)
383 (cond ((eql (object-after scan) #\=)
384 (fo)
385 (make-instance 'asterisk-equal-lexeme))
386 (t (make-instance 'asterisk-lexeme)))))
387 (#\~ (fo) (make-instance 'tilde-lexeme))
388 (#\! (fo) (if (end-of-buffer-p scan)
389 (make-instance 'exclamation-lexeme)
390 (cond ((eql (object-after scan) #\=)
391 (fo)
392 (make-instance 'neq-lexeme))
393 (t (make-instance 'exclamation-lexeme)))))
394 (#\/ (fo) (if (end-of-buffer-p scan)
395 (make-instance 'slash-lexeme)
396 (case (object-after scan)
397 (#\= (fo) (make-instance 'slash-equal-lexeme))
398 (#\* (fo) (make-instance 'long-comment-start-lexeme))
399 (#\/ (fo) (make-instance 'line-comment-start-lexeme))
400 (t (make-instance 'slash-lexeme)))))
401 (#\% (fo) (if (end-of-buffer-p scan)
402 (make-instance 'percent-lexeme)
403 (case (object-after scan)
404 (#\= (fo) (make-instance 'percent-equal-lexeme))
405 (t (make-instance 'percent-lexeme)))))
406 (#\< (fo) (if (end-of-buffer-p scan)
407 (make-instance 'left-angle-bracket-lexeme)
408 (case (object-after scan)
409 (#\= (fo) (make-instance 'leq-lexeme))
410 (#\< (fo)
411 (cond ((eql (object-after scan) #\=)
412 (fo)
413 (make-instance 'left-shift-equal-lexeme))
414 (t (make-instance 'left-shift-lexeme))))
415 (t (make-instance 'left-angle-bracket-lexeme)))))
416 (#\> (fo) (if (end-of-buffer-p scan)
417 (make-instance 'right-angle-bracket-lexeme)
418 (case (object-after scan)
419 (#\= (fo) (make-instance 'geq-lexeme))
420 (#\> (fo)
421 (cond ((eql (object-after scan) #\=)
422 (fo)
423 (make-instance 'right-shift-equal-lexeme))
424 ((eql (object-after scan) #\>)
425 (fo)
426 (cond ((eql (object-after scan) #\=)
427 (fo)
428 (make-instance 'unsigned-right-shift-equal-lexeme))
429 (t (make-instance 'unsigned-right-shift-lexeme))))
430 (t (make-instance 'right-shift-lexeme))))
431 (t (make-instance 'right-angle-bracket-lexeme)))))
432 (#\= (fo) (if (end-of-buffer-p scan)
433 (make-instance 'equal-lexeme)
434 (cond ((eql (object-after scan) #\=)
435 (fo)
436 (make-instance 'eq-lexeme))
437 (t (make-instance 'equal-lexeme)))))
438 (#\^ (fo) (if (end-of-buffer-p scan)
439 (make-instance 'circumflex-lexeme)
440 (cond ((eql (object-after scan) #\=)
441 (fo)
442 (make-instance 'circumflex-equal-lexeme))
443 (t (make-instance 'circumflex-lexeme)))))
444 (#\| (fo) (if (end-of-buffer-p scan)
445 (make-instance 'pipe-lexeme)
446 (case (object-after scan)
447 (#\| (fo) (make-instance 'or-or-lexeme))
448 (#\= (fo) (make-instance 'pipe-equal-lexeme))
449 (t (make-instance 'pipe-lexeme)))))
450 (#\? (fo) (make-instance 'question-lexeme))
451 (#\: (fo) (make-instance 'colon-lexeme))
452 (#\; (fo) (make-instance 'semi-colon-lexeme))
453 (#\, (fo) (make-instance 'comma-lexeme))
454 (t (cond ((or (java-letter-or-digit-p object)
455 (eql object #\\))
456 (lex-token syntax scan))
457 (t (fo) (make-instance 'error-lexeme))))))))
458
459 ;;; Lexing in strings is essentially splitting the input into words,
460 ;;; delimters and whitespace.
461
462 (defmethod lex ((syntax java-syntax) (state lexer-string-state) scan)
463 (macrolet ((fo () `(forward-object scan)))
464 (let ((object (object-after scan)))
465 (cond ((eql object #\") (fo) (make-instance 'string-end-lexeme))
466 ((eql object #\\)
467 (fo)
468 (eat-escape scan)
469 (make-instance 'delimiter-lexeme))
470 ((java-letter-or-digit-p object)
471 (loop until (or (end-of-buffer-p scan)
472 (not (java-letter-or-digit-p (object-after scan))))
473 do (fo))
474 (make-instance 'word-lexeme))
475 (t (fo) (make-instance 'delimiter-lexeme))))))
476
477 ;;; Lexing in comments is similar to strings, but in long comments we
478 ;;; need to detect the comment end.
479
480 (defmethod lex ((syntax java-syntax) (state lexer-long-comment-state) scan)
481 (flet ((fo () (forward-object scan)))
482 (let ((object (object-after scan)))
483 (cond ((eql object #\*)
484 (fo)
485 (cond ((or (end-of-buffer-p scan)
486 (not (eql (object-after scan) #\/)))
487 (make-instance 'delimiter-lexeme))
488 (t (fo) (make-instance 'comment-end-lexeme))))
489 ((java-letter-or-digit-p object)
490 (loop until (or (end-of-buffer-p scan)
491 (not (java-letter-or-digit-p (object-after scan))))
492 do (fo))
493 (make-instance 'word-lexeme))
494 (t (fo) (make-instance 'delimiter-lexeme))))))
495
496 (defmethod skip-inter ((syntax java-syntax)
497 (state lexer-line-comment-state)
498 scan)
499 (macrolet ((fo () `(forward-object scan)))
500 (loop until (or (end-of-line-p scan)
501 (not (whitespacep syntax (object-after scan))))
502 do (fo)
503 finally (return t))))
504
505 (defmethod lex ((syntax java-syntax) (state lexer-line-comment-state) scan)
506 (macrolet ((fo () `(forward-object scan)))
507 (cond ((end-of-line-p scan)
508 (make-instance 'comment-end-lexeme))
509 ((java-letter-or-digit-p (object-after scan))
510 (loop until (or (end-of-buffer-p scan)
511 (not (java-letter-or-digit-p (object-after scan))))
512 do (fo))
513 (make-instance 'word-lexeme))
514 (t (fo) (make-instance 'delimiter-lexeme)))))
515
516 ;;; Recognise the various types of numbers, returning the appropriate
517 ;;; class name. We return `'bad-number-lexeme' in some circumstances where
518 ;;; the author might just not have finished typing in. The logic detects
519 ;;; 'long' versions separately, although the same result as the non-long
520 ;;; version is returned for now.
521
522 (defun lex-number (scan)
523 (let (hex oct dot exp float-suffix)
524 (labels ((fo () (forward-object scan))
525 (eat-digits (&optional (radix 10))
526 (loop until (end-of-buffer-p scan)
527 while (and (characterp (object-after scan))
528 (digit-char-p (object-after scan) radix))
529 do (fo))))
530 (when (eql (object-after scan) #\0)
531 (fo)
532 (cond ((end-of-buffer-p scan)
533 (return-from lex-number 'decimal-integer-literal-lexeme))
534 ((equalp (object-after scan) #\X)
535 (fo)
536 (setf hex t))
537 ((eql (object-after scan) #\.)
538 (fo)
539 (setf dot t))
540 ((and (characterp (object-after scan))
541 (digit-char-p (object-after scan) 8))
542 (setf oct t))
543 ((equalp (object-after scan) #\L)
544 (fo)
545 (return-from lex-number 'decimal-integer-literal-lexeme))))
546 (eat-digits (cond (hex 16) (oct 8) (t 10)))
547 (when (end-of-buffer-p scan)
548 (return-from lex-number
549 (cond (hex 'hex-integer-literal-lexeme)
550 (oct 'octal-integer-literal-lexeme)
551 (t 'decimal-integer-literal-lexeme))))
552 (cond ((equalp (object-after scan) #\L)
553 (fo)
554 (return-from lex-number
555 (cond (hex 'hex-integer-literal-lexeme)
556 (oct 'octal-integer-literal-lexeme)
557 (t 'decimal-integer-literal-lexeme))))
558 (oct (return-from lex-number 'octal-integer-literal-lexeme))
559 ((eql (object-after scan) #\.)
560 (when dot
561 (return-from lex-number 'bad-number-literal-lexeme))
562 (setf dot t)
563 (fo)
564 (eat-digits (cond (hex 16) (oct 8) (t 10)))))
565 (when (end-of-buffer-p scan)
566 (return-from lex-number
567 (if (or dot exp float-suffix)
568 (if hex
569 'hexidecimal-floating-point-literal-lexeme
570 'decimal-floating-point-literal-lexeme)
571 (if hex
572 'hex-integer-literal-lexeme
573 'decimal-integer-literal-lexeme))))
574 (when (equalp (object-after scan) (if hex #\P #\E))
575 (setf exp t)
576 (fo)
577 (when (end-of-buffer-p scan)
578 (return-from lex-number 'bad-number-literal-lexeme))
579 (if (member (object-after scan) '(#\+ #\-))
580 (fo))
581 (when (end-of-buffer-p scan)
582 (return-from lex-number 'bad-number-literal-lexeme))
583 (eat-digits))
584 (unless (end-of-buffer-p scan)
585 (when (member (object-after scan) '(#\f #\F #\d #\D))
586 (setf float-suffix t)
587 (fo)))
588 (return-from lex-number
589 (if (or dot exp float-suffix)
590 (if hex
591 'hexidecimal-floating-point-literal-lexeme
592 'decimal-floating-point-literal-lexeme)
593 (if hex
594 'hex-integer-literal-lexeme
595 'decimal-integer-literal-lexeme))))))
596
597 ;;; Decide whether we're lexing an identifier (or one of the textual literals)
598 ;;; or a number.
599
600 (defun lex-token (syntax scan)
601 (declare (ignore syntax))
602 (labels ((fo () (forward-object scan)))
603 (cond ((java-letter-p (object-after scan))
604 (let ((token (make-array 32 :element-type 'character
605 :adjustable t :fill-pointer 0)))
606 (loop until (or (end-of-buffer-p scan)
607 (not (or (java-letter-or-digit-p
608 (object-after scan)))))
609 do (vector-push-extend (object-after scan) token)
610 (fo))
611 (cond ((find token *keyword-spellings* :test #'string=)
612 (make-instance (spelling-to-symbol token)))
613 ((string= token "null")
614 (make-instance 'null-literal-lexeme))
615 ((or (string= token "true")
616 (string= token "false"))
617 (make-instance 'boolean-literal-lexeme))
618 (t (make-instance 'identifier-lexeme)))))
619 (t
620 (make-instance (lex-number scan))))))
621
622 ;;; In the error state, just slurp full lines.
623
624 (defmethod lex ((syntax java-syntax) (state lexer-error-state) scan)
625 (macrolet ((fo () `(forward-object scan)))
626 (loop until (end-of-line-p scan)
627 do (fo))
628 (make-instance 'error-lexeme)))
629
630 ;;;# Parsing
631
632 (defmacro define-java-action ((state lexeme) &body body)
633 `(defmethod action ((syntax java-syntax) (state ,state) (lexeme ,lexeme))
634 ,@body))
635
636 (defmacro define-new-java-state ((state parser-symbol) &body body)
637 `(defmethod new-state ((syntax java-syntax)
638 (state ,state)
639 (tree ,parser-symbol))
640 ,@body))
641
642 (define-java-action (error-reduce-state (eql nil))
643 (throw 'done nil))
644
645 ;;; The default action for any lexeme is shift.
646 (define-java-action (t java-lexeme)
647 lexeme)
648
649 ;;; The action on end-of-buffer is to reduce to the error symbol.
650 (define-java-action (t (eql nil))
651 (reduce-all error-symbol))
652
653 ;;; The default new state is the error state.
654 (define-new-java-state (t parser-symbol) error-state)
655
656 ;;; The new state when an error-state
657 (define-new-java-state (t error-symbol) error-reduce-state)
658
659 ;;;;;;;;;;;;;;;; Top-level
660
661 #| rules
662 form* ->
663 form* -> form* form
664 |#
665
666 ;;; parse trees
667 (defclass form* (java-nonterminal) ())
668
669 (define-parser-state |form* | (lexer-toplevel-state parser-state) ())
670 (define-parser-state form-may-follow (lexer-toplevel-state parser-state) ())
671 (define-parser-state |initial-state | (form-may-follow) ())
672
673 (define-new-java-state (|initial-state | form) |initial-state |)
674 (define-new-java-state (|initial-state | comment) |initial-state |)
675
676 (define-java-action (|initial-state | (eql nil))
677 (reduce-all form*))
678
679 (define-new-java-state (|initial-state | form*) |form* | )
680
681 (define-java-action (|form* | (eql nil))
682 (throw 'done nil))
683
684 ;;;;;;;;;;;;;;;; String
685
686 ;;; parse trees
687 (defclass string-form (form) ())
688 (defclass complete-string-form (string-form complete-form-mixin) ())
689 (defclass incomplete-string-form (string-form incomplete-form-mixin) ())
690
691 (define-parser-state |" word* | (lexer-string-state parser-state) ())
692 (define-parser-state |" word* " | (lexer-toplevel-state parser-state) ())
693
694 (define-new-java-state (|" word* | word-lexeme) |" word* |)
695 (define-new-java-state (|" word* | delimiter-lexeme) |" word* |)
696 (define-new-java-state (form-may-follow string-start-lexeme) |" word* |)
697 (define-new-java-state (|" word* | string-end-lexeme) |" word* " |)
698
699 ;;; reduce according to the rule form -> " word* "
700 (define-java-action (|" word* " | t)
701 (reduce-until-type complete-string-form string-start-lexeme))
702
703 ;;; reduce at the end of the buffer
704 (define-java-action (|" word* | (eql nil))
705 (reduce-until-type incomplete-string-form string-start-lexeme))
706
707 ;;;;;;;;;;;;;;;; Line comment
708
709 ;;; parse trees
710 (defclass line-comment-form (comment) ())
711
712 (define-parser-state |// word* | (lexer-line-comment-state parser-state) ())
713 (define-parser-state |// word* NL | (lexer-toplevel-state parser-state) ())
714
715 (define-new-java-state (form-may-follow line-comment-start-lexeme) |// word* |)
716 (define-new-java-state (|// word* | word-lexeme) |// word* |)
717 (define-new-java-state (|// word* | delimiter-lexeme) |// word* |)
718 (define-new-java-state (|// word* | comment-end-lexeme) |// word* NL |)
719
720 ;;; reduce according to the rule form -> // word* NL
721 (define-java-action (|// word* NL | t)
722 (reduce-until-type line-comment-form line-comment-start-lexeme))
723
724 ;;;;;;;;;;;;;;;; Long comment
725
726 ;;; parse trees
727 (defclass long-comment-form (comment) ())
728 (defclass complete-long-comment-form (long-comment-form complete-form-mixin) ())
729 (defclass incomplete-long-comment-form (long-comment-form incomplete-form-mixin) ())
730
731 (define-parser-state |/* word* | (lexer-long-comment-state parser-state) ())
732 (define-parser-state |/* word* */ | (lexer-toplevel-state parser-state) ())
733
734 (define-new-java-state (|/* word* | word-lexeme) |/* word* |)
735 (define-new-java-state (|/* word* | delimiter-lexeme) |/* word* |)
736 (define-new-java-state (|/* word* | long-comment-start-lexeme) |/* word* |)
737 (define-new-java-state (|/* word* | long-comment-form) |/* word* |)
738 (define-new-java-state (form-may-follow long-comment-start-lexeme) |/* word* |)
739 (define-new-java-state (|/* word* | comment-end-lexeme) |/* word* */ |)
740
741 ;;; reduce according to the rule form -> /* word* */
742 (define-java-action (|/* word* */ | t)
743 (reduce-until-type complete-long-comment-form long-comment-start-lexeme))
744
745 ;;; reduce at the end of the buffer
746 (define-java-action (|/* word* | (eql nil))
747 (reduce-until-type incomplete-long-comment-form long-comment-start-lexeme))
748
749 ;;; Here we search for the package name.
750
751 (defun update-package-name (buffer syntax)
752 (declare (ignore buffer))
753 (setf (package-of syntax) nil)
754 (with-slots (stack-top) syntax
755 (loop for (token . rest) on (children stack-top)
756 when (typep token '|package|-LEXEME)
757 do (loop for component in rest
758 until (typep component 'semi-colon-lexeme)
759 while (or (typep component 'dot-lexeme)
760 (typep component 'identifier-lexeme)
761 (typep component 'comment))
762 when (typep component 'identifier-lexeme)
763 collect (form-string syntax component) into components
764 finally (setf (package-of syntax) components)))))
765
766 (defmethod update-syntax :after ((syntax java-syntax) prefix-size suffix-size
767 &optional begin end)
768 (declare (ignore begin end))
769 (update-package-name (buffer syntax) syntax))
770
771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
772 ;;;
773 ;;; display
774
775 (defun form-string (syntax form)
776 "Return the string that correspond to `form' in the buffer of
777 `syntax'."
778 (buffer-substring (buffer syntax) (start-offset form) (end-offset form)))
779
780 (define-syntax-highlighting-rules default-java-highlighting
781 (error-symbol (*error-drawing-options*))
782 (string-form (*string-drawing-options*))
783 (operator (*special-operator-drawing-options*))
784 (basic-type (:face :ink +dark-blue+))
785 (modifier (:face :ink +dark-green+))
786 (comment (*comment-drawing-options*))
787 (integer-literal-lexeme (:face :ink +gray50+))
788 (floating-point-literal-lexeme (:face :ink +gray50+)))
789
790 (defparameter *syntax-highlighting-rules* 'default-java-highlighting
791 "The syntax highlighting rules used for highlighting Java
792 syntax.")
793
794 (defmethod syntax-highlighting-rules ((syntax java-syntax))
795 *syntax-highlighting-rules*)
796
797 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
798 ;;;
799 ;;; exploit the parse
800
801 (defun form-string-p (form)
802 (typep form 'string-form))
803
804 (defun commentp (form)
805 (typep form 'comment))
806
807 (defun top-level-vector (syntax)
808 (coerce (children (slot-value syntax 'stack-top)) 'simple-vector))
809
810 (defun top-level-form-before-in-vector (tlv
811 offset
812 &optional ignore-comments-p)
813 "Return top-level form in top-level-vector `tlv' around or before `offset'
814 together with index of form in `tlv', or nil. If `ignore-comments-p', don't
815 treat comments as forms."
816 (loop for count from (1- (length tlv)) downto 0
817 for tlf = (aref tlv count)
818 when (and (or (not ignore-comments-p) (not (commentp tlf)))
819 (< (start-offset tlf) offset (end-offset tlf)))
820 return (values tlf count)
821 when (and (or (not ignore-comments-p) (not (commentp tlf)))
822 (<= (end-offset tlf) offset))
823 return (values tlf count)
824 finally (return nil)))
825
826 (defun top-level-form-after-in-vector (tlv
827 offset
828 &optional ignore-comments-p)
829 "Return top-level form in top-level-vector `tlv' around or after `offset'
830 together with index of form in `tlv', or nil. If `ignore-comments-p', don't
831 treat comments as forms."
832 (loop for tlf across tlv
833 for count from 0
834 when (and (or (not ignore-comments-p) (not (commentp tlf)))
835 (< (start-offset tlf) offset (end-offset tlf)))
836 return (values tlf count)
837 when (and (or (not ignore-comments-p) (not (commentp tlf)))
838 (>= (start-offset tlf) offset))
839 return (values tlf count)
840 finally (return nil)))
841
842 (defun top-level-form-around-in-vector (tlv
843 offset
844 &optional ignore-comments-p)
845 "Return top-level form in top-level-vector `tlv' around `offset'
846 together with index of form in `tlv', or nil. If `ignore-comments-p', don't
847 treat comments as forms."
848 (loop for tlf across tlv
849 for count from 0
850 when (and (or (not ignore-comments-p) (not (commentp tlf)))
851 (< (start-offset tlf) offset (end-offset tlf)))
852 return (values tlf count)
853 when (and (or (not ignore-comments-p) (not (commentp tlf)))
854 (>= (start-offset tlf) offset))
855 return nil
856 finally (return nil)))
857
858 (defun form-around (syntax offset &optional ignore-comments-p)
859 (top-level-form-around-in-vector
860 (top-level-vector syntax)
861 offset
862 ignore-comments-p))
863
864 (defgeneric opening-delimiter-p (token)
865 (:documentation "Is `token' an opening delimiter."))
866
867 (defmethod opening-delimiter-p (token)
868 nil)
869
870 (defmethod opening-delimiter-p ((token opening-delimiter-mixin))
871 t)
872
873 (defgeneric closing-delimiter-p (token)
874 (:documentation "Is `token' a closing delimiter."))
875
876 (defmethod closing-delimiter-p (token)
877 nil)
878
879 (defmethod closing-delimiter-p ((token closing-delimiter-mixin))
880 t)
881
882 (defgeneric matching-delimiter-p (token match)
883 (:documentation "Is `match' a matching delimiter of `token'."))
884
885 (defmethod matching-delimiter-p (token match)
886 nil)
887
888 (defmethod matching-delimiter-p ((token closing-delimiter-mixin)
889 (match opening-delimiter-mixin))
890 (matching-delimiter-p match token))
891
892 (defmethod matching-delimiter-p ((token left-parenthesis-lexeme)
893 (match right-parenthesis-lexeme))
894 t)
895
896 (defmethod matching-delimiter-p ((token left-bracket-lexeme)
897 (match right-bracket-lexeme))
898 t)
899
900 (defmethod matching-delimiter-p ((token left-brace-lexeme)
901 (match right-brace-lexeme))
902 t)
903
904 (defmethod backward-one-expression ((mark mark) (syntax java-syntax))
905 (let ((tlv (top-level-vector syntax)))
906 (multiple-value-bind (form count)
907 (top-level-form-before-in-vector tlv (offset mark) t)
908 (when form
909 (if (closing-delimiter-p form)
910 (loop for index from count downto 0
911 for match = (aref tlv index)
912 with delims = 0
913 when (eql (class-of match)
914 (class-of form))
915 do (incf delims)
916 when (matching-delimiter-p form match)
917 do (decf delims)
918 until (zerop delims)
919 finally (cond ((zerop delims)
920 (setf (offset mark) (start-offset match))
921 (return t))
922 (t (return nil))))
923 (setf (offset mark) (start-offset form)))))))
924
925 (defmethod forward-one-expression ((mark mark) (syntax java-syntax))
926 (let ((tlv (top-level-vector syntax)))
927 (multiple-value-bind (form count)
928 (top-level-form-after-in-vector tlv (offset mark) t)
929 (when form
930 (if (opening-delimiter-p form)
931 (loop for index from count below (length tlv)
932 for match = (aref tlv index)
933 with delims = 0
934 when (eql (class-of match)
935 (class-of form))
936 do (incf delims)
937 when (matching-delimiter-p form match)
938 do (decf delims)
939 until (zerop delims)
940 finally (cond ((zerop delims)
941 (setf (offset mark) (end-offset match))
942 (return t))
943 (t (return nil))))
944 (setf (offset mark) (end-offset form)))))))
945
946 (defmethod forward-one-list (mark (syntax java-syntax))
947 (let ((tlv (top-level-vector syntax)))
948 (multiple-value-bind (form count)
949 (top-level-form-after-in-vector tlv (offset mark))
950 (when form
951 (loop for index from count below (length tlv)
952 for match = (aref tlv index)
953 with delims = ()
954 when (opening-delimiter-p match)
955 do (push match delims)
956 when (closing-delimiter-p match)
957 do (cond ((null delims)
958 (return nil))
959 (t (cond ((matching-delimiter-p match
960 (car delims))
961 (pop delims)
962 (when (null delims)
963 (setf (offset mark) (end-offset match))
964 (return t)))
965 (t (return nil)))))
966 finally (return nil))))))
967
968 (defmethod backward-one-list (mark (syntax java-syntax))
969 (let ((tlv (top-level-vector syntax)))
970 (multiple-value-bind (form count)
971 (top-level-form-before-in-vector tlv (offset mark))
972 (when form
973 (loop for index from count downto 0
974 for match = (aref tlv index)
975 with delims = ()
976 when (closing-delimiter-p match)
977 do (push match delims)
978 when (opening-delimiter-p match)
979 do (cond
980 ((null delims)
981 (return nil))
982 (t (cond ((matching-delimiter-p match
983 (car delims))
984 (pop delims)
985 (when (null delims)
986 (setf (offset mark) (start-offset match))
987 (return t)))
988 (t (return nil)))))
989 finally (return nil))))))
990
991 (drei-motion:define-motion-fns list)
992
993 (defmethod backward-one-down ((mark mark) (syntax java-syntax))
994 (let ((tlv (top-level-vector syntax)))
995 (multiple-value-bind (form count)
996 (top-level-form-before-in-vector tlv (offset mark))
997 (when form
998 (loop for index from count downto 0
999 for match = (aref tlv index)
1000 when (closing-delimiter-p match)
1001 do (setf (offset mark) (start-offset match))
1002 (return t)
1003 finally (return nil))))))
1004
1005 (defmethod backward-one-up (mark (syntax java-syntax))
1006 (let ((tlv (top-level-vector syntax)))
1007 (multiple-value-bind (form count)
1008 (top-level-form-before-in-vector tlv (offset mark))
1009 (when form
1010 (loop for index from count downto 0
1011 for match = (aref tlv index)
1012 with delims = ()
1013 when (closing-delimiter-p match)
1014 do (push match delims)
1015 when (opening-delimiter-p match)
1016 do (cond ((null delims)
1017 (setf (offset mark) (start-offset match))
1018 (return t))
1019 ((matching-delimiter-p match
1020 (car delims))
1021 (pop delims))
1022 (t (return nil)))
1023 finally (return nil))))))
1024
1025 (defmethod forward-one-down ((mark mark) (syntax java-syntax))
1026 (let ((tlv (top-level-vector syntax)))
1027 (multiple-value-bind (form count)
1028 (top-level-form-after-in-vector tlv (offset mark))
1029 (when form
1030 (loop for index from count below (length tlv)
1031 for match = (aref tlv index)
1032 when (opening-delimiter-p match)
1033 do (setf (offset mark) (end-offset match))
1034 (return t)
1035 finally (return nil))))))
1036
1037 (defmethod forward-one-up (mark (syntax java-syntax))
1038 (let ((tlv (top-level-vector syntax)))
1039 (multiple-value-bind (form count)
1040 (top-level-form-after-in-vector tlv (offset mark))
1041 (when form
1042 (loop for index from count below (length tlv)
1043 for match = (aref tlv index)
1044 with delims = ()
1045 when (opening-delimiter-p match)
1046 do (push match delims)
1047 when (closing-delimiter-p match)
1048 do (cond ((null delims)
1049 (setf (offset mark) (end-offset match))
1050 (return t))
1051 ((matching-delimiter-p match
1052 (car delims))
1053 (pop delims))
1054 (t (return nil)))
1055 finally (return nil))))))
1056
1057 ;; (defmethod backward-one-definition ((mark mark) (syntax java-syntax))
1058 ;; )
1059
1060 ;; (defmethod forward-one-definition ((mark mark) (syntax java-syntax))
1061 ;; )
1062
1063 ;;;# Indentation
1064
1065 (defmethod syntax-line-indentation (mark tab-width (syntax java-syntax))
1066 (setf mark (clone-mark mark))
1067 (let ((this-indentation (line-indentation mark tab-width)))
1068 (beginning-of-line mark)
1069 (loop until (beginning-of-buffer-p mark)
1070 do (previous-line mark 0)
1071 when (line-indentation mark tab-width)
1072 return it
1073 finally (return this-indentation))))
1074
1075 ;;;# Commenting
1076
1077 (defmethod syntax-line-comment-string ((syntax java-syntax))
1078 "// ")
1079
1080 (defmethod comment-region ((syntax java-syntax) mark1 mark2)
1081 (line-comment-region syntax mark1 mark2))
1082
1083 (defmethod uncomment-region ((syntax java-syntax) mark1 mark2)
1084 (line-uncomment-region syntax mark1 mark2))
1085
1086 ;; ;;; TESTING
1087
1088 ;; (defun collect-forms (top)
1089 ;; (loop for child in (children top)
1090 ;; collect (collect-forms child)
1091 ;; into things
1092 ;; finally (return (cons top things))))
1093
1094 ;; (define-command (com-dump-forms :name t :command-table java-table)
1095 ;; ()
1096 ;; "Dump the parse trees to trace output."
1097 ;; (let* ((buffer (current-buffer))
1098 ;; (syntax (syntax buffer)))
1099 ;; (pprint (collect-forms (slot-value syntax 'stack-top)) *trace-output*)
1100 ;; (terpri *trace-output*)
1101 ;; (finish-output *trace-output*)))
1102
1103 ;; (set-key 'com-dump-forms
1104 ;; 'java-table
1105 ;; '((#\c :control) (#\c :control)))

  ViewVC Help
Powered by ViewVC 1.1.5