Update of /cvsroot/scsh/scsh/scsh/rx
In directory usw-pr-cvs1:/tmp/cvs-serv3287
Modified Files:
parse.scm
Log Message:
Hygienic comparison for symbols.
Index: parse.scm
===================================================================
RCS file: /cvsroot/scsh/scsh/scsh/rx/parse.scm,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** parse.scm 2001/03/11 18:52:59 1.4
--- parse.scm 2001/10/01 14:50:33 1.5
***************
*** 163,298 ****
((pair? sre)
! (case (car sre)
! ((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
! ((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
! ((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
! ((=) (non-cset) (let ((n (cadr sre)))
! (re-repeat n n (parse-seq (cddr sre)))))
! ((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
! ((**) (non-cset) (re-repeat (cadr sre) (caddr sre)
! (parse-seq (cdddr sre))))
!
! ;; Choice is special wrt cset? because it's "polymorphic".
! ;; Note that RE-CHOICE guarantees to construct a char-set
! ;; or single-char string regexp if all of its args are char
! ;; classes.
! ((| or) (let ((elts (map (lambda (sre)
! (recur sre case-sensitive? cset?))
! (cdr sre))))
! (if cset?
! (assoc-cset-op char-set-union 'char-set-union elts r)
! (re-choice elts))))
!
! ((: seq) (non-cset) (parse-seq (cdr sre)))
!
! ((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
! ((word+)
! (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
! (,(r '|) . ,(cdr sre)))))
! case-sensitive?
! cset?))
!
! ((submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
! ((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
! (cadr sre)
! (caddr sre)))
!
! ;; We could be more aggressive and push the uncase op down into
! ;; partially-static regexps, but enough is enough.
! ((uncase)
! (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
! (if cset?
!
! (if (re-char-set? re-or-cset) ; A char set or code
! (uncase-char-set re-or-cset) ; producing a char set.
! `(,(r 'uncase) ,re-or-cset))
!
! (if (static-regexp? re-or-cset) ; A regexp or code
! (uncase re-or-cset) ; producing a regexp.
! `(,(r 'uncase)
! ,(regexp->scheme (simplify-regexp re-or-cset) r))))))
!
! ;; These just change the lexical case-sensitivity context.
! ((w/nocase) (parse-seq/context (cdr sre) #f))
! ((w/case) (parse-seq/context (cdr sre) #t))
!
! ;; ,<exp> and ,@<exp>
! ((unquote)
(let ((exp (cadr sre)))
(if cset?
`(,%coerce-dynamic-charset ,exp)
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
! ((unquote-splicing)
! (let ((exp (cadr sre)))
! (if cset?
! `(,%coerce-dynamic-charset ,exp)
! `(,%coerce-dynamic-regexp ,exp))))
!
! ((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
! (map parse-char-class (cdr sre))
! r))
! (cs (if (char-set? cs)
(char-set-complement cs)
`(,(r 'char-set-complement) ,cs))))
! (if cset? cs (make-re-char-set cs))))
!
! ((&) (let ((cs (assoc-cset-op char-set-intersection
'char-set-intersection
! (map parse-char-class (cdr sre))
! r)))
! (if cset? cs (make-re-char-set cs))))
!
! ((-) (if (pair? (cdr sre))
! (let* ((cs1 (parse-char-class (cadr sre)))
! (cs2 (assoc-cset-op char-set-union 'char-set-union
(map parse-char-class (cddr
sre))
r))
! (cs (if (and (char-set? cs1) (char-set? cs2))
! (char-set-difference cs1 cs2)
! `(,(r 'char-set-difference)
! ,(if (char-set? cs1)
! (char-set->scheme cs1 r)
cs1)
! . ,(if (char-set? cs2)
! (list (char-set->scheme cs2 r))
! (cdr cs2))))))
! (if cset? cs (make-re-char-set cs)))
! (error "SRE set-difference operator (- ...) requires at
least one argument")))
!
! ((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
(if cset? cset (make-re-char-set cset))))
!
! ((posix-string)
(if (and (= 1 (length (cdr sre)))
(string? (cadr sre)))
(posix-string->regexp (cadr sre))
(error "Illegal (posix-string ...) SRE body." sre)))
!
! (else (if (every string? sre) ; A set spec -- ("wxyz").
! (let* ((cs (apply char-set-union
! (map string->char-set sre)))
! (cs (if case-sensitive? cs (uncase-char-set cs))))
! (if cset? cs (make-re-char-set cs)))
!
! (error "Illegal SRE" sre)))))
;; It must be a char-class name (ANY, ALPHABETIC, etc.)
! (else (let ((cs (case sre
! ((any) char-set:full)
! ((nonl) nonl-chars)
! ((lower-case lower) char-set:lower-case)
! ((upper-case upper) char-set:upper-case)
! ((alphabetic alpha) char-set:letter)
! ((numeric digit num) char-set:digit)
! ((alphanumeric alnum alphanum) char-set:letter+digit)
! ((punctuation punct) char-set:punctuation)
! ((graphic graph) char-set:graphic)
! ((blank) char-set:blank)
! ((whitespace space white) char-set:whitespace)
! ((printing print) char-set:printing)
! ((control cntrl) char-set:iso-control)
! ((hex-digit xdigit hex) char-set:hex-digit)
! ((ascii) char-set:ascii)
! (else (error "Illegal regular expression" sre)))))
! (if cset? cs (make-re-char-set cs))))))))
--- 163,309 ----
((pair? sre)
! (let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
! (cond
! ((hygn-eq? '*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
! ((hygn-eq? '+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
! ((hygn-eq? '?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
! ((hygn-eq? '=) (non-cset) (let ((n (cadr sre)))
! (re-repeat n n (parse-seq (cddr
sre)))))
! ((hygn-eq? '>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq
(cddr sre))))
! ((hygn-eq? '**) (non-cset) (re-repeat (cadr sre) (caddr sre)
! (parse-seq (cdddr sre))))
!
! ;; Choice is special wrt cset? because it's "polymorphic".
! ;; Note that RE-CHOICE guarantees to construct a char-set
! ;; or single-char string regexp if all of its args are char
! ;; classes.
! ((or (hygn-eq? '|)
! (hygn-eq? 'or))
! (let ((elts (map (lambda (sre)
! (recur sre case-sensitive? cset?))
! (cdr sre))))
! (if cset?
! (assoc-cset-op char-set-union 'char-set-union elts r)
! (re-choice elts))))
!
! ((or (hygn-eq? ':)
! (hygn-eq? 'seq))
! (non-cset) (parse-seq (cdr sre)))
!
! ((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre)
,%eow)))
! ((hygn-eq? 'word+)
! (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
! (,(r '|) . ,(cdr sre)))))
! case-sensitive?
! cset?))
!
! ((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr
sre))))
! ((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
! (cadr sre)
! (caddr sre)))
!
! ;; We could be more aggressive and push the uncase op down into
! ;; partially-static regexps, but enough is enough.
! ((hygn-eq? 'uncase)
! (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
! (if cset?
!
! (if (re-char-set? re-or-cset) ; A char set or code
! (uncase-char-set re-or-cset) ; producing a char set.
! `(,(r 'uncase) ,re-or-cset))
!
! (if (static-regexp? re-or-cset) ; A regexp or code
! (uncase re-or-cset) ; producing a regexp.
! `(,(r 'uncase)
! ,(regexp->scheme (simplify-regexp re-or-cset) r))))))
!
! ;; These just change the lexical case-sensitivity context.
! ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f))
! ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t))
!
! ;; ,<exp> and ,@<exp>
! ((hygn-eq? 'unquote)
(let ((exp (cadr sre)))
(if cset?
`(,%coerce-dynamic-charset ,exp)
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
! ((hygn-eq? 'unquote-splicing)
! (let ((exp (cadr sre)))
! (if cset?
! `(,%coerce-dynamic-charset ,exp)
! `(,%coerce-dynamic-regexp ,exp))))
!
! ((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union
'char-set-union
! (map parse-char-class
(cdr sre))
! r))
! (cs (if (char-set? cs)
(char-set-complement cs)
`(,(r 'char-set-complement) ,cs))))
! (if cset? cs (make-re-char-set cs))))
!
! ((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection
'char-set-intersection
! (map parse-char-class (cdr
sre))
! r)))
! (if cset? cs (make-re-char-set cs))))
!
! ((hygn-eq? '-) (if (pair? (cdr sre))
! (let* ((cs1 (parse-char-class (cadr sre)))
! (cs2 (assoc-cset-op char-set-union
'char-set-union
(map parse-char-class (cddr
sre))
r))
! (cs (if (and (char-set? cs1) (char-set?
cs2))
! (char-set-difference cs1 cs2)
! `(,(r 'char-set-difference)
! ,(if (char-set? cs1)
! (char-set->scheme cs1 r)
cs1)
! . ,(if (char-set? cs2)
! (list (char-set->scheme
cs2 r))
! (cdr cs2))))))
! (if cset? cs (make-re-char-set cs)))
! (error "SRE set-difference operator (- ...)
requires at least one argument")))
!
! ((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre)
case-sensitive?)))
(if cset? cset (make-re-char-set cset))))
!
! ((hygn-eq? 'posix-string)
(if (and (= 1 (length (cdr sre)))
(string? (cadr sre)))
(posix-string->regexp (cadr sre))
(error "Illegal (posix-string ...) SRE body." sre)))
!
! (else (if (every string? sre) ; A set spec -- ("wxyz").
! (let* ((cs (apply char-set-union
! (map string->char-set sre)))
! (cs (if case-sensitive? cs (uncase-char-set cs))))
! (if cset? cs (make-re-char-set cs)))
!
! (error "Illegal SRE" sre))))))
;; It must be a char-class name (ANY, ALPHABETIC, etc.)
! (else
! (letrec ((hygn-memq? (lambda (sym-list)
! (if (null? sym-list)
! #f
! (or (c sre (r (car sym-list)))
! (hygn-memq? (cdr sym-list)))))))
! (let ((cs (cond
! ((hygn-memq? '(any)) char-set:full)
! ((hygn-memq? '(nonl)) nonl-chars)
! ((hygn-memq? '(lower-case lower))
char-set:lower-case)
! ((hygn-memq? '(upper-case upper))
char-set:upper-case)
! ((hygn-memq? '(alphabetic alpha)) char-set:letter)
! ((hygn-memq? '(numeric digit num)) char-set:digit)
! ((hygn-memq? '(alphanumeric alnum alphanum))
char-set:letter+digit)
! ((hygn-memq? '(punctuation punct)) char-set:punctuation)
! ((hygn-memq? '(graphic graph))
char-set:graphic)
! ((hygn-memq? '(blank)) char-set:blank)
! ((hygn-memq? '(whitespace space white))
char-set:whitespace)
! ((hygn-memq? '(printing print))
char-set:printing)
! ((hygn-memq? '(control cntrl))
char-set:iso-control)
! ((hygn-memq? '(hex-digit xdigit hex))
char-set:hex-digit)
! ((hygn-memq? '(ascii)) char-set:ascii)
! (else (error "Illegal regular expression" sre)))))
! (if cset? cs (make-re-char-set cs)))))))))
|