Update of /cvsroot/scsh/scsh-0.6/scsh/rx
In directory usw-pr-cvs1:/tmp/cvs-serv28446/scsh/rx
Modified Files:
packages.scm re-low.scm re-subst.scm
Log Message:
Use Scheme 48 regexp code instead of ours.
Index: packages.scm
===================================================================
RCS file: /cvsroot/scsh/scsh-0.6/scsh/rx/packages.scm,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -r1.6 -r1.7
*** packages.scm 2001/07/07 18:50:19 1.6
--- packages.scm 2001/08/09 13:53:18 1.7
***************
*** 4,7 ****
--- 4,48 ----
;;; -Olin <shivers@ai.mit.edu> 8/98
+ ;; From Scheme 48, only here temporarily
+
+ (define-structure external-util (export immutable-copy-string)
+ (open scheme
+ primitives ;copy-bytes!
+ features) ;immutable? make-immutable!
+ (begin
+ (define (immutable-copy-string string)
+ (if (immutable? string)
+ string
+ (let ((copy (copy-string string)))
+ (make-immutable! copy)
+ copy)))
+
+ ; Why isn't this available elsewhere?
+
+ (define (copy-string string)
+ (let* ((length (string-length string))
+ (new (make-string length #\?)))
+ (copy-bytes! string 0 new 0 length)
+ new))))
+
+ (define-interface posix-regexps-interface
+ (export make-regexp
+ (regexp-option :syntax)
+ regexp?
+ regexp-match
+
+ match?
+ match-start
+ match-end
+ match-submatches
+ ))
+
+ (define-structures ((posix-regexps posix-regexps-interface)
+ (posix-regexps-internal (export make-match)))
+ (open scheme define-record-types finite-types external-calls
+ signals
+ external-util)
+ (files regexp))
+
(define-interface basic-re-interface
(export (re-dsm? (proc (:value) :boolean))
***************
*** 129,134 ****
(define re-match-internals-interface
(export (regexp-match:string (proc (:value) :string))
! (regexp-match:start (proc (:value) :vector))
! (regexp-match:end (proc (:value) :vector))))
--- 170,174 ----
(define re-match-internals-interface
(export (regexp-match:string (proc (:value) :string))
! (regexp-match:submatches (proc (:value) :vector))))
***************
*** 168,172 ****
(match:end (proc (:value &opt :exact-integer)
:value))
(match:substring (proc (:value &opt
:exact-integer) :value))
- (clean-up-cres (proc () :unspecific))
(regexp-search (proc (:value :string &opt
:exact-integer)
:value))
--- 208,211 ----
***************
*** 206,209 ****
--- 245,249 ----
external-calls
string-lib ; string-fold
+ posix-regexps
scheme)
***************
*** 291,294 ****
--- 331,335 ----
(open re-level-0
re-match-internals
+ posix-regexps
scsh-utilities ; fold & some string utilities that need to be moved.
scsh-level-0 ; write-string
***************
*** 333,338 ****
;;; re-subst regexp-substitute regexp-substitute/global
;;; re-low match:start match:end match:substring
! ;;; CRE record, new-cre, compile-posix-re->c-struct
! ;;; cre-search cre-search? clean-up-cres
;;; re-syntax sre-form? if-sre-form expand-rx
;;; re.scm The ADT. flush-submatches uncase uncase-char-set
--- 374,379 ----
;;; re-subst regexp-substitute regexp-substitute/global
;;; re-low match:start match:end match:substring
! ;;; CRE record, new-cre
! ;;; cre-search cre-search?
;;; re-syntax sre-form? if-sre-form expand-rx
;;; re.scm The ADT. flush-submatches uncase uncase-char-set
Index: re-low.scm
===================================================================
RCS file: /cvsroot/scsh/scsh-0.6/scsh/rx/re-low.scm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** re-low.scm 2001/01/01 17:19:55 1.5
--- re-low.scm 2001/08/09 13:53:18 1.6
***************
*** 2,37 ****
;;; Copyright (c) 1994 by Olin Shivers.
- (foreign-init-name "re_low")
-
- (foreign-source
- "/* Make sure foreign-function stubs interface to the C funs correctly: */"
- "#include <sys/types.h>"
- "#include \"../regexp/regex.h\""
- "#include \"re1.h\""
- "" ""
- )
-
;;; Match data for regexp matches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record regexp-match
! string ; The string against which we matched
! start ; vector of starting indices
! end) ; vector of ending indices
(define (match:start match . maybe-index)
! (vector-ref (regexp-match:start match)
! (:optional maybe-index 0)))
(define (match:end match . maybe-index)
! (vector-ref (regexp-match:end match)
! (:optional maybe-index 0)))
(define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0))
! (start (vector-ref (regexp-match:start match) i)))
! (and start (substring (regexp-match:string match)
! start
! (vector-ref (regexp-match:end match) i)))))
;;; Compiling regexps
--- 2,28 ----
;;; Copyright (c) 1994 by Olin Shivers.
;;; Match data for regexp matches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record regexp-match
! string
! submatches)
(define (match:start match . maybe-index)
! (match-start
! (vector-ref (regexp-match:submatches match)
! (:optional maybe-index 0))))
(define (match:end match . maybe-index)
! (match-start
! (vector-ref (regexp-match:submatches match)
! (:optional maybe-index 0))))
(define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0))
! (submatch (vector-ref (regexp-match:submatches match) i)))
! (and submatch (substring (regexp-match:string match)
! (match-start submatch)
! (match-end submatch)))))
;;; Compiling regexps
***************
*** 48,63 ****
; string ; The Posix string form of the regexp or #F.
; max-paren ; Max paren in STRING needed for submatches.
! ; (bytes #f) ; Pointer to the compiled form, in the C heap,
or #F.
! ; (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch.
; tvec ; Translation vector for the submatches
; ((disclose self) (list "cre" (cre:string self))))
(define-record-type cre :cre
! (really-make-cre string max-paren bytes bytes/nm tvec debug)
cre?
(string cre:string set-cre:string)
(max-paren cre:max-paren set-cre:max-paren)
! (bytes cre:bytes set-cre:bytes)
! (bytes/nm cre:bytes/nm set-cre:bytes/nm)
(tvec cre:tvec set-cre:tvec)
(debug cre:debug set-cre:debug))
--- 39,54 ----
; string ; The Posix string form of the regexp or #F.
; max-paren ; Max paren in STRING needed for submatches.
! ; (regexp #f) ; Compiled form or #F.
! ; (regexp/nm #f) ; Same as REGEXP, but compiled with no-submatch.
; tvec ; Translation vector for the submatches
; ((disclose self) (list "cre" (cre:string self))))
(define-record-type cre :cre
! (really-make-cre string max-paren regexp regexp/nm tvec debug)
cre?
(string cre:string set-cre:string)
(max-paren cre:max-paren set-cre:max-paren)
! (regexp cre:regexp set-cre:regexp)
! (regexp/nm cre:regexp/nm set-cre:regexp/nm)
(tvec cre:tvec set-cre:tvec)
(debug cre:debug set-cre:debug))
***************
*** 66,167 ****
(lambda (self) (list "cre" (cre:string self))))
- (define-record-resumer :cre (lambda (cre)
- (set-cre:bytes cre #f)
- (set-cre:bytes/nm cre #f)))
-
(define (make-cre str max-paren tvec)
(really-make-cre str max-paren #f #f tvec #f))
- (define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec))
-
(define (max-live-posix-submatch tvec)
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
- (define (compile-posix-re->c-struct re-string sm?)
- (let ((maybe-struct (%compile-re re-string sm?)))
- (if (pair? maybe-struct)
- (error (car maybe-struct)
- (%regerror-msg (car maybe-struct) (cdr maybe-struct))
- compile-posix-re->c-struct re-string sm?)
- maybe-struct)))
-
- ;;; returns pointer as number or a pair of error number and 0
- (define-stubless-foreign %compile-re (pattern submatches?) "compile_re")
-
;;; Searching with compiled regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; cre-search returns match info; cre-search? is just a predicate.
(define (cre-search cre start-vec end-vec str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
! (and re-str
! (let* ((C-bytes (or (cre:bytes cre)
! (let ((C-bytes (compile-posix-re->c-struct re-str
#t)))
! (set-cre:bytes cre C-bytes)
! (register-re-c-struct:bytes cre)
! C-bytes)))
! (retcode (%cre-search C-bytes str start
! (cre:tvec cre)
! (cre:max-paren cre)
! start-vec end-vec)))
! (if (integer? retcode)
! (error retcode (%regerror-msg retcode C-bytes)
! cre-search cre start-vec end-vec str start)
! (and retcode (make-regexp-match str start-vec end-vec)))))))
! (define (cre-search? cre str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
! (and re-str
! (let* ((C-bytes (or (cre:bytes/nm cre)
! (let ((C-bytes (compile-posix-re->c-struct re-str
#f)))
! (set-cre:bytes/nm cre C-bytes)
! (register-re-c-struct:bytes/nm cre)
! C-bytes)))
! (retcode (%cre-search C-bytes str start '#() -1 '#() '#())))
! (if (integer? retcode)
! (error retcode (%regerror-msg retcode C-bytes)
! cre-search? cre str start)
! retcode)))))
!
! ; 0 success, #f no-match, or non-zero int error code:
! (define-stubless-foreign %cre-search
! (compiled-regexp str start tvec max-psm svec evec) "re_search")
!
!
!
! ;;; Generate an error msg from an error code.
!
! (define-stubless-foreign %regerror-msg (errcode re) "re_errint2str")
!
!
! ;;; Reclaiming compiled regexp storage
!
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
! (define-stubless-foreign %free-re (re) "free_re")
!
!
! ;;; Whenever we make a new CRE, add the appropriate finalizer,
! ;;; so the C regex_t structure can be freeed
!
! (define (free-bytes the-cre)
! (if (cre:bytes the-cre)
! (%free-re (cre:bytes the-cre))
! (warn "free-bytes called on #f")))
!
! (define (free-bytes/nm the-cre)
! (if (cre:bytes/nm the-cre)
! (%free-re (cre:bytes/nm the-cre))
! (warn "free-bytes/nm called on #f")))
!
! (define (register-re-c-struct:bytes cre)
! (add-finalizer! cre free-bytes))
!
! (define (register-re-c-struct:bytes/nm cre)
! (add-finalizer! cre free-bytes/nm))
!
!
! (define (register-re-c-struct cre c-bytes)
! (error "function register-re-c-struct no longer supported"))
!
! (define (clean-up-cres)
! (warn "function clean-up-cres no longer supported"))
--- 57,96 ----
(lambda (self) (list "cre" (cre:string self))))
(define (make-cre str max-paren tvec)
(really-make-cre str max-paren #f #f tvec #f))
+ (define (new-cre str tvec)
+ (make-cre str (max-live-posix-submatch tvec) tvec))
(define (max-live-posix-submatch tvec)
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
;;; Searching with compiled regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; cre-search returns match info; cre-search? is just a predicate.
+ ;; ### we do not look at START yet
(define (cre-search cre start-vec end-vec str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
! (if (not re-str)
! #f
! (begin
! (if (not (cre:regexp cre))
! (set-cre:regexp cre (make-regexp re-str
! (regexp-option extended)
! (regexp-option submatches))))
! (let ((ret (regexp-match (cre:regexp cre) str #t #f #f start)))
! (if (not ret)
! #f
! (make-regexp-match str
! (list->vector ret))))))))
! (define (cre-search? cre start-vec end-vec str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
! (if (not re-str)
! #f
! (begin
! (if (not (cre:regexp/nm cre))
! (set-cre:regexp/nm cre (make-regexp re-str
! (regexp-option extended))))
! (regexp-match (cre:regexp/nm cre) str #f #f #f)))))
Index: re-subst.scm
===================================================================
RCS file: /cvsroot/scsh/scsh-0.6/scsh/rx/re-subst.scm,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** re-subst.scm 1999/09/23 14:27:40 1.1
--- re-subst.scm 2001/08/09 13:53:18 1.2
***************
*** 12,23 ****
(define (regexp-substitute port match . items)
(let* ((str (regexp-match:string match))
! (sv (regexp-match:start match))
! (ev (regexp-match:end match))
(range (lambda (item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
! (values (vector-ref sv item)
! (vector-ref ev item)))
! ((eq? 'pre item) (values 0 (vector-ref sv 0)))
! ((eq? 'post item) (values (vector-ref ev 0)
(string-length str)))
(else (error "Illegal substitution item."
--- 12,26 ----
(define (regexp-substitute port match . items)
(let* ((str (regexp-match:string match))
! (submatches (regexp-match:submatches match))
(range (lambda (item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
! (let ((submatch (vector-ref submatches item)))
! (values (match-start submatch)
! (match-end submatch))))
! ((eq? 'pre item) (values 0
! (match-start
! (vector-ref submatches 0))))
! ((eq? 'post item) (values (match-end
! (vector-ref submatches 0))
(string-length str)))
(else (error "Illegal substitution item."
***************
*** 55,63 ****
(define (regexp-substitute/global port re str . items)
(let ((str-len (string-length str))
! (range (lambda (start sv ev item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
! (values (vector-ref sv item)
! (vector-ref ev item)))
! ((eq? 'pre item) (values start (vector-ref sv 0)))
(else (error "Illegal substitution item."
item
--- 58,70 ----
(define (regexp-substitute/global port re str . items)
(let ((str-len (string-length str))
! (range (lambda (start submatches item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
! (let ((submatch (vector-ref submatches item)))
! (values (match-start submatch)
! (match-end submatch))))
! ((eq? 'pre item)
! (values start
! (match-start
! (vector-ref submatches 0))))
(else (error "Illegal substitution item."
item
***************
*** 74,81 ****
(let ((match (regexp-search re str start)))
(if match
! (let* ((sv (regexp-match:start match))
! (ev (regexp-match:end match))
! (s (vector-ref sv 0))
! (e (vector-ref ev 0))
(empty? (= s e)))
(for-each (lambda (item)
--- 81,87 ----
(let ((match (regexp-search re str start)))
(if match
! (let* ((submatches (regexp-match:submatches match))
! (s (match-start (vector-ref submatches 0)))
! (e (match-end (vector-ref submatches 0)))
(empty? (= s e)))
(for-each (lambda (item)
***************
*** 92,96 ****
(else (receive (si ei)
! (range start sv ev item)
(write-string str port si
ei)))))
items))
--- 98,102 ----
(else (receive (si ei)
! (range start submatches item)
(write-string str port si
ei)))))
items))
***************
*** 100,111 ****
;; Either we're making a string, or >1 POST.
(let* ((pieces (let recur ((start 0))
! (if (> start str-len) '()
(let ((match (regexp-search re str start))
(cached-post #f))
(if match
! (let* ((sv (regexp-match:start match))
! (ev (regexp-match:end match))
! (s (vector-ref sv 0))
! (e (vector-ref ev 0))
(empty? (= s e)))
(fold (lambda (item pieces)
--- 106,118 ----
;; Either we're making a string, or >1 POST.
(let* ((pieces (let recur ((start 0))
! (if (> start str-len)
! '()
(let ((match (regexp-search re str start))
(cached-post #f))
(if match
! (let* ((submatches
! (regexp-match:submatches match))
! (s (match-start (vector-ref
submatches 0)))
! (e (match-end (vector-ref submatches
0)))
(empty? (= s e)))
(fold (lambda (item pieces)
***************
*** 129,133 ****
(else (receive (si ei)
! (range start sv ev
item)
(cons (substring str
si ei)
pieces)))))
--- 136,140 ----
(else (receive (si ei)
! (range start
submatches item)
(cons (substring str
si ei)
pieces)))))
|