scsh-checkins
[Top] [All Lists]

[Scsh-checkins] CVS: scsh-0.6/scsh/rx regexp.scm,NONE,1.1 regexp1.c,NONE

To: scsh-checkins@lists.sourceforge.net
Subject: [Scsh-checkins] CVS: scsh-0.6/scsh/rx regexp.scm,NONE,1.1 regexp1.c,NONE,1.1
From: Mike Sperber <sperber@users.sourceforge.net>
Date: Thu, 09 Aug 2001 06:50:34 -0700
List-id: <scsh-checkins.lists.sourceforge.net>
Sender: scsh-checkins-admin@lists.sourceforge.net
Update of /cvsroot/scsh/scsh-0.6/scsh/rx
In directory usw-pr-cvs1:/tmp/cvs-serv27794/scsh/rx

Added Files:
        regexp.scm regexp1.c 
Log Message:
>From Scheme 48 0.57, extended by an optional START parameter for
REGEXP-MATCH.


--- NEW FILE ---
; Copyright (c) 1993-2001 by Richard Kelsey and Jonathan Rees. See file COPYING.

; Exports:
;   make-regexp
;   regexp?
;   regexp-match
;   regexp-match?
;   regexp-match-start
;   regexp-match-end
;   regexp-option

; The compiled version of the expression is produced when needed.

(define-record-type regexp :regexp
  (really-make-regexp pattern compiled
                      extended? ignore-case? submatches? newline?)
  regexp?
  (pattern regexp-pattern)                             ; immutable string
  (compiled real-regexp-compiled set-regexp-compiled!) ; #f or a c-record
  (extended?    regexp-extended?)                      ; four flags
  (ignore-case? regexp-ignore-case?)
  (submatches?  regexp-submatches?)
  (newline?     regexp-newline?))

; Drop the compiled version when resuming.  We may be resuming on a different
; architecture, or version of the library, or whatever.

(define-record-resumer :regexp
  (lambda (regexp)
    (set-regexp-compiled! regexp #f)))

; There are four options when making a regular expression.

(define-enumerated-type regexp-option :regexp-option
  regexp-option?
  regexp-options
  regexp-option-name
  regexp-option-index
  (extended ignore-case submatches newline))

; Loop down finding which options are present and checking for duplicates.
; This is not specific to regular expressions.
;
; It would be nice if this could handle values as well, as in
; (make-regexp "sldkjf" (regexp-option size 10))

(define (decode-boolean-options options all-options predicate indexer)
  (let ((map (make-vector (vector-length all-options) #f)))
    (let loop ((options options))
      (if (null? options)
          (vector->list map)
          (let ((option (car options)))
            (if (predicate option)
                (let ((index (indexer option)))
                  (if (vector-ref map index)
                      'duplicates
                      (begin
                        (vector-set! map index #t)
                        (loop (cdr options)))))
                'bad-value))))))

; The only thing we do here is to decode the options and make sure that the
; pattern is immutable, as it won't be used until later.

(define (make-regexp pattern . options)
  (let ((options (decode-boolean-options options
                                         regexp-options
                                         regexp-option?
                                         regexp-option-index)))
    (if (and (string? pattern)
             (pair? options))
        (let* ((pattern (immutable-copy-string pattern))
               (regexp (apply really-make-regexp pattern #f options)))
          (add-finalizer! regexp free-compiled-regexp)
          regexp)
        (apply call-error "invalid argument(s)"
                          make-regexp
                          pattern
                          options))))

; Free up the C-heap storage used for the compiled regexp.

(define (free-compiled-regexp regexp)
  (let ((compiled (real-regexp-compiled regexp)))
    (if compiled
        (call-imported-binding posix-free-regexp compiled))))

; We compile the pattern if that hasn't already been done, raising an error
; if anything goes wrong.

(define (regexp-compiled regexp)
  (or (real-regexp-compiled regexp)
      (let ((compiled (call-imported-binding posix-compile-regexp
                                             (regexp-pattern regexp)
                                             (regexp-extended? regexp)
                                             (regexp-ignore-case? regexp)
                                             (regexp-submatches? regexp)
                                             (regexp-newline? regexp))))
        (if (not (integer? compiled))
            (begin
              (set-regexp-compiled! regexp compiled)
              compiled)
            (let ((message (call-imported-binding posix-regexp-error-message
                                                  (regexp-pattern regexp)
                                                  (regexp-extended? regexp)
                                                  (regexp-ignore-case? regexp)
                                                  (regexp-submatches? regexp)
                                                  (regexp-newline? regexp))))
              (error (if message
                         (string-append "Posix regexp: " message)
                         "inconsistent results from Posix regexp compiler")
                     regexp))))))

; Call the pattern matcher.  We return #F if the match fails.  On a successful
; match, we either return #T or a list of match records, depending on the value
; of SUBMATCHES?.

(define (regexp-match regexp string submatches? starts-line? ends-line? . rest)
  (if (and (regexp? regexp)
           (string? string))
      (let ((start (if (null? rest)
                       0
                       (car rest))))
        (call-imported-binding posix-regexp-match
                               (regexp-compiled regexp)
                               string
                               submatches?
                               starts-line?
                               ends-line?
                               start))
      (call-error "invalid argument"
                  regexp-match
                  regexp string starts-line? ends-line?)))
  
; These are made by the C code.  The SUBMATCHES field is not used by us,
; but is used by the functional interface.

(define-record-type match :match
  (make-match start end submatches)
  match?
  (start match-start)
  (end match-end)
  (submatches match-submatches))

(define-record-discloser :match
  (lambda (rem)
    (list 'match
          (match-start rem)
          (match-end rem))))

(define-exported-binding "posix-regexp-match-type" :match)

; The various C functions we use.

(import-definition posix-compile-regexp)
(import-definition posix-regexp-match)
(import-definition posix-regexp-error-message)
(import-definition posix-free-regexp)


--- NEW FILE ---
/* Copyright (c) 1993-2000 by Richard Kelsey and Jonathan Rees.
   See file COPYING. */

/*
 * Scheme 48/POSIX regex interface
 */

#include <sys/types.h>
#include <regex.h> /* POSIX.2 */
#include <stdlib.h>
#include <unistd.h>

#include "scheme48.h"

extern void             s48_init_posix_regex(void);
static s48_value        posix_compile_regexp(s48_value pattern,
                                             s48_value extended_p,
                                             s48_value ignore_case_p,
                                             s48_value submatches_p,
                                             s48_value newline_p),
                        posix_regexp_match(s48_value sch_regex,
                                           s48_value string,
                                           s48_value submatches_p,
                                           s48_value bol_p,
                                           s48_value eol_p,
                                           s48_value sch_start),
                        posix_regexp_error_message(s48_value pattern,
                                             s48_value extended_p,
                                             s48_value ignore_case_p,
                                             s48_value submatches_p,
                                             s48_value newline_p),
                        posix_free_regexp(s48_value sch_regex);

/*
 * Record type imported from Scheme.
 */
static s48_value        posix_regexp_match_type_binding = S48_FALSE;

/*
 * Install all exported functions in Scheme48.
 */
void
s48_init_posix_regexp(void)
{
  /* Export our stuff. */
  S48_EXPORT_FUNCTION(posix_compile_regexp);
  S48_EXPORT_FUNCTION(posix_regexp_match);
  S48_EXPORT_FUNCTION(posix_regexp_error_message);
  S48_EXPORT_FUNCTION(posix_free_regexp);

  /* Protect and import the regex-match type. */
  S48_GC_PROTECT_GLOBAL(posix_regexp_match_type_binding);
  posix_regexp_match_type_binding =
    s48_get_imported_binding("posix-regexp-match-type");
}

/*
 * Interface to regcomp.  We encode the flags, make the return value, and
 * then call regcomp() to fill it in.
 */
static s48_value
posix_compile_regexp(s48_value pattern,
                     s48_value extended_p, s48_value ignore_case_p,
                     s48_value submatches_p, s48_value newline_p)
{
  s48_value sch_regex;
  int status;
  S48_DECLARE_GC_PROTECT(1);
  int flags = S48_EXTRACT_BOOLEAN(extended_p)    ? REG_EXTENDED : 0 |
              S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE    : 0 |
              S48_EXTRACT_BOOLEAN(submatches_p)  ? 0 : REG_NOSUB |
              S48_EXTRACT_BOOLEAN(newline_p)     ? REG_NEWLINE  : 0;

  S48_GC_PROTECT_1(pattern);

  S48_CHECK_STRING(pattern);

  sch_regex = S48_MAKE_VALUE(regex_t);

  status = regcomp(S48_UNSAFE_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
                   S48_UNSAFE_EXTRACT_STRING(pattern),
                   flags);

  S48_GC_UNPROTECT();

  if (status == 0)
    return sch_regex;
  else
    return S48_UNSAFE_ENTER_FIXNUM(status);   /* not that it can do them much 
good */
}

/*
 * Interface to regexec.
 *
 * Returns #f if there is no match, #t if there is a match and submatches_p
 * is false, and a list of regex-match records otherwise.
 *
 * Most of this is making the buffer for the match structs and then translating
 * them into Scheme match records.
 */
static s48_value
posix_regexp_match(s48_value sch_regex, s48_value string,
                   s48_value submatches_p,
                   s48_value bol_p, s48_value eol_p,
                   s48_value sch_start)
{
  int status;
  s48_value result;
  int start, len;
  /* re_nsub doesn't include the full pattern */
  size_t nmatch   = 1 + S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t)->re_nsub;
  regmatch_t *pmatch,
             pmatch_buffer[32];
  int flags = S48_EXTRACT_BOOLEAN(bol_p) ? 0 : REG_NOTBOL |
              S48_EXTRACT_BOOLEAN(eol_p) ? 0 : REG_NOTEOL;

  start = s48_extract_fixnum(sch_start);
  len = S48_STRING_LENGTH(string);
  if ((start < 0) || (start > len))
    s48_raise_range_error(sch_start,
                          s48_enter_fixnum(0), 
                          s48_enter_fixnum(len));

  if (nmatch <= 32)
    pmatch = pmatch_buffer;
  else {
    pmatch = (regmatch_t *) malloc(nmatch * sizeof(regmatch_t));
    if (pmatch == NULL)
      s48_raise_out_of_memory_error(); }
    
  status = regexec(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
                   S48_UNSAFE_EXTRACT_STRING(string) + start,
                   nmatch, pmatch, flags);

  if (status == REG_NOMATCH)
    result = S48_FALSE;
  else if (! S48_EXTRACT_BOOLEAN(submatches_p))
    result = S48_TRUE;
  else {
    s48_value match = S48_FALSE;
    s48_value matches = S48_NULL;
    int i;
    S48_DECLARE_GC_PROTECT(2);
  
    S48_GC_PROTECT_2(match, matches);
    
    for(i = nmatch - 1; i > -1; i--) {
      if (pmatch[i].rm_so == -1)
        match = S48_FALSE;
      else {
        match = s48_make_record(posix_regexp_match_type_binding);
        S48_UNSAFE_RECORD_SET(match, 0,
                              s48_enter_fixnum(pmatch[i].rm_so + start));
        S48_UNSAFE_RECORD_SET(match, 1,
                              s48_enter_fixnum(pmatch[i].rm_eo + start));
        S48_UNSAFE_RECORD_SET(match, 2, S48_FALSE); }  /* submatches */
      matches = s48_cons(match, matches); }
    
    S48_GC_UNPROTECT();

    result = matches; }

  if (nmatch > 32)
    free(pmatch);
  
  return result;
}

/*
 * Interface to regcomp.
 *
 * This takes the same arguments as `compile_regexp' but returns the error
 * message, if any, that `regcomp()' returns.  For some reason `regerror()'
 * requires both the status code and the compiled pattern buffer returned
 * by `regcomp()'.  `compile_regexp' only returned the status so we have to
 * redo the compilation.
 *
 */
static s48_value
posix_regexp_error_message(s48_value pattern,
                           s48_value extended_p, s48_value ignore_case_p,
                           s48_value submatches_p, s48_value newline_p)
{
  regex_t compiled_regex;
  int status;
  int flags = S48_EXTRACT_BOOLEAN(extended_p)    ? REG_EXTENDED : 0 |
              S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE    : 0 |
              S48_EXTRACT_BOOLEAN(submatches_p)  ? 0 : REG_NOSUB |
              S48_EXTRACT_BOOLEAN(newline_p)     ? REG_NEWLINE  : 0;

  S48_CHECK_STRING(pattern);

  status = regcomp(&compiled_regex, S48_UNSAFE_EXTRACT_STRING(pattern), flags);

  if (status == 0)
    return S48_FALSE;
  else {
    size_t buffer_size;
    s48_value buffer;
    
    buffer_size = regerror(status, &compiled_regex, NULL, 0);
    /* For string lengths C counts the nul, Scheme doesn't. */
    buffer = s48_make_string(buffer_size - 1, ' ');
    regerror(status,
             &compiled_regex,
             S48_UNSAFE_EXTRACT_STRING(buffer),
             buffer_size);
    
    return buffer; }
}

/*
 * Stub for regfree().
 */

static s48_value
posix_free_regexp(s48_value sch_regex)
{
  regfree(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t));

  return S48_UNSPECIFIC;
}



<Prev in Thread] Current Thread [Next in Thread>
  • [Scsh-checkins] CVS: scsh-0.6/scsh/rx regexp.scm,NONE,1.1 regexp1.c,NONE,1.1, Mike Sperber <=