scsh-users
[Top] [All Lists]

Re: SCSH status and future

To: scsh-users@scsh.net
Subject: Re: SCSH status and future
From: Daniel Hagerty <hag@linnaean.org>
Date: Mon, 20 Feb 2006 05:55:50 +0100 (MET)
List-id: <scsh-users.list-id.scsh.net>
Reply-to: Daniel Hagerty <hag@linnaean.org>
Sender: Daniel Hagerty <hag@linnaean.org>
 > I had already removed features to work around the lack of support of
 > large files (64 bit C off_t type) in scsh. These features are now back
 > in with the switch to guile.

    Try the attached patch and test program.  It works for me on a
system with 64 bit off_t.  It's not terribly tested.

    At a minimum, it should be validated on:
* Big endian machines
* Machines with 32 bit off_t (should work, but not tested).

    and I don't know exactly what linux does for 32 bit machines
talking to large file support;  I assume that's #define
LARGEFILE_SUPPORT or some such, and you get a 64 bit off_t.

    I'm not testing this further, as I've had two computers die on me
in the last 4 hours, and I'm a little fed up with planet earth.

    Caveats and known areas of improvement:
* There's some scheme code that should be checking for bounds errors
and isn't (marked XXX).  This should be addressed.
* I figure out the size of off_t by calling out to C for every routine
that needs to know.  That's strikes me as a little goofy, and should
probably be detected at autoconf time and smashed into configure.scm
or some such.
* I didn't do anything like an exaustive search for off_t users and
fix them all up;  this should give you the basic idea of what to do.

    Hopefully this gives you something to work with.

diff -urwB scsh-0.6.6/scsh/syscalls.scm scsh-0.6.6-largefile/scsh/syscalls.scm
--- scsh-0.6.6/scsh/syscalls.scm        2003-11-12 06:07:29.000000000 -0500
+++ scsh-0.6.6-largefile/scsh/syscalls.scm      2006-02-19 23:31:04.000000000 
-0500
@@ -272,9 +272,11 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; STAT

-(import-os-error-syscall stat-file (path data chase?) "scheme_stat")
+(import-os-error-syscall stat-file (path data bytevec chase?) "scheme_stat")

-(import-os-error-syscall stat-fdes (fd data) "scheme_fstat")
+(import-os-error-syscall stat-fdes (fd data bytevec) "scheme_fstat")
+
+(import-os-error-syscall %sizeof-offt () "scsh_sizeof_offt")

 (define-record file-info
   type
@@ -294,15 +296,16 @@
 ;;; Should be redone to return multiple-values.
 (define (%file-info fd/port/fname chase?)
   (let ((ans-vec (make-vector 11))
+       (offt-bytevec (make-byte-vector (%sizeof-offt) 0))
        (file-type (lambda (type-code)
                     (vector-ref '#(block-special char-special directory fifo
                                                  regular socket symlink)
                                 type-code))))
     (generic-file-op fd/port/fname
                     (lambda (fd)
-                      (stat-fdes fd ans-vec))
+                      (stat-fdes fd ans-vec offt-bytevec))
                     (lambda (fname)
-                      (stat-file fname ans-vec chase?)))
+                      (stat-file fname ans-vec offt-bytevec chase?)))
     (make-file-info (file-type (vector-ref ans-vec 0))
                    (vector-ref ans-vec 1)
                    (vector-ref ans-vec 2)
@@ -310,7 +313,7 @@
                    (vector-ref ans-vec 4)
                    (vector-ref ans-vec 5)
                    (vector-ref ans-vec 6)
-                   (vector-ref ans-vec 7)
+                   (byte-vector->offset (vector-ref ans-vec 7))
                    (vector-ref ans-vec 8)
                    (vector-ref ans-vec 9)
                    (vector-ref ans-vec 10))))
@@ -383,11 +384,11 @@
       (error "Seek does currently not work on buffered ports" fd/port))
   (let ((whence (:optional maybe-whence seek/set))
        (fd (if (integer? fd/port) fd/port (port->fdes fd/port))))
-    (%fd-seek fd offset whence)))
+    (%fd-seek fd (offset->byte-vector offset) whence)))

 (define (tell fd/port)
   (let ((fd (if (integer? fd/port) fd/port (port->fdes fd/port))))
-    (%fd-seek fd 0 seek/delta)))
+    (%fd-seek fd (offset->byte-vector 0) seek/delta)))

 (import-os-error-syscall %char-ready-fdes? (fd) "char_ready_fdes")

@@ -719,6 +720,41 @@
   (sleazy-call/fdes fd/port
     (lambda (fd) (%fcntl-write fd fcntl/set-status-flags flags))))

+;;; Support routines for off_t as byte-vectors
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; off_t might be 64 bits, which the scheme48 FFI has no native means
+;; for handling.  We use little endian encoded byte vectors to pass
+;; the data back and forth to the FFI.
+(define (offset->byte-vector offset)
+  (let* ((sz (%sizeof-offt))
+        (vec (make-byte-vector sz 0)))
+    ;; XXX need to range check the offset and throw an exception if
+    ;; its too large for the system's off_t.
+    (let next-byte ((i 0))
+      (if (= i sz)
+         vec
+         (let ((byte (bitwise-and
+                      #xFF
+                      (arithmetic-shift offset
+                                        (* -8 i)))))
+           (byte-vector-set! vec i byte)
+           (next-byte (+ i 1)))))))
+
+(define (byte-vector->offset bytevec)
+  ;; XXX validate that the bytevector is the correct size to represent
+  ;; an off_t and raise an error otherwise.
+  (let ((sz (%sizeof-offt)))
+    (let next-byte ((i 0)
+                   (offset 0))
+      (if (= i sz)
+         offset
+         (next-byte (+ i 1)
+                    (+ offset
+                       (arithmetic-shift
+                        (byte-vector-ref bytevec i)
+                        (* 8 i))))))))
+
 ;;; Miscellaneous
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

diff -urwB scsh-0.6.6/scsh/syscalls1.c scsh-0.6.6-largefile/scsh/syscalls1.c
--- scsh-0.6.6/scsh/syscalls1.c 2004-03-11 10:54:06.000000000 -0500
+++ scsh-0.6.6-largefile/scsh/syscalls1.c       2006-02-19 23:13:07.000000000 
-0500
@@ -555,12 +555,12 @@
 */

 /* Internal aux function -- loads stat values into Scheme vector: */
-s48_value really_stat(struct stat *s, s48_value vec)
+s48_value really_stat(struct stat *s, s48_value vec, s48_value bytevec)
 {
   int modes, typecode = -1;
-  S48_DECLARE_GC_PROTECT(1);
+  S48_DECLARE_GC_PROTECT(2);

-  S48_GC_PROTECT_1(vec);
+  S48_GC_PROTECT_2(vec, bytevec);

   modes = s->st_mode;
   if( S_ISBLK(modes) )       typecode = 0;
@@ -571,6 +571,8 @@
   else if( S_ISSOCK(modes) ) typecode = 5;
   else if( S_ISLNK(modes) )  typecode = 6;

+  offt_to_bytevec(s->st_size, bytevec);
+
   S48_VECTOR_SET(vec,0,s48_enter_fixnum(typecode));
   S48_VECTOR_SET(vec,1, s48_enter_integer(s->st_dev));
   S48_VECTOR_SET(vec,2, s48_enter_integer(s->st_ino));
@@ -578,7 +580,7 @@
   S48_VECTOR_SET(vec,4, s48_enter_integer(s->st_nlink));
   S48_VECTOR_SET(vec,5, s48_enter_integer(s->st_uid));
   S48_VECTOR_SET(vec,6, s48_enter_integer(s->st_gid));
-  S48_VECTOR_SET(vec,7, s48_enter_integer(s->st_size));
+  S48_VECTOR_SET(vec,7, bytevec);
   S48_VECTOR_SET(vec,8, s48_enter_integer(s->st_atime));
   S48_VECTOR_SET(vec,9, s48_enter_integer(s->st_mtime));
   S48_VECTOR_SET(vec,10, s48_enter_integer(s->st_ctime));
@@ -590,7 +592,7 @@
   return S48_UNSPECIFIC;
 }

-s48_value scheme_stat(s48_value path, s48_value vec, s48_value chase_p)
+s48_value scheme_stat(s48_value path, s48_value vec, s48_value bytevec, 
s48_value chase_p)
 {
   struct stat s;
   const char * cp_path = s48_extract_string(path);
@@ -598,15 +600,15 @@

   if (retval == -1) s48_raise_os_error_2 (errno, path, chase_p);

-  return really_stat (&s, vec);
+  return really_stat (&s, vec, bytevec);
 }

-s48_value scheme_fstat(s48_value fd, s48_value vec)
+s48_value scheme_fstat(s48_value fd, s48_value vec, s48_value bytevec)
 {
   struct stat s;
   int retval = fstat (s48_extract_fixnum (fd), &s);
   if (retval == -1) s48_raise_os_error_1 (errno, fd);
-  return really_stat (&s, vec);
+  return really_stat (&s, vec, bytevec);
 }

 s48_value scsh_symlink(s48_value sch_name1, s48_value sch_name2)
@@ -706,12 +708,16 @@
                     s48_value sch_whence)
 {
   int retval;
+  off_t offset = 0;
+
   S48_DECLARE_GC_PROTECT(3);

   S48_GC_PROTECT_3(sch_fdes, sch_offset, sch_whence);

+  offset = bytevec_to_offt(sch_offset);
+
   retval = lseek (s48_extract_fixnum (sch_fdes),
-                 s48_extract_integer (sch_offset),
+                 offset,
                  s48_extract_fixnum (sch_whence));

   S48_GC_UNPROTECT();
@@ -1140,6 +1146,58 @@
   return uname_record;
 }

+/* Support for off_t expressed in byte vectors
+******************
+*/
+
+s48_value scsh_sizeof_offt()
+{
+  /* This is a goofy way of accomplishing this since off_t's size is
+     fixed at compile time.  Make part of config.scm?  */
+  return(s48_enter_integer(sizeof(off_t)));
+}
+
+/* It's expected that sch_bytevec is already gc protected */
+off_t bytevec_to_offt(s48_value sch_bytevec) {
+  char *bytevec;
+  int i;
+  const size_t sz = sizeof(off_t);
+  off_t retval = 0;
+
+  if(S48_BYTE_VECTOR_LENGTH(sch_bytevec) != sz)
+    /* From my reading, raising an exception doesn't need to call
+       GC_UNPROTECT functions.  This is based on the extract functions
+       raising errors without doign so.  */
+    s48_raise_argument_type_error(sch_bytevec);
+
+  bytevec = s48_extract_byte_vector(sch_bytevec);
+
+  /* This implies a little endian encoding */
+  for(i = 0; i < sz; i++) {
+    retval |= ((off_t)bytevec[i]) << (i * 8);
+#if 0
+    printf("0x%016qx, %02x, %d, %d\n", retval, bytevec[i], i, (i * 8));
+#endif
+  }
+
+  return(retval);
+}
+
+/* It is expected that sch_bytevec is already gc protected */
+void offt_to_bytevec(off_t offset, s48_value sch_bytevec) {
+  const int sz = sizeof(off_t);
+  char *bytevec;
+  int i;
+
+  if(S48_BYTE_VECTOR_LENGTH(sch_bytevec) != sz)
+    s48_raise_argument_type_error(sch_bytevec);
+
+  bytevec = s48_extract_byte_vector(sch_bytevec);
+
+  for(i = 0; i < sz; i++)
+    bytevec[i] = (offset >> (8 * i)) & 0xff;
+}
+
 void s48_init_syscalls (){
   S48_EXPORT_FUNCTION(scheme_exec);
   S48_EXPORT_FUNCTION(scsh_exit);
@@ -1206,6 +1264,9 @@
   S48_EXPORT_FUNCTION(errno_msg);
   S48_EXPORT_FUNCTION(scm_crypt);
   S48_EXPORT_FUNCTION(scm_uname);
+
+  S48_EXPORT_FUNCTION(scsh_sizeof_offt);
+
   S48_GC_PROTECT_GLOBAL(envvec_record_type_binding);
   S48_GC_PROTECT_GLOBAL(add_envvec_finalizerB_binding);
   S48_GC_PROTECT_GLOBAL(current_env);
diff -urwB scsh-0.6.6/scsh/syscalls1.h scsh-0.6.6-largefile/scsh/syscalls1.h
--- scsh-0.6.6/scsh/syscalls1.h 2001-09-07 12:05:31.000000000 -0400
+++ scsh-0.6.6-largefile/scsh/syscalls1.h       2006-02-19 22:44:37.000000000 
-0500
@@ -87,9 +87,9 @@

 int write_stream_substring(s48_value buf, int start, int end, FILE *f);

-s48_value scheme_stat(s48_value path, s48_value vec, s48_value chase_p);
+s48_value scheme_stat(s48_value path, s48_value vec, s48_value bytevec, 
s48_value chase_p);

-s48_value scheme_fstat(s48_value fd, s48_value vec);
+s48_value scheme_fstat(s48_value fd, s48_value vec, s48_value bytevec);

 s48_value scsh_getgid();

@@ -146,3 +146,9 @@
 s48_value scm_closelog();

 s48_value sleep_until();
+
+/* New support for off_t greater than 32 bits.  Use bytevecs */
+s48_value scsh_sizeof_offt();
+
+off_t bytevec_to_offt(s48_value sch_bytevec);
+void offt_to_bytevec(off_t offset, s48_value sch_bytevec);
(define file-name "tstfile")
(define file (open-output-file file-name))
(set-port-buffering file bufpol/none)

(define bigfile-seek (* 8 1024 1024 1024))
(define test-string "flobby")
(define bigfile-size (+ bigfile-seek (string-length test-string)))

(seek file bigfile-seek  seek/set)
(write-string test-string file)
(close file)

(define actual-size (file-info:size (file-info file-name)))

(format (current-output-port)
        "Expected ~a Actual ~a" bigfile-size actual-size)
(newline)
(format (current-output-port)
        "Matches? ~a" (= bigfile-size actual-size))
(newline)
<Prev in Thread] Current Thread [Next in Thread>