Gmane
Gravatar
From: Andreas Fuchs <asf <at> boinkor.net>
Subject: Re: Lightweight IPC for scripting
Newsgroups: gmane.lisp.clump
Date: 2003-09-09 15:43:36 GMT (5 years, 42 weeks, 4 days, 17 hours and 55 minutes ago)
(resend, as I wasn't subscribed to the list when I tried to post through
gmane. Sorry for the inconvenience)

On 2003-09-08, Luke Gorrie <luke <at> bluetail.com> wrote:
> Joe Armstrong also did yet another IPC protocol. I would summarise it
> as "An XML clone that would not offend Lisp programmers". It has a lot
> in common with sexps, though the encoded format doesn't look like one.
> http://www.sics.se/~joe/ubf/site/home.html if you're interested.

Thanks for the pointer. I read that once, thought about how easy it
would be to write a parser for UBF(a) in lisp and then forgot about
it. Did that now, though.

UBF /is/ a very nice format for data exchange (IMHO it's even nicer than
SEXP, for binary data). I hope to write a UBF(A) generator too, soon.

The attached code is a 5-hour hack and my first "serious" parser for
something. if it doesn't adhere to the spec or is buggy or butt-ugly
(pretty likely) or just wrong, please tell me.

Sometime in the next few days, I will put up an arch archive of the
sources. For now, this source file must be enough:
;; arch-tag: 7b6d2428-e297-11d7-ad53-000c76244c24

;; Written 2003-09-09 by Andreas Fuchs <asf <at> boinkor.net>, to celebrate
;; the end of the SOBIG.F crisis
;;
;; This code is in the public domain. No warranty. Do with this code
;; what you want, but please don't sue me for it.
;;
;; This file is an implementation of a UBF(A) (as specified by
;; http://www.sics.se/~joe/ubf/site/ubfa.html) reader. I have tried to
;; make it as correct, tidy and efficient as possible. If it isn't, do
;; something about it. (-:
;;
;; Mapping of UBF data types into lisp types:
;;
;; * integers map to CL integers
;; * strings map to strings
;; * & map to CONS pairs
;; * # maps to nil
;; * 'constants' map to (case-sensitive) symbols in the UBF.A-CONSTANT
;;   package
;; * `type tags` put the last object on the stack into the OBJECT
;;   slot of a new TAGGED-OBJECT structure, intern the type in the
;;   UBF.A-TYPE package and put the symbol into the TYPE slot
;; * { structures } map to lisp vectors.
;; * 11,~binary data~ maps to the type UBF.A::BINARY-DATA. Data
;;   itself is stored in a (vector (unsigned-byte 8)). In fact,
;;   BINARY-DATA is a deftype for (vector (unsigned-byte 8)). Be
;;   careful not to confuse them with { structures }. Test for
;;   BINARY-DATAness first.
;; * register values are not exported as I think they are internal to
;;   the UBF virtual machine (-:
;;
;; TODO:
;; 
;; * nicer exception handling than (assert)
;; * auditing for recognition errors (this /is/ a one-night hack, after all)
;; * auditing for security errors
;; * implementation of a UBF(A) serializer. This code reads data from
;;   a stream only.

(defpackage #:ubf.a
  (:use #:cl)
  (:export #:tagged-object #:binary-data #:read-message #:read-token #:reset-ubf-state))

(defpackage #:ubf.a-constant)		; constants ('') are interned here
(defpackage #:ubf.a-type)    		; type tags (``) are interned here

(in-package #:ubf.a)

(defvar *recognition-stack* nil
  "The top of the recognition stack.")

(defvar *structure-stack* nil
  "Top of the (unspecified) structure stack.")

(defvar *registers* (make-hash-table :test 'eql)
  "Registers and their values")

(defconstant +control-chars+ "'\"~%-0123456789{}#&$>")
(defconstant +whitespace-chars+ '(#\Space #\Newline #\Return #\Tab #\,))

(defstruct tagged-object
  object
  type)

(deftype binary-data (&optional size)
  `(simple-vector (unsigned-byte 8) ,size)) ; Type of 11,~binary data~

(defun read-until-char (delim-char stream)
  "Read from STREAM until you hit DELIM-CHAR and consume the
  DELIM-CHAR."
  (declare (type delim-char simple-char))
  (prog1
      (read-until (lambda (c) (eql delim-char c))
		  stream)
    (read-char stream)))

(defun read-until (predicate stream)
  "Read from STREAM until PREDICATE returns non-NIL. The last
  character read (i.e. the one that triggered PREDICATE) is made
  unread again."
  (coerce (loop for c = (read-char stream)
	     until (funcall predicate c)
	     collect c
	     finally (unread-char c stream))
	  'simple-string))

(defun push-recognized (elt)
  "Push ELT into the recognition stack and increment the current
  structure count as necessary."
  (push elt *recognition-stack*)
  (when *structure-stack*
    (incf (car *structure-stack*)))
  elt)

(defun pop-recognized ()
  "Pop the recognition stack, decrement structure counts as necessary
  and return the element popped."
  (prog1
      (pop *recognition-stack*)
    (when *structure-stack*
      (decf (car *structure-stack*)))))

(defun digitp (char)
  (and (>= (char-code char) (char-code #\0))
       (<= (char-code char) (char-code #\9))))

(defun tag-object (object type)
  "Make a tagged object and return it. TYPE gets interned
  case-insensitively in the UBF.A-TYPE package."
  (make-tagged-object :object object :type (intern type "UBF.A-TYPE")))

(defun read-token (&optional (stream *standard-input*))
  "Read a UBF token from STREAM and put it on the recognition
  stack. May pop from the stack too, e.g. when reading. "
  (let ((c (read-char stream))
	(end-p nil))
    (loop while (position c +whitespace-chars+)
	  do (setf c (read-char stream)))
    (values
     (cond
       ((eql c #\#)
	(push-recognized nil))
       ((eql c #\&)
	(push-recognized
	 (cons (pop-recognized) (pop-recognized))))
       ((eql c #\')
	(push-recognized
	 (intern (read-until-char #\' stream)
		 "UBF.A-CONSTANT")))
       ((eql c #\`)
	(push-recognized
	 (tag-object (pop-recognized)
		     (read-until-char #\` stream))))
       ((eql c #\")
	(push-recognized (read-until-char #\" stream)))
       ((eql c #\%)
	(read-until-char #\% stream)
	(read-token stream))
       ((eql c #\$)
	(assert (= 1 (length *recognition-stack*)))
	(setf end-p t)
	(pop-recognized))
       ((eql c #\-)
	(read-token stream)
	(push-recognized (- 0 (pop-recognized))))
       ((eql c #\~)
	(let* ((length (first *recognition-stack*))
	       (sequence (make-array length :initial-element 0
				     :element-type '(unsigned-byte 8))))
	  (assert (and (integerp length)
		       (> length 0)))
	  (pop-recognized)
	  (assert (= (read-sequence sequence stream) length))
	  (assert (eql (read-char stream) #\~))
	  (push-recognized sequence)))
       ((eql c #\>)
	(assert (not (position (peek-char nil stream) +control-chars+)))
	(setf (gethash (read-char stream) *registers*) (pop-recognized)))
       ((eql c #\{)
	(push 0 *structure-stack*))
       ((eql c #\})
	(let ((struct-idx (pop *structure-stack*))
	      (rs-copy *recognition-stack*))
	  (setf *recognition-stack* (subseq rs-copy struct-idx))
	  (push-recognized
	   (make-array struct-idx :initial-contents
		       (subseq rs-copy 0 struct-idx)))))
       ((digitp c)
	(unread-char c)
	(push-recognized (parse-integer (read-until (lambda (c) (not (as-digit c)))
						    stream))))
       ((not (position c +control-chars+))
	(multiple-value-bind (val register-bound) (gethash c *registers*)
	  (assert register-bound)
	  (push-recognized val))))
     end-p)))

(defun read-message (stream)
  "Reset the UBF state and read an entire $-terminated message from
  STREAM."
  (reset-ubf-state)
  (loop
     (multiple-value-bind (val end) 
	 (read-token *standard-input*)
       ;; (format t "~A~%" val)
       (when end
	 (return val)))))

(defun reset-ubf-state ()
  "Reset the UBF state: clear registers, empty the recognition stack."
  (setf *registers* (make-hash-table :test 'eql))
  (setf *structure-stack nil)
  (setf *recognition-stack nil))

Have fun,
-- 
Andreas Fuchs, <asf <at> acm.org>, asf <at> jabber.at, antifuchs
_______________________________________________
Clump mailing list
Clump <at> manly.caddr.com
http://manly.caddr.com/mailman/listinfo/clump