;; DOCUMENTATION SOURCES: ;; http://dwarfstd.org/Dwarf3.pdf ;; http://www.x86-64.org/documentation/abi.pdf ;; GCC source code: gcc/dwarf2out.c gcc/dwarf.h gcc/unwind-dw2* ;; This emits an eh_frame structure. ;; It would be better to go through the assembler, here, i think, but ;; for this proof of concept, this is easier. (defvar *out-vector*) (defun make-byte-vector () (make-array 0 :element-type '(unsigned-byte 8) :adjustable t :fill-pointer t)) (defun emit-uleb128 (value) (declare (type (unsigned-byte 128) value)) (tagbody loop (let ((byte (logand value #x7F))) (setf value (ash value -7)) (if (= value 0) (vector-push-extend byte *out-vector*) (progn (vector-push-extend (logior byte #x80) *out-vector*) (go loop)))))) (defun emit-sleb128 (value) (declare (type (signed-byte 128) value)) (tagbody loop (let ((byte (logand value #x7F))) (setf value (ash value -7)) ;; check if done (if (or (and (zerop value) (zerop (logand byte #x40))) (and (= value -1) (/= (logand byte #x40) 0))) (vector-push-extend byte *out-vector*) (progn (vector-push-extend (logior byte #x80) *out-vector*) (go loop)))))) (defun emit-byte (value) (declare (type (unsigned-byte 8) value)) (vector-push-extend value *out-vector*)) (defun emit-word (value) (declare (type (unsigned-byte 16) value)) (vector-push-extend (ldb (byte 8 0) value) *out-vector*) (vector-push-extend (ldb (byte 8 8) value) *out-vector*)) (defun emit-dword (value) (declare (type (unsigned-byte 32) value)) (vector-push-extend (ldb (byte 8 0) value) *out-vector*) (vector-push-extend (ldb (byte 8 8) value) *out-vector*) (vector-push-extend (ldb (byte 8 16) value) *out-vector*) (vector-push-extend (ldb (byte 8 24) value) *out-vector*)) (defun emit-qword (value) (declare (type (unsigned-byte 64) value)) (vector-push-extend (ldb (byte 8 0) value) *out-vector*) (vector-push-extend (ldb (byte 8 8) value) *out-vector*) (vector-push-extend (ldb (byte 8 16) value) *out-vector*) (vector-push-extend (ldb (byte 8 24) value) *out-vector*) (vector-push-extend (ldb (byte 8 32) value) *out-vector*) (vector-push-extend (ldb (byte 8 40) value) *out-vector*) (vector-push-extend (ldb (byte 8 48) value) *out-vector*) (vector-push-extend (ldb (byte 8 56) value) *out-vector*)) (defun emit-c-string (string) (loop for byte across string do (vector-push-extend (char-code byte) *out-vector*)) (vector-push-extend 0 *out-vector*)) ; null terminate (defun emit-align (alignment) (loop with align-mask = (1- alignment) while (/= (logand (length *out-vector*) align-mask) 0) do (vector-push-extend 0 *out-vector*))) (defun emit-encoded-addr (encoding addr) ;; FIXME: support other encodings (assert (= encoding +DW-EH-PE-ABSPTR+)) (emit-qword addr)) (defun fixup (pos fun) (let ((fp (fill-pointer *out-vector*))) (setf (fill-pointer *out-vector*) pos) (funcall fun) (setf (fill-pointer *out-vector*) fp))) (defun preferred-eh-data-format (a b) ;; FIXME. Probably another encoding would be better... +DW-EH-PE-ABSPTR+) ;;;;;;;;; ;; Call Frame Instruction -- an instruction for the Dwarf2 state machine (defstruct cfi opcode arg1 arg2) ;; The Frame Description Entry: represents how to unwind a particular function (defstruct fde addr-start addr-len lsda cfis) ;; Stores some info common to a set of FDEs (which gets emitted as a ;; CIE: Common Information Entry), and the associated FDEs (defstruct unwind-info personality-routine default-cfis fdes) (defun emit-expression (expr) (declare (ignore expr)) ;; FIXME: support emitting DW-OP expressions. (assert nil)) (defun emit-cfi (cfi) (let ((opcode (cfi-opcode cfi)) (arg1 (cfi-arg1 cfi)) (arg2 (cfi-arg2 cfi))) (case opcode (#.+DW-CFA-advance-loc+ ; arg1 = delta (assert (typep arg1 '(unsigned-byte 6))) (emit-byte (logior opcode arg1))) (#.+DW-CFA-offset+ ; arg1 = register (6bit), arg2 = factored uleb128 offset (assert (typep arg1 '(unsigned-byte 6))) (emit-byte (logior opcode arg1)) (emit-uleb128 arg2)) (#.+DW-CFA-restore+ ; arg1 = register (6bit) (assert (typep arg1 '(unsigned-byte 6))) (emit-byte (logior opcode arg1))) (otherwise (emit-byte opcode) (ecase opcode (#.+DW-CFA-set-loc+ ; arg1 = address, in encoding specified by fde-encoding (emit-encoded-addr (preferred-eh-data-format 1 0) arg1)) (#.+DW-CFA-advance-loc1+ ; arg1 = 1 byte delta (emit-byte arg1)) (#.+DW-CFA-advance-loc2+ ; arg1 = 2 byte delta (emit-word arg1)) (#.+DW-CFA-advance-loc4+ ; arg1 = 4 byte delta (emit-dword arg1)) (#.+DW-CFA-MIPS-advance-loc8+ ; arg1 = 8 byte delta (emit-qword arg1)) ((#.+DW-CFA-offset-extended+ #.+DW-CFA-def-cfa+) ; arg1 = uleb128 register, arg2 = uleb128 {f,nf} offset (emit-uleb128 arg1) (emit-uleb128 arg2)) ((#.+DW-CFA-offset-extended-sf+ #.+DW-CFA-def-cfa-sf+) ; arg1 = uleb128 register, arg2 = sleb128 factored offset (emit-uleb128 arg1) (emit-sleb128 arg2)) ((#.+DW-CFA-restore-extended+ #.+DW-CFA-undefined+ #.+DW-CFA-same-value+ #.+DW-CFA-def-cfa-register+) ; arg1 = uleb128 register (emit-uleb128 arg1)) (#.+DW-CFA-register+ ; arg1 = uleb128 register, arg2 = uleb128 register (emit-uleb128 arg1) (emit-uleb128 arg2)) ((#.+DW-CFA-def-cfa-offset+ #.+DW-CFA-GNU-args-size+) ; arg1 = uleb128 nf offset (emit-uleb128 arg1)) (#.+DW-CFA-def-cfa-offset-sf+ ; arg1 = sleb128 factored offset (emit-sleb128 arg1)) (#.+DW-CFA-def-cfa-expression+ ; arg1 = expression (emit-expression arg1)) (#.+DW-CFA-expression+ ; arg1 = uleb128 register, arg2 = expression (emit-uleb128 arg1) (emit-expression arg2)) ((#.+DW-CFA-remember-state+ #.+DW-CFA-restore-state+ #.+DW-CFA-nop+) ; no args ; do nothing )))))) (defun emit-fde (fde cie-start) (let ((fde-start-offset (length *out-vector*)) fde-len-end (fde-encoding (preferred-eh-data-format 1 0))) ; fixme... (emit-dword 0) ; length of FDE, not including this dword (fixup at end) (emit-dword (- (length *out-vector*) cie-start)) ; cie offset (emit-encoded-addr fde-encoding (fde-addr-start fde)) ; first insn covered by this fde (emit-encoded-addr fde-encoding (fde-addr-len fde)) ; number of insn bytes covered. ; FIXME: output augmentation data here (loop for cfi in (fde-cfis fde) do (emit-cfi cfi)) ;; now fix up the length field (setf fde-len-end (length *out-vector*)) (fixup fde-start-offset (lambda () (emit-dword (- fde-len-end fde-start-offset 4)))))) (defun emit-unwind-info (unwind-info) ;; early escape if no FDEs to emit (if (null (unwind-info-fdes unwind-info)) (return-from emit-unwind-info)) (let ((cie-start-offset (length *out-vector*)) cie-len-end (fde-encoding (preferred-eh-data-format 1 0)) ; fixme... (per-encoding (preferred-eh-data-format 2 1)) (lsda-encoding (preferred-eh-data-format 0 0)) (augmentation nil) (augmentation-data (make-byte-vector))) (emit-dword 0) ; length of CIE, not including this dword (fixup at end) (emit-dword +dw-cie-id+) ; CIE identifier (emit-byte +dw-cie-version+) ; CIE version (let ((*out-vector* augmentation-data)) ; emit to augmentation vector (when (unwind-info-personality-routine unwind-info) ;; the language's exception handler "personality routine" (push #\P augmentation) (emit-byte per-encoding) (emit-encoded-addr per-encoding (unwind-info-personality-routine unwind-info))) (when (some (lambda (fde) (fde-lsda fde)) (unwind-info-fdes unwind-info)) ;; Note if any of the FDEs have "language specific data areas" ;; (useful for denoting catch frames, e.g.) (push #\L augmentation) (emit-byte lsda-encoding)) (when (/= fde-encoding +DW-EH-PE-absptr+) ;; Set the encoding of addresses in the CFA (push #\R augmentation) (emit-byte fde-encoding)) (setf augmentation (nreverse augmentation))) (if augmentation (emit-c-string (concatenate 'vector '(#\z) augmentation)) ; output augmentation string (emit-byte 0)) ; emit empty string (emit-uleb128 1) ; code alignment factor (emit-sleb128 +dwarf-cie-data-alignment+) ; data alignment factor (if (= +dw-cie-version+ 1) ; frame return value (emit-byte +dw-reg-ra+) ; in dwarf2, a byte (emit-uleb128 +dw-reg-ra+)) ; in dwarf3, a uleb128 ;; output augmentation data ;; note: dwarf2out.c does some strange alignment here when ;; per_encoding is DW-EH-PE-aligned i wonder if that's required. (when augmentation (emit-uleb128 (length augmentation-data)) (loop for byte across augmentation-data do (emit-byte byte))) (loop for cfi in (unwind-info-default-cfis unwind-info) do (emit-cfi cfi)) (emit-align +ptr-size+) ;; now fix up the length field (setf cie-len-end (length *out-vector*)) (fixup cie-start-offset (lambda () (emit-dword (- cie-len-end cie-start-offset 4)))) ;; now emit the FDEs (loop for fde in (unwind-info-fdes unwind-info) do (emit-fde fde cie-start-offset)))) (defmacro with-emit-output (&body body) `(let ((*out-vector* (make-byte-vector))) ,@body *out-vector*)) ;; tests #+nil (defun test-emit-LEB () (flet ((run-test (entry fun) (with-emit-output (funcall fun (first entry)) (format t "~%~A ~A ~A ~A" (first entry) *out-vector* (equalp (second entry) *out-vector*) (second entry))))) (let ((table '((2 #(2)) (127 #(127)) (128 #(128 1)) (129 #(129 1)) (130 #(130 1)) (12857 #(185 100))))) (loop for entry in table do (run-test entry #'emit-unsigned-LEB128))) (let ((table '((2 #(2)) (-2 #(#x7e)) (127 #(#.(+ 127 #x80) 0)) (-127 #(#.(+ 1 #x80) #x7f)) (128 #(#.(+ 0 #x80) 1)) (-128 #(#.(+ 0 #x80) #x7f)) (129 #(#.(+ 1 #x80) 1)) (-129 #(#.(+ #x7f #x80) #x7e))))) (loop for entry in table do (run-test entry #'emit-signed-LEB128)))))