(* The Universal Machine
*
* By Daniel Lyons <fusion AT storytotell dot org>
* Abstraction Anonymous
* 2006-07-24 @ 2:33 PM
*
*
* This file is meant to eventually be a complete implementation of
* the Universal Machine. Right now it is a complete disassembler for
* Universal bytecode. It seems to be pretty fast -- it can
* disassemble Volume 8 of the Codex in about 2 seconds and umix.umz
* in about 20 seconds.
*
* I am still learning OCaml. There are probably plenty of stupid
* mistakes or inefficiencies in it, but bear in mind this file is not
* complete.
*)
(* This was something I needed in the interpretter but I'm not sure I
* really need it in practice. This probably could be avoided if I
* could remember the bloody type information when I'm writing. *)
(* let make_nativeint_queue () =
let y = Queue.create() in
Queue.push 0n y; Queue.pop y; y;; *)
(* This is an ML structure for the execution state of the UM. In the
* parlance of the codex, that consists of the 8 registers, the platters,
* and the finger. In the context of a modern computer, those
* would be called the registers, the memory, and the program counter.
*
* I added a free list queue so that I could reuse the same memory
* integers. That was based on the C implementation of the memory; in
* practice there may be a better way in OCaml.
*)
type machine_state =
State of nativeint array (* registers *)
* nativeint array array (* platters *)
* nativeint
Queue.
t (* memory free list *) * nativeint;; (* the finger *)
(* This type represents an opcode for the UM. I think this is pretty
* fast in OCaml but I think it was a serious speed hit to do this kind
* of thing in Ruby with symbols.
*)
type opcode = CONDITIONAL_MOVE | DIVISION | OUTPUT
| ARRAY_INDEX | NOT_AND | INPUT
| ARRAY_AMENDMENT | HALT | LOAD_PROGRAM
| ADDITION | ALLOCATION | ORTHOGRAPHY
| MULTIPLICATION | ABANDONMENT | INVALID
(* This translates the opcode integer into an opcode.
*
* I wonder if there is a rational way to combine this with the type
* definition above. There probably is. God I wish I knew OCaml
* better.
*)
let opcode_to_instruction = function
0 -> CONDITIONAL_MOVE | 5 -> DIVISION | 10 -> OUTPUT
| 1 -> ARRAY_INDEX | 6 -> NOT_AND | 11 -> INPUT
| 2 -> ARRAY_AMENDMENT | 7 -> HALT | 12 -> LOAD_PROGRAM
| 3 -> ADDITION | 8 -> ALLOCATION | 13 -> ORTHOGRAPHY
| 4 -> MULTIPLICATION | 9 -> ABANDONMENT | _ -> INVALID
(* This is a cool instruction type.
*
* One interesting thing you'll see a bit later in the instruction
* decoder: an instruction for UM fits into a single regular integer
* in OCaml, but that integer is signed. All of the parts of an
* instruction fit into a regular integer, obviously, because they come
* packed into one. In order to correctly decode the instruction we need
* to make the sign go away while we do our arithmetic. The best way I
* came up with to do this (thanks to Bill) was to convert it to an
* Int64 during decoding, which produces this expanded instruction time,
* laden with fast-acting ints instead.
*
* Anybody with a clue might want to let me in on better ways of doing this.
*)
type instruction =
Instruction of opcode (* this is an opcode type *)
* int (* register A *)
* int (* register B *)
* int (* register C *)
* int (* data *)
* int (* register A for 'orthography' *)
(* Well, here it is. This bitmath is slightly nicer with regular ints
* than with Int64's:
*
* (inst lsr 6) land 0x7
*
* versus
*
* Int64.logand (Int64.shift_right inst 6) 0x7
*
* Yeah. If you're less broken than me, please clue me in.
*
* Otherwise I think this function is pretty reasonable. It seems to
* be pretty fast.
*)
let decode_instruction binst =
let inst =
(Int64.
logand (Int64.
of_int binst
) 0x00000000FFFFFFFFL
) in let opcode = opcode_to_instruction
(Int64.
to_int (Int64.
shift_right inst
28)) in let register_a =
Int64.
to_int (Int64.
logand (Int64.
shift_right inst
6) 0x7L
) in let register_b =
Int64.
to_int (Int64.
logand (Int64.
shift_right inst
3) 0x7L
) in let register_c =
Int64.
to_int (Int64.
logand inst 0x7L
) in let data =
Int64.
to_int (Int64.
logand inst 0x1fffffffL
) in let register_a13 =
Int64.
to_int (Int64.
logand (Int64.
shift_right inst
25) 0x7L
) in Instruction(opcode, register_a, register_b, register_c, data, register_a13)
(* This function takes a filename and a function as args, and
* recursively applies the function to the instructions being decoded
* from the file.
*
* I'm proud of this function. This strikes me as functional and
* mostly-correct. However, I'm having difficulty imagining how to implement
* the VM this way. Right now it suffices for the disassembler routine.
*
* I think what should be happening here instead is passing the result
* of afun back through step somehow, and thence back through afun,
* recursively. The disassembler should then be rewritten to pass unit
* instead of anything meaningful, and the interpretter could be coded
* to pass state through.
*)
let interpret_with filename afun =
let rec step file afun =
afun inst;
step file afun
in
(* This doesn't yet work. I hope the compiler elides it when it makes
* machine code. :) *)
(* let universal_machine () =
(* initial state *)
(* let state = State(Array.make 0 0n, Array.make_matrix 0 0 0n, Queue.create) in *)
let registers =
Array.
make 0 0 in let platters =
Array.
make_matrix 0 0 0 in let free_list =
Queue.
create in let interpret = function
State(op, rega, regb, regc, data, rega13) ->
let register_a = registers.get(rega) in
let register_b = registers.get(regb) in
let register_c = registers.get(regc) in
match op with
CONDITIONAL_MOVE ->
if register_c != 0 then
registers.set(register_a, register_b)
| ARRAY_AMENDMENT ->
registers.set(register_a, platters.get(register_b).get(register_c))
| _ ->
in
interpret *)
(* This is my happy disassemble-instruction routine.
*
* It works and it seems to be pretty fast (when compiled with
* ocamlopt). I don't think it would be impossible to rewrite to take
* unit and do nothing with it, if I could wrap my sleepy brain around
* the step function that would accompany it.
*)
let disassemble inst =
let regular_op inst a b c =
in
match inst with
Instruction(CONDITIONAL_MOVE, a, b, c, _, _) ->
regular_op "CONDITIONAL MOVE" a b c
| Instruction(ARRAY_AMENDMENT, a, b, c, _, _) ->
regular_op "ARRAY AMENDMENT" a b c
| Instruction(ARRAY_INDEX, a, b, c, _, _) ->
regular_op "ARRAY_INDEX" a b c
| Instruction(ADDITION, a, b, c, _, _) ->
regular_op "ADDITION" a b c
| Instruction(MULTIPLICATION, a, b, c, _, _) ->
regular_op "MULTIPLICATION" a b c
| Instruction(DIVISION, a, b, c, _, _) ->
regular_op "DIVISION" a b c
| Instruction(NOT_AND, a, b, c, _, _) ->
regular_op "NOT_AND" a b c
| Instruction(HALT, a, b, c, _, _) ->
regular_op "HALT" a b c
| Instruction(ALLOCATION, a, b, c, _, _) ->
regular_op "ALLOCATION" a b c
| Instruction(ABANDONMENT, a, b, c, _, _) ->
regular_op "ABANDONMENT" a b c
| Instruction(OUTPUT, a, b, c, _, _) ->
regular_op "OUTPUT" a b c
| Instruction(INPUT, a, b, c, _, _) ->
regular_op "INPUT" a b c
| Instruction(LOAD_PROGRAM, a, b, c, _, _) ->
regular_op "LOAD_PROGRAM" a b c
| Instruction(ORTHOGRAPHY, _, _, _, data, a13) ->
(* This main routine demonstrates using interpret_with and the future
* universal machine. *)
(* let _ = interpret_with (Array.get Sys.argv 1) (universal_machine
()) *)
(* This is the disassembler main routine. I could use option parsing
* to make a combined UM intepretter/disassembler later on, that would be
* cool. *)
let _ = interpret_with
(Array.
get Sys.
argv 1) disassemble