(* The Universal Machine * * By Daniel Lyons * Abstraction Anonymous * 2006-07-24 @ 2:33 PM * * * This file is meant to eventually be a complete implementation of * the Universal Machine. Right now it is a complete disassembler for * Universal bytecode. It seems to be pretty fast -- it can * disassemble Volume 8 of the Codex in about 2 seconds and umix.umz * in about 20 seconds. * * I am still learning OCaml. There are probably plenty of stupid * mistakes or inefficiencies in it, but bear in mind this file is not * complete. *) (* This was something I needed in the interpretter but I'm not sure I * really need it in practice. This probably could be avoided if I * could remember the bloody type information when I'm writing. *) (* let make_nativeint_queue () = let y = Queue.create() in Queue.push 0n y; Queue.pop y; y;; *) (* This is an ML structure for the execution state of the UM. In the * parlance of the codex, that consists of the 8 registers, the platters, * and the finger. In the context of a modern computer, those * would be called the registers, the memory, and the program counter. * * I added a free list queue so that I could reuse the same memory * integers. That was based on the C implementation of the memory; in * practice there may be a better way in OCaml. *) type machine_state = State of nativeint array (* registers *) * nativeint array array (* platters *) * nativeint Queue.t (* memory free list *) * nativeint;; (* the finger *) (* This type represents an opcode for the UM. I think this is pretty * fast in OCaml but I think it was a serious speed hit to do this kind * of thing in Ruby with symbols. *) type opcode = CONDITIONAL_MOVE | DIVISION | OUTPUT | ARRAY_INDEX | NOT_AND | INPUT | ARRAY_AMENDMENT | HALT | LOAD_PROGRAM | ADDITION | ALLOCATION | ORTHOGRAPHY | MULTIPLICATION | ABANDONMENT | INVALID (* This translates the opcode integer into an opcode. * * I wonder if there is a rational way to combine this with the type * definition above. There probably is. God I wish I knew OCaml * better. *) let opcode_to_instruction = function 0 -> CONDITIONAL_MOVE | 5 -> DIVISION | 10 -> OUTPUT | 1 -> ARRAY_INDEX | 6 -> NOT_AND | 11 -> INPUT | 2 -> ARRAY_AMENDMENT | 7 -> HALT | 12 -> LOAD_PROGRAM | 3 -> ADDITION | 8 -> ALLOCATION | 13 -> ORTHOGRAPHY | 4 -> MULTIPLICATION | 9 -> ABANDONMENT | _ -> INVALID (* This is a cool instruction type. * * One interesting thing you'll see a bit later in the instruction * decoder: an instruction for UM fits into a single regular integer * in OCaml, but that integer is signed. All of the parts of an * instruction fit into a regular integer, obviously, because they come * packed into one. In order to correctly decode the instruction we need * to make the sign go away while we do our arithmetic. The best way I * came up with to do this (thanks to Bill) was to convert it to an * Int64 during decoding, which produces this expanded instruction time, * laden with fast-acting ints instead. * * Anybody with a clue might want to let me in on better ways of doing this. *) type instruction = Instruction of opcode (* this is an opcode type *) * int (* register A *) * int (* register B *) * int (* register C *) * int (* data *) * int (* register A for 'orthography' *) (* Well, here it is. This bitmath is slightly nicer with regular ints * than with Int64's: * * (inst lsr 6) land 0x7 * * versus * * Int64.logand (Int64.shift_right inst 6) 0x7 * * Yeah. If you're less broken than me, please clue me in. * * Otherwise I think this function is pretty reasonable. It seems to * be pretty fast. *) let decode_instruction binst = let inst = (Int64.logand (Int64.of_int binst) 0x00000000FFFFFFFFL) in let opcode = opcode_to_instruction (Int64.to_int (Int64.shift_right inst 28)) in let register_a = Int64.to_int (Int64.logand (Int64.shift_right inst 6) 0x7L) in let register_b = Int64.to_int (Int64.logand (Int64.shift_right inst 3) 0x7L) in let register_c = Int64.to_int (Int64.logand inst 0x7L) in let data = Int64.to_int (Int64.logand inst 0x1fffffffL) in let register_a13 = Int64.to_int (Int64.logand (Int64.shift_right inst 25) 0x7L) in Instruction(opcode, register_a, register_b, register_c, data, register_a13) (* This function takes a filename and a function as args, and * recursively applies the function to the instructions being decoded * from the file. * * I'm proud of this function. This strikes me as functional and * mostly-correct. However, I'm having difficulty imagining how to implement * the VM this way. Right now it suffices for the disassembler routine. * * I think what should be happening here instead is passing the result * of afun back through step somehow, and thence back through afun, * recursively. The disassembler should then be rewritten to pass unit * instead of anything meaningful, and the interpretter could be coded * to pass state through. *) let interpret_with filename afun = let rec step file afun = let inst = decode_instruction (input_binary_int file) in afun inst; step file afun in step (open_in_bin filename) afun (* This doesn't yet work. I hope the compiler elides it when it makes * machine code. :) *) (* let universal_machine () = (* initial state *) (* let state = State(Array.make 0 0n, Array.make_matrix 0 0 0n, Queue.create) in *) let registers = Array.make 0 0 in let platters = Array.make_matrix 0 0 0 in let free_list = Queue.create in let interpret = function State(op, rega, regb, regc, data, rega13) -> let register_a = registers.get(rega) in let register_b = registers.get(regb) in let register_c = registers.get(regc) in match op with CONDITIONAL_MOVE -> if register_c != 0 then registers.set(register_a, register_b) | ARRAY_AMENDMENT -> registers.set(register_a, platters.get(register_b).get(register_c)) | _ -> print_string "Unimplemented!"; exit in interpret *) (* This is my happy disassemble-instruction routine. * * It works and it seems to be pretty fast (when compiled with * ocamlopt). I don't think it would be impossible to rewrite to take * unit and do nothing with it, if I could wrap my sleepy brain around * the step function that would accompany it. *) let disassemble inst = let regular_op inst a b c = print_endline (inst ^ " " ^ (string_of_int a) ^ " " ^ (string_of_int b) ^ " " ^ (string_of_int c)) in match inst with Instruction(CONDITIONAL_MOVE, a, b, c, _, _) -> regular_op "CONDITIONAL MOVE" a b c | Instruction(ARRAY_AMENDMENT, a, b, c, _, _) -> regular_op "ARRAY AMENDMENT" a b c | Instruction(ARRAY_INDEX, a, b, c, _, _) -> regular_op "ARRAY_INDEX" a b c | Instruction(ADDITION, a, b, c, _, _) -> regular_op "ADDITION" a b c | Instruction(MULTIPLICATION, a, b, c, _, _) -> regular_op "MULTIPLICATION" a b c | Instruction(DIVISION, a, b, c, _, _) -> regular_op "DIVISION" a b c | Instruction(NOT_AND, a, b, c, _, _) -> regular_op "NOT_AND" a b c | Instruction(HALT, a, b, c, _, _) -> regular_op "HALT" a b c | Instruction(ALLOCATION, a, b, c, _, _) -> regular_op "ALLOCATION" a b c | Instruction(ABANDONMENT, a, b, c, _, _) -> regular_op "ABANDONMENT" a b c | Instruction(OUTPUT, a, b, c, _, _) -> regular_op "OUTPUT" a b c | Instruction(INPUT, a, b, c, _, _) -> regular_op "INPUT" a b c | Instruction(LOAD_PROGRAM, a, b, c, _, _) -> regular_op "LOAD_PROGRAM" a b c | Instruction(ORTHOGRAPHY, _, _, _, data, a13) -> print_endline ("ORTHOGRAPHY " ^ (string_of_int a13) ^ " " ^ (Printf.sprintf "0x%X" data)) | Instruction(INVALID, _, _, _, _, _) -> print_endline "INVALID" (* This main routine demonstrates using interpret_with and the future * universal machine. *) (* let _ = interpret_with (Array.get Sys.argv 1) (universal_machine ()) *) (* This is the disassembler main routine. I could use option parsing * to make a combined UM intepretter/disassembler later on, that would be * cool. *) let _ = interpret_with (Array.get Sys.argv 1) disassemble