ICFP Contest 2006
Team: Abstraction Anonymous

disassemble.ml

Download

  1. (* The Universal Machine
  2. *
  3. * By Daniel Lyons <fusion AT storytotell dot org>
  4. * Abstraction Anonymous
  5. * 2006-07-24 @ 2:33 PM
  6. *
  7. *
  8. * This file is meant to eventually be a complete implementation of
  9. * the Universal Machine. Right now it is a complete disassembler for
  10. * Universal bytecode. It seems to be pretty fast -- it can
  11. * disassemble Volume 8 of the Codex in about 2 seconds and umix.umz
  12. * in about 20 seconds.
  13. *
  14. * I am still learning OCaml. There are probably plenty of stupid
  15. * mistakes or inefficiencies in it, but bear in mind this file is not
  16. * complete.
  17. *)
  18.  
  19. (* This was something I needed in the interpretter but I'm not sure I
  20. * really need it in practice. This probably could be avoided if I
  21. * could remember the bloody type information when I'm writing. *)
  22. (* let make_nativeint_queue () =
  23. let y = Queue.create() in
  24. Queue.push 0n y; Queue.pop y; y;; *)
  25.  
  26. (* This is an ML structure for the execution state of the UM. In the
  27. * parlance of the codex, that consists of the 8 registers, the platters,
  28. * and the finger. In the context of a modern computer, those
  29. * would be called the registers, the memory, and the program counter.
  30. *
  31. * I added a free list queue so that I could reuse the same memory
  32. * integers. That was based on the C implementation of the memory; in
  33. * practice there may be a better way in OCaml.
  34. *)
  35. type machine_state =
  36. State of nativeint array (* registers *)
  37. * nativeint array array (* platters *)
  38. * nativeint Queue.t (* memory free list *)
  39. * nativeint;; (* the finger *)
  40.  
  41. (* This type represents an opcode for the UM. I think this is pretty
  42. * fast in OCaml but I think it was a serious speed hit to do this kind
  43. * of thing in Ruby with symbols.
  44. *)
  45. type opcode = CONDITIONAL_MOVE | DIVISION | OUTPUT
  46. | ARRAY_INDEX | NOT_AND | INPUT
  47. | ARRAY_AMENDMENT | HALT | LOAD_PROGRAM
  48. | ADDITION | ALLOCATION | ORTHOGRAPHY
  49. | MULTIPLICATION | ABANDONMENT | INVALID
  50.  
  51. (* This translates the opcode integer into an opcode.
  52. *
  53. * I wonder if there is a rational way to combine this with the type
  54. * definition above. There probably is. God I wish I knew OCaml
  55. * better.
  56. *)
  57. let opcode_to_instruction = function
  58. 0 -> CONDITIONAL_MOVE | 5 -> DIVISION | 10 -> OUTPUT
  59. | 1 -> ARRAY_INDEX | 6 -> NOT_AND | 11 -> INPUT
  60. | 2 -> ARRAY_AMENDMENT | 7 -> HALT | 12 -> LOAD_PROGRAM
  61. | 3 -> ADDITION | 8 -> ALLOCATION | 13 -> ORTHOGRAPHY
  62. | 4 -> MULTIPLICATION | 9 -> ABANDONMENT | _ -> INVALID
  63.  
  64. (* This is a cool instruction type.
  65. *
  66. * One interesting thing you'll see a bit later in the instruction
  67. * decoder: an instruction for UM fits into a single regular integer
  68. * in OCaml, but that integer is signed. All of the parts of an
  69. * instruction fit into a regular integer, obviously, because they come
  70. * packed into one. In order to correctly decode the instruction we need
  71. * to make the sign go away while we do our arithmetic. The best way I
  72. * came up with to do this (thanks to Bill) was to convert it to an
  73. * Int64 during decoding, which produces this expanded instruction time,
  74. * laden with fast-acting ints instead.
  75. *
  76. * Anybody with a clue might want to let me in on better ways of doing this.
  77. *)
  78. type instruction =
  79. Instruction of opcode (* this is an opcode type *)
  80. * int (* register A *)
  81. * int (* register B *)
  82. * int (* register C *)
  83. * int (* data *)
  84. * int (* register A for 'orthography' *)
  85.  
  86. (* Well, here it is. This bitmath is slightly nicer with regular ints
  87. * than with Int64's:
  88. *
  89. * (inst lsr 6) land 0x7
  90. *
  91. * versus
  92. *
  93. * Int64.logand (Int64.shift_right inst 6) 0x7
  94. *
  95. * Yeah. If you're less broken than me, please clue me in.
  96. *
  97. * Otherwise I think this function is pretty reasonable. It seems to
  98. * be pretty fast.
  99. *)
  100. let decode_instruction binst =
  101. let inst = (Int64.logand (Int64.of_int binst) 0x00000000FFFFFFFFL) in
  102. let opcode = opcode_to_instruction (Int64.to_int (Int64.shift_right inst 28)) in
  103. let register_a = Int64.to_int (Int64.logand (Int64.shift_right inst 6) 0x7L) in
  104. let register_b = Int64.to_int (Int64.logand (Int64.shift_right inst 3) 0x7L) in
  105. let register_c = Int64.to_int (Int64.logand inst 0x7L) in
  106. let data = Int64.to_int (Int64.logand inst 0x1fffffffL) in
  107. let register_a13 = Int64.to_int (Int64.logand (Int64.shift_right inst 25) 0x7L) in
  108. Instruction(opcode, register_a, register_b, register_c, data, register_a13)
  109.  
  110. (* This function takes a filename and a function as args, and
  111. * recursively applies the function to the instructions being decoded
  112. * from the file.
  113. *
  114. * I'm proud of this function. This strikes me as functional and
  115. * mostly-correct. However, I'm having difficulty imagining how to implement
  116. * the VM this way. Right now it suffices for the disassembler routine.
  117. *
  118. * I think what should be happening here instead is passing the result
  119. * of afun back through step somehow, and thence back through afun,
  120. * recursively. The disassembler should then be rewritten to pass unit
  121. * instead of anything meaningful, and the interpretter could be coded
  122. * to pass state through.
  123. *)
  124. let interpret_with filename afun =
  125. let rec step file afun =
  126. let inst = decode_instruction (input_binary_int file) in
  127. afun inst;
  128. step file afun
  129. in
  130. step (open_in_bin filename) afun
  131.  
  132. (* This doesn't yet work. I hope the compiler elides it when it makes
  133. * machine code. :) *)
  134. (* let universal_machine () =
  135. (* initial state *)
  136. (* let state = State(Array.make 0 0n, Array.make_matrix 0 0 0n, Queue.create) in *)
  137. let registers = Array.make 0 0 in
  138. let platters = Array.make_matrix 0 0 0 in
  139. let free_list = Queue.create in
  140. let interpret = function
  141. State(op, rega, regb, regc, data, rega13) ->
  142. let register_a = registers.get(rega) in
  143. let register_b = registers.get(regb) in
  144. let register_c = registers.get(regc) in
  145. match op with
  146. CONDITIONAL_MOVE ->
  147. if register_c != 0 then
  148. registers.set(register_a, register_b)
  149. | ARRAY_AMENDMENT ->
  150. registers.set(register_a, platters.get(register_b).get(register_c))
  151. | _ ->
  152. print_string "Unimplemented!"; exit
  153. in
  154. interpret *)
  155.  
  156. (* This is my happy disassemble-instruction routine.
  157. *
  158. * It works and it seems to be pretty fast (when compiled with
  159. * ocamlopt). I don't think it would be impossible to rewrite to take
  160. * unit and do nothing with it, if I could wrap my sleepy brain around
  161. * the step function that would accompany it.
  162. *)
  163. let disassemble inst =
  164. let regular_op inst a b c =
  165. print_endline (inst ^ " " ^ (string_of_int a) ^ " " ^ (string_of_int b) ^ " " ^
  166. in
  167. match inst with
  168. Instruction(CONDITIONAL_MOVE, a, b, c, _, _) ->
  169. regular_op "CONDITIONAL MOVE" a b c
  170. | Instruction(ARRAY_AMENDMENT, a, b, c, _, _) ->
  171. regular_op "ARRAY AMENDMENT" a b c
  172. | Instruction(ARRAY_INDEX, a, b, c, _, _) ->
  173. regular_op "ARRAY_INDEX" a b c
  174. | Instruction(ADDITION, a, b, c, _, _) ->
  175. regular_op "ADDITION" a b c
  176. | Instruction(MULTIPLICATION, a, b, c, _, _) ->
  177. regular_op "MULTIPLICATION" a b c
  178. | Instruction(DIVISION, a, b, c, _, _) ->
  179. regular_op "DIVISION" a b c
  180. | Instruction(NOT_AND, a, b, c, _, _) ->
  181. regular_op "NOT_AND" a b c
  182. | Instruction(HALT, a, b, c, _, _) ->
  183. regular_op "HALT" a b c
  184. | Instruction(ALLOCATION, a, b, c, _, _) ->
  185. regular_op "ALLOCATION" a b c
  186. | Instruction(ABANDONMENT, a, b, c, _, _) ->
  187. regular_op "ABANDONMENT" a b c
  188. | Instruction(OUTPUT, a, b, c, _, _) ->
  189. regular_op "OUTPUT" a b c
  190. | Instruction(INPUT, a, b, c, _, _) ->
  191. regular_op "INPUT" a b c
  192. | Instruction(LOAD_PROGRAM, a, b, c, _, _) ->
  193. regular_op "LOAD_PROGRAM" a b c
  194. | Instruction(ORTHOGRAPHY, _, _, _, data, a13) ->
  195. print_endline ("ORTHOGRAPHY " ^ (string_of_int a13) ^ " " ^ (Printf.sprintf "0x%X" data))
  196. | Instruction(INVALID, _, _, _, _, _) -> print_endline "INVALID"
  197.  
  198. (* This main routine demonstrates using interpret_with and the future
  199. * universal machine. *)
  200. (* let _ = interpret_with (Array.get Sys.argv 1) (universal_machine
  201. ()) *)
  202.  
  203. (* This is the disassembler main routine. I could use option parsing
  204. * to make a combined UM intepretter/disassembler later on, that would be
  205. * cool. *)
  206. let _ = interpret_with (Array.get Sys.argv 1) disassemble
  207.  
  208.  

Hell is other programming languages. -- Sartran
Hell is that programming language! -- Dan
Ordinarily, one would enrich this language with more powerful means of computation. Instead I take a different tack... -- Harmonious Monk