#!/opt/local/bin/ruby # Abstraction Anonymous # Initial Ruby VM Implementation # 2006-07-24 @ 2:43 PM # Daniel Lyons, Bill Weiss and David Baird # # This is pretty gross, because it has a lot of stillborn ideas in # it. Ultimately we wound up scrapping it altogether and writing a C # implementation which rocked our world. Check it out: um.c. # # Amid the chaos there are some interesting ideas which people might # want to rip off. I'll discuss the gross with the glorious. # This is for the Ruby profiler, which totally rocks. Unfortunately, # it also was one of the things which convinced us Ruby just wasn't # powerful enough. #require 'profile' # There were several FileArrayProxy implementations before we arrived # at this one. I know you aren't supposed to subclass Array but it was # simply the easiest way to achieve to achieve this. class FileArrayProxy < Array def initialize(file) @file = file super() end # This method was way slow, because of calling read(4).unpack('N') # Let's think about what really happens when we call # read(4).unpack('N'): # # read(1) -> string * 4 # unpack('N') -> load the string into a byte array and screw with # it # # Compare to this: # # File.read.unpack('N*') # # That loads all of the file into big endian integers in one # pass. Much faster. def [](i) (i+1 - length).times do #self << @file.read(4).unpack("N")[0] self << ((@file.getc << 24) | (@file.getc << 16) | (@file.getc << 8) | (@file.getc)) end super end # Oh, right. All of the methods on array wound up needing to be # overridden to call [] and load the rest of the array to that location def []=(i, val) self[i] super end def index(i) self[i] super end def insert(i, b) self[i] super end end # We learned in this project that every Ruby program begins by molding # the built-in types into what you need for this particular program. # # This reminds me of Lisp, a lot. class Array # Basically, we rewrote Array's print method to print itself as an # array rather than as a string, which it does by defaut def to_s return '[' + self.collect { |x| x.to_s }.join(', ') + ']' end end # We wanted nil to print "nil" rather than "" when we print it. Easy. class NilClass def to_s 'nil' end end # We also changed both integer types to display themselves in # hex. This took far more work than it should have, because the to_hex # function depended on the old to_s method, and we were rewriting the # new to_s method to call to_hex. And thus we entered the Hell of Many # Aliases. class Fixnum alias old_to_s to_s def to_hex '0x' + old_to_s(16) end alias to_s to_hex end class Bignum alias old_to_s to_s def too_big? (self >> 32) != 0 end # One different thing about Bignum: we wanted it to tell us if it # was bigger than 1 bit larger than Fixnum. In retrospect, this may # have been a part of the performance hit. def to_hex (too_big? ? 'BIG: ' : '') + '0x' + old_to_s(16) end alias to_s to_hex end # This object represents, well, the machine. :) It keeps track of the # registers, finger, and platters. Ruby's memory management meant we # didn't have to think particularly hard about handling the # platters. The printing bit was added later for some signal-based # debugging: we would "kill -USR1 um.rb" to get it to spit out what it # was working on more cheaply than printing it out always. class MachineState attr_accessor :registers, :finger, :platters, :printing def initialize(stream) @printing = false @registers = Array.new(8,0) # eight registers of 0 @platters = [stream] @finger = 0 @file = File.open("key", "r") end # We wrote this to facilitate debugging the machine. Platters tended # to either be very small or very huge, so we only printed it when we # really needed to. def to_s(with_platters=false) "Instruction: #{Instruction.new(@platters[0][@finger-1]).to_s}\n" + "Registers: #{@registers.to_s}\n" + (with_platters ? "Platters: #{@platters[1..-1]}\n" : "") + "Finger: #{@finger}\n" end # A couple of convenience methods def debug_out $stderr.puts self + "\n" end def debug_out2 $stderr.puts self.to_s(true) + "\n" end # Step the execution forward by one instruction. This method was # rewritten a lot. def step # ins = Instruction.new(@platters[0][@finger]) ins = Instruction.new(@platters.at(0).at(@finger)) @finger += 1 execute(ins) end # We used Ruby's dispatch method to handle executing # instructions. The line # # send(inst.name, inst) # # deserves some special attention. Using the Instruction ADT we # wrote, which has a Instruction#name method which returns a symbol # of the instruction name, we can then apply that instruction to the # MachineState, passing in the instruction itself. We thought it was # a pretty classy design, but it seemed to add ~12% overhead. def execute(inst) # $stderr.puts self.to_s(true) + "\n\n" send(inst.name, inst) end # From here, we started putting the spec into the code so we could # check it more easily. It turned out not to be broken. # #0. Conditional Move. # The register A receives the value in register B, # unless the register C contains 0. def cond_mov(inst) # unless (@registers[inst.c] == 0) unless (@registers.at(inst.c) == 0) # @registers[inst.a] = @registers[inst.b] @registers[inst.a] = @registers.at(inst.b) end end # #1. Array Index. # The register A receives the value stored at offset # in register C in the array identified by B. def index(inst) @registers[inst.a] = @platters.at(@registers.at(inst.b)).at(@registers.at(inst.c)) # @registers[inst.a] = @platters[@registers[inst.b]][@registers[inst.c]] end # #2. Array Amendment. # The array identified by A is amended at the offset # in register B to store the value in register C. def amend(inst) # @platters[@registers[inst.a]][@registers[inst.b]] = @registers[inst.c] @platters.at(@registers.at(inst.a))[@registers.at(inst.b)] = @registers.at(inst.c) end # #3. Addition. # The register A receives the value in register B plus # the value in register C, modulo 2^32. def add(inst) # @registers[inst.a] = (@registers[inst.b] + @registers[inst.c]) % (1 << 32) @registers[inst.a] = (@registers.at(inst.b) + @registers.at(inst.c)) % (1 << 32) end # #4. Multiplication. # The register A receives the value in register B times # the value in register C, modulo 2^32. def mult(inst) # @registers[inst.a] = (@registers[inst.b] * @registers[inst.c]) % (1 << 32) @registers[inst.a] = (@registers.at(inst.b) * @registers.at(inst.c)) % (1 << 32) end # #5. Division. # The register A receives the value in register B # divided by the value in register C, if any, where # each quantity is treated treated as an unsigned 32 # bit number. def div(inst) # @registers[inst.a] = ((@registers[inst.b] % (1 << 32)) / (@registers[inst.c] % (1 << 32))) % (1 << 32) @registers[inst.a] = (@registers.at(inst.b) / @registers.at(inst.c)) % (1 << 32) end # #6. Not-And. # Each bit in the register A receives the 1 bit if # either register B or register C has a 0 bit in that # position. Otherwise the bit in register A receives # the 0 bit. def not_and(inst) # @registers[inst.a] = (~ (@registers[inst.b] & @registers[inst.c])) % (1 << 32) @registers[inst.a] = (~ (@registers.at(inst.b) & @registers.at(inst.c))) % (1 << 32) end # #7. Halt. # The universal machine stops computation. def halt(inst) raise "Halt encountered" end # #8. Allocation. # A new array is created with a capacity of platters # commensurate to the value in the register C. This # new array is initialized entirely with platters # holding the value 0. A bit pattern not consisting of # exclusively the 0 bit, and that identifies no other # active allocated array, is placed in the B register. def alloc(inst) if (i = @platters.index(nil)) # @platters[i] = Array.new(@registers[inst.c], 0) @platters[i] = Array.new(@registers.at(inst.c), 0) @registers[inst.b] = i else # @platters << Array.new(@registers[inst.c], 0) @platters << Array.new(@registers.at(inst.c), 0) @registers[inst.b] = @platters.length - 1 end end # #9. Abandonment. # The array identified by the register C is abandoned. # Future allocations may then reuse that identifier. def abandon(inst) # @platters[@registers[inst.c]] = nil @platters[@registers.at(inst.c)] = nil end # #10. Output. # The value in the register C is displayed on the console # immediately. Only values between and including 0 and 255 # are allowed. def out(inst) # putc @registers[inst.c] unless @registers[inst.c] > 255 putc @registers.at(inst.c) unless @registers.at(inst.c) > 255 $stdout.flush $stderr.puts "Wrote #{@registers[inst.c].chr} at Time: #{Time.now}" end # #11. Input. # The universal machine waits for input on the console. # When input arrives, the register C is loaded with the # input, which must be between and including 0 and 255. # If the end of input has been signaled, then the # register C is endowed with a uniform value pattern # where every place is pregnant with the 1 bit. def in(inst) if (c = @file.getc) == nil @registers[inst.c] = 0xFFFFFFFF else @registers[inst.c] = c end #if (c = STDIN.getc) == nil # @registers[inst.c] = 0xFFFFFFFF #else # @registers[inst.c] = c #end end # #12. Load Program. # The array identified by the B register is duplicated # and the duplicate shall replace the '0' array, # regardless of size. The execution finger is placed # to indicate the platter of this array that is # described by the offset given in C, where the value # 0 denotes the first platter, 1 the second, et # cetera. # # The '0' array shall be the most sublime choice for # loading, and shall be handled with the utmost # velocity. def load(inst) # $stderr.puts self.to_s unless @registers[inst.b] == 0 # @platters[0] = @platters[@registers[inst.b]].dup unless @registers[inst.b] == 0 # @finger = @registers[inst.c] # $stderr.puts self.to_s + "\n\n" unless @registers[inst.b] == 0 $stderr.puts self.to_s unless @registers.at(inst.b) == 0 @platters[0] = @platters.at(@registers.at(inst.b)).dup unless @registers.at(inst.b) == 0 @finger = @registers.at(inst.c) $stderr.puts self.to_s + "\n\n" unless @registers.at(inst.b) == 0 end # #13. Orthography. # The value indicated is loaded into the register A # forthwith. def data(inst) @registers[inst.a13] = inst.data end def inv(inst) raise "inv:" + inst end end # This was our first instruction loader. It was then replaced with two # different FileArray implementations: first FileArrayCacher which # used a Ruby Generator to load instructions, and then the Array # subclass. Then we came back and rewrote this to use unpack('N*') and # things were better; it took about 0.5 seconds to load and cache the # whole file with no per-step hit. # # Of course, it turned out not to help enough in the end. class Runtime def Runtime.load(filename) #arr = [] #File.open(filename, 'r') do |f| # while not f.eof? # arr << f.read(4).unpack('N')[0] # end #end #arr File.open(filename, 'r').read.unpack('L*') end end # Our beautiful Instruction ADT. I'm convinced that without the heavy # designs we have in this file, we would have taken at least twice as # long to write the C. I'm not sure whether that's a substantial gain # or not. class Instruction attr_accessor :value @@OPCODE_NAMES = [:cond_mov, :index, :amend, :add, :mult, :div, :not_and, :halt, :alloc, :abandon, :out, :in, :load, :data, :inv, :inv] def initialize(value) @value = value end # This was our instruction disassembler routine. We ran this against # the codex once and then had a reference disassemble which we used # to step through the execution on a white board to check # ourselves. # # Later on I wrote another disassembler in OCaml which ran much # faster. Of course, it wasn't much use by then. def to_s sprintf("%-10s ", name) + if (opcode < 13) sprintf("%d %d %d", a, b, c) elsif (opcode == 13) sprintf("%d 0x%X", a13, data) else "" end end # Bitmath def opcode value >> 28 end def name @@OPCODE_NAMES[opcode] end def data value & 0x1ffffff end def a13 (value >> 25) & 7 end def a (value >> 6) & 7 end def b (value >> 3) & 7 end def c value & 7 end end # Observe the evolution: #m = MachineState.new(Runtime.load(ARGV[0])) #m = MachineState.new(FileArrayProxy.new(File.open(ARGV[0]))) m = MachineState.new(Runtime.load(ARGV[0])) # Debugging signals Signal.trap("USR1") {m.debug_out2; m.printing = true} Signal.trap("USR2") {m.printing = false} # The main loop while 1 m.step end