#!/opt/local/bin/ruby
# Abstraction Anonymous
# Initial Ruby VM Implementation
# 2006-07-24 @ 2:43 PM
# Daniel Lyons, Bill Weiss and David Baird
# <fusion AT storytotell dot org>
# This is pretty gross, because it has a lot of stillborn ideas in
# it. Ultimately we wound up scrapping it altogether and writing a C
# implementation which rocked our world. Check it out: um.c.
#
# Amid the chaos there are some interesting ideas which people might
# want to rip off. I'll discuss the gross with the glorious.
# This is for the Ruby profiler, which totally rocks. Unfortunately,
# it also was one of the things which convinced us Ruby just wasn't
# powerful enough.
#require 'profile'
# There were several FileArrayProxy implementations before we arrived
# at this one. I know you aren't supposed to subclass Array but it was
# simply the easiest way to achieve to achieve this.
class FileArrayProxy < Array
def initialize(file)
@file = file
super()
end
# This method was way slow, because of calling read(4).unpack('N')
# Let's think about what really happens when we call
# read(4).unpack('N'):
#
# read(1) -> string * 4
# unpack('N') -> load the string into a byte array and screw with
# it
#
# Compare to this:
#
# File.read.unpack('N*')
#
# That loads all of the file into big endian integers in one
# pass. Much faster.
def [](i)
(i+1 - length).times do
#self << @file.read(4).unpack("N")[0]
self << ((@file.getc << 24) | (@file.getc << 16) | (@file.getc << 8) | (@file.getc))
end
super
end
# Oh, right. All of the methods on array wound up needing to be
# overridden to call [] and load the rest of the array to that location
def []=(i, val)
self[i]
super
end
def index(i)
self[i]
super
end
def insert(i, b)
self[i]
super
end
end
# We learned in this project that every Ruby program begins by molding
# the built-in types into what you need for this particular program.
#
# This reminds me of Lisp, a lot.
class Array
# Basically, we rewrote Array's print method to print itself as an
# array rather than as a string, which it does by defaut
def to_s
return '[' + self.collect { |x| x.to_s }.join(', ') + ']'
end
end
# We wanted nil to print "nil" rather than "" when we print it. Easy.
class NilClass
def to_s
'nil'
end
end
# We also changed both integer types to display themselves in
# hex. This took far more work than it should have, because the to_hex
# function depended on the old to_s method, and we were rewriting the
# new to_s method to call to_hex. And thus we entered the Hell of Many
# Aliases.
class Fixnum
alias old_to_s to_s
def to_hex
'0x' + old_to_s(16)
end
alias to_s to_hex
end
class Bignum
alias old_to_s to_s
def too_big?
(self >> 32) != 0
end
# One different thing about Bignum: we wanted it to tell us if it
# was bigger than 1 bit larger than Fixnum. In retrospect, this may
# have been a part of the performance hit.
def to_hex
(too_big? ? 'BIG: ' : '') + '0x' + old_to_s(16)
end
alias to_s to_hex
end
# This object represents, well, the machine. :) It keeps track of the
# registers, finger, and platters. Ruby's memory management meant we
# didn't have to think particularly hard about handling the
# platters. The printing bit was added later for some signal-based
# debugging: we would "kill -USR1 um.rb" to get it to spit out what it
# was working on more cheaply than printing it out always.
class MachineState
attr_accessor :registers, :finger, :platters, :printing
def initialize(stream)
@printing = false
@registers = Array.new(8,0) # eight registers of 0
@platters = [stream]
@finger = 0
@file = File.open("key", "r")
end
# We wrote this to facilitate debugging the machine. Platters tended
# to either be very small or very huge, so we only printed it when we
# really needed to.
def to_s(with_platters=false)
"Instruction: #{Instruction.new(@platters[0][@finger-1]).to_s}\n" +
"Registers: #{@registers.to_s}\n" +
(with_platters ? "Platters: #{@platters[1..-1]}\n" : "") +
"Finger: #{@finger}\n"
end
# A couple of convenience methods
def debug_out
$stderr.puts self + "\n"
end
def debug_out2
$stderr.puts self.to_s(true) + "\n"
end
# Step the execution forward by one instruction. This method was
# rewritten a lot.
def step
# ins = Instruction.new(@platters[0][@finger])
ins = Instruction.new(@platters.at(0).at(@finger))
@finger += 1
execute(ins)
end
# We used Ruby's dispatch method to handle executing
# instructions. The line
#
# send(inst.name, inst)
#
# deserves some special attention. Using the Instruction ADT we
# wrote, which has a Instruction#name method which returns a symbol
# of the instruction name, we can then apply that instruction to the
# MachineState, passing in the instruction itself. We thought it was
# a pretty classy design, but it seemed to add ~12% overhead.
def execute(inst)
# $stderr.puts self.to_s(true) + "\n\n"
send(inst.name, inst)
end
# From here, we started putting the spec into the code so we could
# check it more easily. It turned out not to be broken.
# #0. Conditional Move.
# The register A receives the value in register B,
# unless the register C contains 0.
def cond_mov(inst)
# unless (@registers[inst.c] == 0)
unless (@registers.at(inst.c) == 0)
# @registers[inst.a] = @registers[inst.b]
@registers[inst.a] = @registers.at(inst.b)
end
end
# #1. Array Index.
# The register A receives the value stored at offset
# in register C in the array identified by B.
def index(inst)
@registers[inst.a] = @platters.at(@registers.at(inst.b)).at(@registers.at(inst.c))
# @registers[inst.a] = @platters[@registers[inst.b]][@registers[inst.c]]
end
# #2. Array Amendment.
# The array identified by A is amended at the offset
# in register B to store the value in register C.
def amend(inst)
# @platters[@registers[inst.a]][@registers[inst.b]] = @registers[inst.c]
@platters.at(@registers.at(inst.a))[@registers.at(inst.b)] = @registers.at(inst.c)
end
# #3. Addition.
# The register A receives the value in register B plus
# the value in register C, modulo 2^32.
def add(inst)
# @registers[inst.a] = (@registers[inst.b] + @registers[inst.c]) % (1 << 32)
@registers[inst.a] = (@registers.at(inst.b) + @registers.at(inst.c)) % (1 << 32)
end
# #4. Multiplication.
# The register A receives the value in register B times
# the value in register C, modulo 2^32.
def mult(inst)
# @registers[inst.a] = (@registers[inst.b] * @registers[inst.c]) % (1 << 32)
@registers[inst.a] = (@registers.at(inst.b) * @registers.at(inst.c)) % (1 << 32)
end
# #5. Division.
# The register A receives the value in register B
# divided by the value in register C, if any, where
# each quantity is treated treated as an unsigned 32
# bit number.
def div(inst)
# @registers[inst.a] = ((@registers[inst.b] % (1 << 32)) / (@registers[inst.c] % (1 << 32))) % (1 << 32)
@registers[inst.a] = (@registers.at(inst.b) / @registers.at(inst.c)) % (1 << 32)
end
# #6. Not-And.
# Each bit in the register A receives the 1 bit if
# either register B or register C has a 0 bit in that
# position. Otherwise the bit in register A receives
# the 0 bit.
def not_and(inst)
# @registers[inst.a] = (~ (@registers[inst.b] & @registers[inst.c])) % (1 << 32)
@registers[inst.a] = (~ (@registers.at(inst.b) & @registers.at(inst.c))) % (1 << 32)
end
# #7. Halt.
# The universal machine stops computation.
def halt(inst)
raise "Halt encountered"
end
# #8. Allocation.
# A new array is created with a capacity of platters
# commensurate to the value in the register C. This
# new array is initialized entirely with platters
# holding the value 0. A bit pattern not consisting of
# exclusively the 0 bit, and that identifies no other
# active allocated array, is placed in the B register.
def alloc(inst)
if (i = @platters.index(nil))
# @platters[i] = Array.new(@registers[inst.c], 0)
@platters[i] = Array.new(@registers.at(inst.c), 0)
@registers[inst.b] = i
else
# @platters << Array.new(@registers[inst.c], 0)
@platters << Array.new(@registers.at(inst.c), 0)
@registers[inst.b] = @platters.length - 1
end
end
# #9. Abandonment.
# The array identified by the register C is abandoned.
# Future allocations may then reuse that identifier.
def abandon(inst)
# @platters[@registers[inst.c]] = nil
@platters[@registers.at(inst.c)] = nil
end
# #10. Output.
# The value in the register C is displayed on the console
# immediately. Only values between and including 0 and 255
# are allowed.
def out(inst)
# putc @registers[inst.c] unless @registers[inst.c] > 255
putc @registers.at(inst.c) unless @registers.at(inst.c) > 255
$stdout.flush
$stderr.puts "Wrote #{@registers[inst.c].chr} at Time: #{Time.now}"
end
# #11. Input.
# The universal machine waits for input on the console.
# When input arrives, the register C is loaded with the
# input, which must be between and including 0 and 255.
# If the end of input has been signaled, then the
# register C is endowed with a uniform value pattern
# where every place is pregnant with the 1 bit.
def in(inst)
if (c = @file.getc) == nil
@registers[inst.c] = 0xFFFFFFFF
else
@registers[inst.c] = c
end
#if (c = STDIN.getc) == nil
# @registers[inst.c] = 0xFFFFFFFF
#else
# @registers[inst.c] = c
#end
end
# #12. Load Program.
# The array identified by the B register is duplicated
# and the duplicate shall replace the '0' array,
# regardless of size. The execution finger is placed
# to indicate the platter of this array that is
# described by the offset given in C, where the value
# 0 denotes the first platter, 1 the second, et
# cetera.
#
# The '0' array shall be the most sublime choice for
# loading, and shall be handled with the utmost
# velocity.
def load(inst)
# $stderr.puts self.to_s unless @registers[inst.b] == 0
# @platters[0] = @platters[@registers[inst.b]].dup unless @registers[inst.b] == 0
# @finger = @registers[inst.c]
# $stderr.puts self.to_s + "\n\n" unless @registers[inst.b] == 0
$stderr.puts self.to_s unless @registers.at(inst.b) == 0
@platters[0] = @platters.at(@registers.at(inst.b)).dup unless @registers.at(inst.b) == 0
@finger = @registers.at(inst.c)
$stderr.puts self.to_s + "\n\n" unless @registers.at(inst.b) == 0
end
# #13. Orthography.
# The value indicated is loaded into the register A
# forthwith.
def data(inst)
@registers[inst.a13] = inst.data
end
def inv(inst)
raise "inv:" + inst
end
end
# This was our first instruction loader. It was then replaced with two
# different FileArray implementations: first FileArrayCacher which
# used a Ruby Generator to load instructions, and then the Array
# subclass. Then we came back and rewrote this to use unpack('N*') and
# things were better; it took about 0.5 seconds to load and cache the
# whole file with no per-step hit.
#
# Of course, it turned out not to help enough in the end.
class Runtime
def Runtime.load(filename)
#arr = []
#File.open(filename, 'r') do |f|
# while not f.eof?
# arr << f.read(4).unpack('N')[0]
# end
#end
#arr
File.open(filename, 'r').read.unpack('L*')
end
end
# Our beautiful Instruction ADT. I'm convinced that without the heavy
# designs we have in this file, we would have taken at least twice as
# long to write the C. I'm not sure whether that's a substantial gain
# or not.
class Instruction
attr_accessor :value
@@OPCODE_NAMES = [:cond_mov, :index, :amend,
:add, :mult, :div, :not_and,
:halt, :alloc, :abandon, :out, :in,
:load, :data, :inv, :inv]
def initialize(value)
@value = value
end
# This was our instruction disassembler routine. We ran this against
# the codex once and then had a reference disassemble which we used
# to step through the execution on a white board to check
# ourselves.
#
# Later on I wrote another disassembler in OCaml which ran much
# faster. Of course, it wasn't much use by then.
def to_s
sprintf("%-10s ", name) +
if (opcode < 13)
sprintf("%d %d %d", a, b, c)
elsif (opcode == 13)
sprintf("%d 0x%X", a13, data)
else
""
end
end
# Bitmath
def opcode
value >> 28
end
def name
@@OPCODE_NAMES[opcode]
end
def data
value & 0x1ffffff
end
def a13
(value >> 25) & 7
end
def a
(value >> 6) & 7
end
def b
(value >> 3) & 7
end
def c
value & 7
end
end
# Observe the evolution:
#m = MachineState.new(Runtime.load(ARGV[0]))
#m = MachineState.new(FileArrayProxy.new(File.open(ARGV[0])))
m = MachineState.new(Runtime.load(ARGV[0]))
# Debugging signals
Signal.trap("USR1") {m.debug_out2; m.printing = true}
Signal.trap("USR2") {m.printing = false}
# The main loop
while 1
m.step
end