marcheiligers · November 27, 2025 22:50
diff --git a/sha2_256.rb b/sha2_256.rb
 # https://en.wikipedia.org/wiki/SHA-2
 # with additional hints from:
 #   - https://github.com/ruby/rubygems/pull/4989/files
 #   - https://github.com/kaloos/sha256/blob/master/lib/TBAddress.rb
 #   - https://github.com/eliblurr/sha256-algorithm
 #
 # Note 1: All variables are 32 bit unsigned integers and addition is calculated modulo 2^32
 # Note 2: For each round, there is one round constant k[i] and one entry in the message schedule array w[i], 0 ≤ i ≤ 63
 # Note 3: The compression function uses 8 working variables, a through h
 # Note 4: Big-endian convention is used when expressing the constants in this pseudocode,
 #     and when parsing message block data from bytes to words, for example,
 #     the first word of the input message "abc" after padding is 0x61626380

 # Initialize hash values:
 # (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
 # h0 := 0x6a09e667
 # h1 := 0xbb67ae85
 # h2 := 0x3c6ef372
 # h3 := 0xa54ff53a
 # h4 := 0x510e527f
 # h5 := 0x9b05688c
 # h6 := 0x1f83d9ab
 # h7 := 0x5be0cd19
 # Marc says: done below in the digest method

 module SHA2_256
  extend self

  # Round constants k[0..63]: first 32 bits of the fractional parts of the cube
  # roots of the first 64 primes (2..311). Frozen so the shared table cannot be
  # modified by accident.
  K = [
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  ].freeze

  # Computes the SHA-256 digest of +message+.
  #
  # message - a String; it is hashed byte-wise via String#bytes.
  #
  # Returns the 32-byte digest as a binary String (h0..h7 packed big-endian).
  # The method has no side effects (it does not print anything).
  #
  # Pre-processing (padding), following the SHA-2 pseudocode:
  #   begin with the original message of length L bits
  #   append a single '1' bit
  #   append K '0' bits, where K is the minimum number >= 0 such that
  #     (L + 1 + K + 64) is a multiple of 512
  #   append L as a 64-bit big-endian integer
  #
  # Strings are byte aligned, so in bytes (l = message length in bytes):
  #   1) the '1' bit followed by seven '0' bits is the single byte 0x80
  #   2) k zero bytes are appended so that (l + 1 + k) % 64 == 56
  #   3) pack('Q>') yields the bit length as 8 big-endian bytes, which
  #      completes the final 64-byte chunk
  def digest(message)
    bytes = message.bytes
    l = bytes.length

    # (55 - l) % 64 is the smallest k >= 0 with (l + 1 + k) % 64 == 56.
    bytes << 0x80
    bytes.concat([0] * ((55 - l) % 64))
    bytes.concat([l * 8].pack('Q>').bytes)

    # Initial hash values: first 32 bits of the fractional parts of the
    # square roots of the first 8 primes (2..19).
    h0 = 0x6a09e667
    h1 = 0xbb67ae85
    h2 = 0x3c6ef372
    h3 = 0xa54ff53a
    h4 = 0x510e527f
    h5 = 0x9b05688c
    h6 = 0x1f83d9ab
    h7 = 0x5be0cd19

    # 64-entry message schedule of 32-bit words. Every entry is overwritten
    # for each chunk, so a single array is reused across chunks.
    w = Array.new(64, 0)

    # Process the message in successive 512-bit (64-byte) chunks.
    # Note: `while` loops are used on purpose - mruby doesn't have `step`,
    # and `while` is faster in DragonRuby.
    total = bytes.length
    offset = 0
    while offset < total
      # Copy the chunk into w[0..15]: each word is 4 bytes, big-endian.
      i = 0
      while i < 16
        pos = offset + (i * 4)
        w[i] = (bytes[pos] << 24) | (bytes[pos + 1] << 16) | (bytes[pos + 2] << 8) | bytes[pos + 3]
        i += 1
      end

      # Extend the first 16 words into w[16..63]:
      #   s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
      #   s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
      #   w[i] := w[i-16] + s0 + w[i-7] + s1
      i = 16
      while i < 64
        w15 = w[i - 15]
        w2 = w[i - 2]
        s0 = right_rotate(w15, 7) ^ right_rotate(w15, 18) ^ (w15 >> 3)
        s1 = right_rotate(w2, 17) ^ right_rotate(w2, 19) ^ (w2 >> 10)
        w[i] = (w[i - 16] + s0 + w[i - 7] + s1) & 0xffffffff
        i += 1
      end

      # Initialize the 8 working variables to the current hash value.
      a = h0
      b = h1
      c = h2
      d = h3
      e = h4
      f = h5
      g = h6
      h = h7

      # Compression function main loop:
      #   S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
      #   ch := (e and f) xor ((not e) and g)
      #   temp1 := h + S1 + ch + k[i] + w[i]
      #   S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
      #   maj := (a and b) xor (a and c) xor (b and c)
      #   temp2 := S0 + maj
      # The `& 0xffffffff` masks keep the working variables within 32 bits
      # (Ruby integers do not wrap on their own).
      i = 0
      while i < 64
        s1 = right_rotate(e, 6) ^ right_rotate(e, 11) ^ right_rotate(e, 25)
        ch = (e & f) ^ ((~e) & g)
        temp1 = h + s1 + ch + K[i] + w[i]
        s0 = right_rotate(a, 2) ^ right_rotate(a, 13) ^ right_rotate(a, 22)
        maj = (a & b) ^ (a & c) ^ (b & c)
        temp2 = s0 + maj

        h = g
        g = f
        f = e
        e = (d + temp1) & 0xffffffff
        d = c
        c = b
        b = a
        a = (temp1 + temp2) & 0xffffffff
        i += 1
      end

      # Add the compressed chunk to the current hash value (modulo 2^32).
      h0 = (h0 + a) & 0xffffffff
      h1 = (h1 + b) & 0xffffffff
      h2 = (h2 + c) & 0xffffffff
      h3 = (h3 + d) & 0xffffffff
      h4 = (h4 + e) & 0xffffffff
      h5 = (h5 + f) & 0xffffffff
      h6 = (h6 + g) & 0xffffffff
      h7 = (h7 + h) & 0xffffffff

      offset += 64
    end

    # Produce the final hash value (big-endian):
    #   digest := h0 append h1 append h2 append h3 append h4 append h5 append h6 append h7
    [h0, h1, h2, h3, h4, h5, h6, h7].pack('N*')
  end

  # Returns the SHA-256 digest of +message+ as a lowercase hex String.
  def hexdigest(message)
    digest(message).unpack1('H*')
  end

  # Returns the SHA-256 digest of +message+ as an uppercase hex String.
  def hexdigest_upper(message)
    hexdigest(message).upcase
  end

 private

  # Rotates the 32-bit word +value+ right by +count+ bits; the result is
  # masked back to 32 bits.
  def right_rotate(value, count)
    ((value >> count) | (value << (32 - count))) & 0xffffffff
  end
 end
	# https://en.wikipedia.org/wiki/SHA-2
	# with additional hints from:
	# - https://github.com/ruby/rubygems/pull/4989/files
	# - https://github.com/kaloos/sha256/blob/master/lib/TBAddress.rb
	# - https://github.com/eliblurr/sha256-algorithm
	#
	# Note 1: All variables are 32 bit unsigned integers and addition is calculated modulo 2^32
	# Note 2: For each round, there is one round constant k[i] and one entry in the message schedule array w[i], 0 ≤ i ≤ 63
	# Note 3: The compression function uses 8 working variables, a through h
	# Note 4: Big-endian convention is used when expressing the constants in this pseudocode,
	# and when parsing message block data from bytes to words, for example,
	# the first word of the input message "abc" after padding is 0x61626380

	# Initialize hash values:
	# (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
	# h0 := 0x6a09e667
	# h1 := 0xbb67ae85
	# h2 := 0x3c6ef372
	# h3 := 0xa54ff53a
	# h4 := 0x510e527f
	# h5 := 0x9b05688c
	# h6 := 0x1f83d9ab
	# h7 := 0x5be0cd19
	# Marc says: done below in the digest method

	module SHA2_256
	  extend self

	  # Round constants k[0..63]: first 32 bits of the fractional parts of the cube
	  # roots of the first 64 primes (2..311). Frozen so the shared table cannot be
	  # modified by accident.
	  K = [
	    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	  ].freeze

	  # Computes the SHA-256 digest of +message+.
	  #
	  # message - a String; it is hashed byte-wise via String#bytes.
	  #
	  # Returns the 32-byte digest as a binary String (h0..h7 packed big-endian).
	  # The method has no side effects (it does not print anything).
	  #
	  # Pre-processing (padding), following the SHA-2 pseudocode:
	  #   begin with the original message of length L bits
	  #   append a single '1' bit
	  #   append K '0' bits, where K is the minimum number >= 0 such that
	  #     (L + 1 + K + 64) is a multiple of 512
	  #   append L as a 64-bit big-endian integer
	  #
	  # Strings are byte aligned, so in bytes (l = message length in bytes):
	  #   1) the '1' bit followed by seven '0' bits is the single byte 0x80
	  #   2) k zero bytes are appended so that (l + 1 + k) % 64 == 56
	  #   3) pack('Q>') yields the bit length as 8 big-endian bytes, which
	  #      completes the final 64-byte chunk
	  def digest(message)
	    bytes = message.bytes
	    l = bytes.length

	    # (55 - l) % 64 is the smallest k >= 0 with (l + 1 + k) % 64 == 56.
	    bytes << 0x80
	    bytes.concat([0] * ((55 - l) % 64))
	    bytes.concat([l * 8].pack('Q>').bytes)

	    # Initial hash values: first 32 bits of the fractional parts of the
	    # square roots of the first 8 primes (2..19).
	    h0 = 0x6a09e667
	    h1 = 0xbb67ae85
	    h2 = 0x3c6ef372
	    h3 = 0xa54ff53a
	    h4 = 0x510e527f
	    h5 = 0x9b05688c
	    h6 = 0x1f83d9ab
	    h7 = 0x5be0cd19

	    # 64-entry message schedule of 32-bit words. Every entry is overwritten
	    # for each chunk, so a single array is reused across chunks.
	    w = Array.new(64, 0)

	    # Process the message in successive 512-bit (64-byte) chunks.
	    # Note: `while` loops are used on purpose - mruby doesn't have `step`,
	    # and `while` is faster in DragonRuby.
	    total = bytes.length
	    offset = 0
	    while offset < total
	      # Copy the chunk into w[0..15]: each word is 4 bytes, big-endian.
	      i = 0
	      while i < 16
	        pos = offset + (i * 4)
	        w[i] = (bytes[pos] << 24) | (bytes[pos + 1] << 16) | (bytes[pos + 2] << 8) | bytes[pos + 3]
	        i += 1
	      end

	      # Extend the first 16 words into w[16..63]:
	      #   s0 := (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
	      #   s1 := (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
	      #   w[i] := w[i-16] + s0 + w[i-7] + s1
	      i = 16
	      while i < 64
	        w15 = w[i - 15]
	        w2 = w[i - 2]
	        s0 = right_rotate(w15, 7) ^ right_rotate(w15, 18) ^ (w15 >> 3)
	        s1 = right_rotate(w2, 17) ^ right_rotate(w2, 19) ^ (w2 >> 10)
	        w[i] = (w[i - 16] + s0 + w[i - 7] + s1) & 0xffffffff
	        i += 1
	      end

	      # Initialize the 8 working variables to the current hash value.
	      a = h0
	      b = h1
	      c = h2
	      d = h3
	      e = h4
	      f = h5
	      g = h6
	      h = h7

	      # Compression function main loop:
	      #   S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
	      #   ch := (e and f) xor ((not e) and g)
	      #   temp1 := h + S1 + ch + k[i] + w[i]
	      #   S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
	      #   maj := (a and b) xor (a and c) xor (b and c)
	      #   temp2 := S0 + maj
	      # The `& 0xffffffff` masks keep the working variables within 32 bits
	      # (Ruby integers do not wrap on their own).
	      i = 0
	      while i < 64
	        s1 = right_rotate(e, 6) ^ right_rotate(e, 11) ^ right_rotate(e, 25)
	        ch = (e & f) ^ ((~e) & g)
	        temp1 = h + s1 + ch + K[i] + w[i]
	        s0 = right_rotate(a, 2) ^ right_rotate(a, 13) ^ right_rotate(a, 22)
	        maj = (a & b) ^ (a & c) ^ (b & c)
	        temp2 = s0 + maj

	        h = g
	        g = f
	        f = e
	        e = (d + temp1) & 0xffffffff
	        d = c
	        c = b
	        b = a
	        a = (temp1 + temp2) & 0xffffffff
	        i += 1
	      end

	      # Add the compressed chunk to the current hash value (modulo 2^32).
	      h0 = (h0 + a) & 0xffffffff
	      h1 = (h1 + b) & 0xffffffff
	      h2 = (h2 + c) & 0xffffffff
	      h3 = (h3 + d) & 0xffffffff
	      h4 = (h4 + e) & 0xffffffff
	      h5 = (h5 + f) & 0xffffffff
	      h6 = (h6 + g) & 0xffffffff
	      h7 = (h7 + h) & 0xffffffff

	      offset += 64
	    end

	    # Produce the final hash value (big-endian):
	    #   digest := h0 append h1 append h2 append h3 append h4 append h5 append h6 append h7
	    [h0, h1, h2, h3, h4, h5, h6, h7].pack('N*')
	  end

	  # Returns the SHA-256 digest of +message+ as a lowercase hex String.
	  def hexdigest(message)
	    digest(message).unpack1('H*')
	  end

	  # Returns the SHA-256 digest of +message+ as an uppercase hex String.
	  def hexdigest_upper(message)
	    hexdigest(message).upcase
	  end

	  private

	  # Rotates the 32-bit word +value+ right by +count+ bits; the result is
	  # masked back to 32 bits.
	  def right_rotate(value, count)
	    ((value >> count) | (value << (32 - count))) & 0xffffffff
	  end
	end
No results found