Skip to content

Instantly share code, notes, and snippets.

@Lohann
Last active July 28, 2025 21:42
Show Gist options
  • Select an option

  • Save Lohann/f01e2558691ba8648f1e5a7c6e8d37da to your computer and use it in GitHub Desktop.

Select an option

Save Lohann/f01e2558691ba8648f1e5a7c6e8d37da to your computer and use it in GitHub Desktop.
// Efficient SQRT method
// Constant gas cost of 331 discounting RETURN and CALLDATALOAD logic, or 349 including everything.
//
// Special thanks to @caironeth for have found a better log2(x) here:
// - https://github.com/Lohann/openzeppelin-contracts/pull/1
//
// For testing use this tool: https://www.evm.codes/playground?fork=cancun
// Authors:
// - Lohann Paterno Coutinho Ferreira <[email protected]>
// - Cairo <https://github.com/cairoeth>
// LOAD X
PUSH0
CALLDATALOAD
// ------ 2 ** (log2(x) / 2) ------
PUSH1 1 // stack: 1 x
DUP2 // stack: x 1 x
// Lookup Table, stack: table x 1 x
PUSH30 0x010102020202030303030303030300000000000000000000000000000000
// If value has upper 128 bits set, log2 result is at least 128
DUP2 // stack: x table x 1 x
PUSH16 0xffffffffffffffffffffffffffffffff // stack: 2**128-1 x table x 1 x
LT // stack: x>=2**128 table x 1 x
PUSH1 7 // stack: 7 x>=2**128 table x 1 x
SHL // stack: ±log2(x) table x 1 x
// If upper 64 bits of 128-bit half set, add 64 to result
// stack: log2(x) table x 1 x
DUP3 // stack: x ±log2(x) table x 1 x
DUP2 // stack: ±log2(x) x ±log2(x) table x 1 x
SHR // stack: x/±log2(x) ±log2(x) table x 1 x
PUSH8 0xffffffffffffffff // stack: 2**64-1 x/±log2(x) ±log2(x) table x 1 x
LT // stack: (x/2**±log2(x) >= 2**64) ±log2(x) table x 1 x
PUSH1 6 // stack: 6 (x/2**±log2(x) >= 2**64) ±log2(x) table x 1 x
SHL // stack: ±log2(x) ±log2(x) table x 1 x
OR // stack: ±log2(x) table x 1 x
// If upper 32 bits of 64-bit half set, add 32 to result
DUP3
DUP2
SHR
PUSH4 0xffffffff
LT
PUSH1 5
SHL
OR
// If upper 16 bits of 32-bit half set, add 16 to result
DUP3
DUP2
SHR
PUSH2 0xffff
LT
PUSH1 4
SHL
OR
// If upper 8 bits of 16-bit half set, add 8 to result
DUP3
DUP2
SHR
PUSH1 0xff
LT
PUSH1 3
SHL
OR
// If upper 4 bits of 8-bit half set, add 4 to result
DUP3
DUP2
SHR
PUSH1 0x0f
LT
PUSH1 2
SHL
OR // stack: ±log2(x) table x 1 x
// Table lookup
SWAP2 // stack: x table ±log2(x) 1 x
DUP3 // stack: ±log2(x) x table ±log2(x) 1 x
SHR // stack: i table ±log2(x) 1 x
BYTE // stack: table[i] ±log2(x) 1 x
OR // stack: floor(log2(x)) 1 x
// Compute: 2 ** (floor(log2(x)) / 2)
DUP2 // stack: 1 log2(x) 1 x
SHR // stack: log2(x)/2 1 x
SHL // stack: 2**(log2(x)/2) x
// ---------------------------------
// Newton's method
// r = (r + x/r) / 2
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
DUP1
DUP3
DIV
ADD
PUSH1 1
SHR
// y = x/r
DUP1
SWAP2
DIV
// min(r, x/r)
DUP2
GT
SWAP1
SUB
// Return result
PUSH0
MSTORE
PUSH1 32
PUSH0
RETURN
// SPDX-License-Identifier: GPL-2.0
// Gas efficient SQRT method, computes floor(sqrt(x)).
// Constant gas cost of 407 + solidity overhead.
//
// Author: Lohann Ferreira
pragma solidity >=0.7.0 <0.9.0;
contract Sqrt {
function sqrt(uint256 x) public pure returns (uint256 r) {
assembly ("memory-safe") {
// r = floor(log2(x))
r := shl(7, gt(x, 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF))
let xx := shr(r, x)
let rr := shl(6, gt(x, 0xFFFFFFFFFFFFFFFF))
xx := shr(rr, xx)
r := or(r, rr)
rr := shl(5, gt(xx, 0xFFFFFFFF))
xx := shr(rr, xx)
r := or(r, rr)
rr := shl(4, gt(xx, 0xFFFF))
xx := shr(rr, xx)
r := or(r, rr)
rr := shl(3, gt(xx, 0xFF))
xx := shr(rr, xx)
r := or(r, rr)
rr := shl(2, gt(xx, 0x0F))
xx := shr(rr, xx)
r := or(r, rr)
rr := shl(1, gt(xx, 0x03))
xx := shr(rr, xx)
r := or(r, rr)
r := shl(shr(1, r), 1)
// Newton's method
r := shr(1, add(r, div(x, r)))
r := shr(1, add(r, div(x, r)))
r := shr(1, add(r, div(x, r)))
r := shr(1, add(r, div(x, r)))
r := shr(1, add(r, div(x, r)))
r := shr(1, add(r, div(x, r)))
r := shr(1, add(r, div(x, r)))
// r = min(r, x/r)
r := sub(r, gt(r, div(x, r)))
}
}
}
@marmitar
Copy link

marmitar commented Jul 26, 2025

Caution

OUTDATED

Possible optimizations:

1. Instruction ordering (unnecessary swaps)

SWAP1 on line 70 may be skipped if you do that PUSH1 1 before the very first PUSH0 on line 4.

2. One additional Newton step

Instead of doing min(x, x/r) on lines 123-136, which costs 35 gas, you can do one additional step of Newton's method (20 gas) and average it with the last result (12 gas) like this:

// r' = (r + x / r) >> 1
dup1
swap2
div
dup2 // save r
add
push1 1
shr
// r = (r + r') / 2
add
push1 1
shr

This works because min(x, x/r) is a guard for divergent sequences which appear for all $x$ such that $x + 1$ is a perfect square. Is this case, the divergent sequence oscillates between $\lfloor\sqrt{x}\rfloor$ and $\lfloor\sqrt{x}\rfloor + 1$, and the average of them is just $\lfloor\sqrt{x}\rfloor$.

Tip

Actually, your old implementation is even more efficient: r := sub(r, gt(r, div(x, r))):

// y = x/r
DUP1
SWAP2
DIV

// min(r, x/r)
DUP2
GT
SWAP1
SUB

@Lohann
Copy link
Author

Lohann commented Jul 26, 2025

Thank you for the heads up @marmitar !! I just noticed that I pick the wrong code, the actual latest version born from a discussion I had with @caironeth here: Lohann/openzeppelin-contracts#1 (comment), the actual GAS savings between this and latest version are thanks to him for finding a better constant gas LOG2 method.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment