Last active
July 28, 2025 21:42
-
-
Save Lohann/f01e2558691ba8648f1e5a7c6e8d37da to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Efficient SQRT method | |
| // Constant gas cost of 331 discounting RETURN and CALLDATALOAD logic, or 349 including everything. | |
| // | |
| // Special thanks to @caironeth for have found a better log2(x) here: | |
| // - https://github.com/Lohann/openzeppelin-contracts/pull/1 | |
| // | |
| // For testing use this tool: https://www.evm.codes/playground?fork=cancun | |
| // Authors: | |
| // - Lohann Paterno Coutinho Ferreira <[email protected]> | |
| // - Cairo <https://github.com/cairoeth> | |
| // LOAD X | |
| PUSH0 | |
| CALLDATALOAD | |
| // ------ 2 ** (log2(x) / 2) ------ | |
| PUSH1 1 // stack: 1 x | |
| DUP2 // stack: x 1 x | |
| // Lookup Table, stack: table x 1 x | |
| PUSH30 0x010102020202030303030303030300000000000000000000000000000000 | |
| // If value has upper 128 bits set, log2 result is at least 128 | |
| DUP2 // stack: x table x 1 x | |
| PUSH16 0xffffffffffffffffffffffffffffffff // stack: 2**128-1 x table x 1 x | |
| LT // stack: x>=2**128 table x 1 x | |
| PUSH1 7 // stack: 7 x>=2**128 table x 1 x | |
| SHL // stack: ±log2(x) table x 1 x | |
| // If upper 64 bits of 128-bit half set, add 64 to result | |
| // stack: log2(x) table x 1 x | |
| DUP3 // stack: x ±log2(x) table x 1 x | |
| DUP2 // stack: ±log2(x) x ±log2(x) table x 1 x | |
| SHR // stack: x/±log2(x) ±log2(x) table x 1 x | |
| PUSH8 0xffffffffffffffff // stack: 2**64-1 x/±log2(x) ±log2(x) table x 1 x | |
| LT // stack: (x/2**±log2(x) >= 2**64) ±log2(x) table x 1 x | |
| PUSH1 6 // stack: 6 (x/2**±log2(x) >= 2**64) ±log2(x) table x 1 x | |
| SHL // stack: ±log2(x) ±log2(x) table x 1 x | |
| OR // stack: ±log2(x) table x 1 x | |
| // If upper 32 bits of 64-bit half set, add 32 to result | |
| DUP3 | |
| DUP2 | |
| SHR | |
| PUSH4 0xffffffff | |
| LT | |
| PUSH1 5 | |
| SHL | |
| OR | |
| // If upper 16 bits of 32-bit half set, add 16 to result | |
| DUP3 | |
| DUP2 | |
| SHR | |
| PUSH2 0xffff | |
| LT | |
| PUSH1 4 | |
| SHL | |
| OR | |
| // If upper 8 bits of 16-bit half set, add 8 to result | |
| DUP3 | |
| DUP2 | |
| SHR | |
| PUSH1 0xff | |
| LT | |
| PUSH1 3 | |
| SHL | |
| OR | |
| // If upper 4 bits of 8-bit half set, add 4 to result | |
| DUP3 | |
| DUP2 | |
| SHR | |
| PUSH1 0x0f | |
| LT | |
| PUSH1 2 | |
| SHL | |
| OR // stack: ±log2(x) table x 1 x | |
| // Table lookup | |
| SWAP2 // stack: x table ±log2(x) 1 x | |
| DUP3 // stack: ±log2(x) x table ±log2(x) 1 x | |
| SHR // stack: i table ±log2(x) 1 x | |
| BYTE // stack: table[i] ±log2(x) 1 x | |
| OR // stack: floor(log2(x)) 1 x | |
| // Compute: 2 ** (floor(log2(x)) / 2) | |
| DUP2 // stack: 1 log2(x) 1 x | |
| SHR // stack: log2(x)/2 1 x | |
| SHL // stack: 2**(log2(x)/2) x | |
| // --------------------------------- | |
| // Newton's method | |
| // r = (r + x/r) / 2 | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| DUP1 | |
| DUP3 | |
| DIV | |
| ADD | |
| PUSH1 1 | |
| SHR | |
| // y = x/r | |
| DUP1 | |
| SWAP2 | |
| DIV | |
| // min(r, x/r) | |
| DUP2 | |
| GT | |
| SWAP1 | |
| SUB | |
| // Return result | |
| PUSH0 | |
| MSTORE | |
| PUSH1 32 | |
| PUSH0 | |
| RETURN |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // SPDX-License-Identifier: GPL-2.0 | |
| // Gas efficient SQRT method, computes floor(sqrt(x)). | |
| // Constant gas cost of 407 + solidity overhead. | |
| // | |
| // Author: Lohann Ferreira | |
| pragma solidity >=0.7.0 <0.9.0; | |
| contract Sqrt { | |
| function sqrt(uint256 x) public pure returns (uint256 r) { | |
| assembly ("memory-safe") { | |
| // r = floor(log2(x)) | |
| r := shl(7, gt(x, 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF)) | |
| let xx := shr(r, x) | |
| let rr := shl(6, gt(x, 0xFFFFFFFFFFFFFFFF)) | |
| xx := shr(rr, xx) | |
| r := or(r, rr) | |
| rr := shl(5, gt(xx, 0xFFFFFFFF)) | |
| xx := shr(rr, xx) | |
| r := or(r, rr) | |
| rr := shl(4, gt(xx, 0xFFFF)) | |
| xx := shr(rr, xx) | |
| r := or(r, rr) | |
| rr := shl(3, gt(xx, 0xFF)) | |
| xx := shr(rr, xx) | |
| r := or(r, rr) | |
| rr := shl(2, gt(xx, 0x0F)) | |
| xx := shr(rr, xx) | |
| r := or(r, rr) | |
| rr := shl(1, gt(xx, 0x03)) | |
| xx := shr(rr, xx) | |
| r := or(r, rr) | |
| r := shl(shr(1, r), 1) | |
| // Newton's method | |
| r := shr(1, add(r, div(x, r))) | |
| r := shr(1, add(r, div(x, r))) | |
| r := shr(1, add(r, div(x, r))) | |
| r := shr(1, add(r, div(x, r))) | |
| r := shr(1, add(r, div(x, r))) | |
| r := shr(1, add(r, div(x, r))) | |
| r := shr(1, add(r, div(x, r))) | |
| // r = min(r, x/r) | |
| r := sub(r, gt(r, div(x, r))) | |
| } | |
| } | |
| } |
Author
Thank you for the heads up @marmitar !! I just noticed that I pick the wrong code, the actual latest version born from a discussion I had with @caironeth here: Lohann/openzeppelin-contracts#1 (comment), the actual GAS savings between this and latest version are thanks to him for finding a better constant gas LOG2 method.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Caution
OUTDATED
Possible optimizations:
1. Instruction ordering (unnecessary swaps)
SWAP1on line 70 may be skipped if you do thatPUSH1 1before the very firstPUSH0on line 4.2. One additional Newton step
Instead of doing
min(x, x/r)on lines 123-136, which costs 35 gas, you can do one additional step of Newton's method (20 gas) and average it with the last result (12 gas) like this:This works because$x$ such that $x + 1$ is a perfect square. Is this case, the divergent sequence oscillates between $\lfloor\sqrt{x}\rfloor$ and $\lfloor\sqrt{x}\rfloor + 1$ , and the average of them is just $\lfloor\sqrt{x}\rfloor$ .
min(x, x/r)is a guard for divergent sequences which appear for allTip
Actually, your old implementation is even more efficient:
r := sub(r, gt(r, div(x, r))):