Skip to content

Instantly share code, notes, and snippets.

@ar1ja
Created December 7, 2025 15:43
Show Gist options
  • Select an option

  • Save ar1ja/e60599fc52185043cf411e51bae84c86 to your computer and use it in GitHub Desktop.

Select an option

Save ar1ja/e60599fc52185043cf411e51bae84c86 to your computer and use it in GitHub Desktop.
Safe and Sophie Germain prime number generator
#!/bin/sh
set -eux
main() {
rm -f default.profraw genprime.profdata
clang -o genprime -fprofile-instr-generate -Wpedantic -flto=full -fno-trapping-math -fno-math-errno -fno-stack-check -fno-strict-overflow -funroll-loops -fno-stack-protector -fvisibility-inlines-hidden -mfancy-math-387 -fomit-frame-pointer -fstrict-overflow -Wshadow -fno-exceptions -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -Wall -Wextra -fno-signed-zeros -fno-strict-aliasing -pedantic -O3 -fvisibility=hidden -ffast-math -funsafe-math-optimizations -std=c99 -fno-asynchronous-unwind-tables -Werror -fdiscard-value-names -femit-all-decls -fmerge-all-constants -fno-use-cxa-atexit -fno-use-init-array -march=native -mtune=native -pedantic-errors genprime.c -lgmp -lpthread -s -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wl,--as-needed -Wl,--no-copy-dt-needed-entries
llvm-strip --strip-debug --strip-sections --strip-unneeded -T --remove-section=.note.gnu.gold-version --remove-section=.note --strip-all --discard-locals --remove-section=.gnu.version --remove-section=.eh_frame --remove-section=.note.gnu.build-id --remove-section=.note.ABI-tag --strip-symbol=__gmon_start__ --strip-all-gnu --remove-section=.comment --remove-section=.eh_frame_ptr --discard-all genprime
echo 'Generating optimisation profile. DO NOT interrupt! This could take anywhere between 1 and 10 minutes depending on your CPU. Or you can edit the ./genprime parameters in this script (such as 128)'
./genprime 512 "$(nproc)"
llvm-profdata merge -o genprime.profdata default.profraw
clang -o genprime -fprofile-instr-use=genprime.profdata -Wpedantic -flto=full -fno-trapping-math -fno-math-errno -fno-stack-check -fno-strict-overflow -funroll-loops -fno-stack-protector -fvisibility-inlines-hidden -mfancy-math-387 -fomit-frame-pointer -fstrict-overflow -Wshadow -fno-exceptions -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -Wall -Wextra -fno-signed-zeros -fno-strict-aliasing -pedantic -O3 -fvisibility=hidden -ffast-math -funsafe-math-optimizations -std=c99 -fno-asynchronous-unwind-tables -Werror -fdiscard-value-names -femit-all-decls -fmerge-all-constants -fno-use-cxa-atexit -fno-use-init-array -march=native -mtune=native -pedantic-errors genprime.c -lgmp -lpthread -s -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wl,--as-needed -Wl,--no-copy-dt-needed-entries
llvm-strip --strip-debug --strip-sections --strip-unneeded -T --remove-section=.note.gnu.gold-version --remove-section=.note --strip-all --discard-locals --remove-section=.gnu.version --remove-section=.eh_frame --remove-section=.note.gnu.build-id --remove-section=.note.ABI-tag --strip-symbol=__gmon_start__ --strip-all-gnu --remove-section=.comment --remove-section=.eh_frame_ptr --discard-all genprime
rm -f default.profraw genprime.profdata
}
main "$@"
/*
* Generate prime numbers p in set K_n
*
* Set K_n is a special subset of numbers P where every number p in set K_n
* meets:
*
* - p is prime
* - 2p+1 is prime
* - (p-1)/2 is prime
* - p is n bits
* - p transitions from 0 to 1 and 1 to 0 n/2 times (aka. gray_code - 1)
* - p has n/2 bits set
*
* (c) 2024-2025 Arija A. <[email protected]> is licensed under CC BY-SA 4.0
*
* Read more about CC BY-SA 4.0 here:
* https://creativecommons.org/licenses/by-sa/4.0/
*
* Configuration macros:
*
* - USE_MANUAL_SMALLPRIMES -- Use manual small prime checking
* - USE_OPENSSL_RAND -- Use OpenSSL RAND() over getrandom()
*
* Usage: ./genprime bits(int) threads(int) perfect(y|n)
* Last argument decides whether the prime is perfect: EXACTLY half transitions.
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <gmp.h>
#include <time.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#if defined(_WIN32)
#include <windows.h>
#elif defined(__unix__) || defined(__APPLE__)
#include <sys/time.h>
#endif
/* Randomness support */
#if defined(USE_OPENSSL_RAND)
#define HAS_OPENSSL 1
#else
/* If Linux, getrandom() is available */
#if defined(__linux__)
#define HAS_OPENSSL 0
#else
/* Other platforms: fall back to OpenSSL */
#define HAS_OPENSSL 1
#endif
#endif
#if HAS_OPENSSL
#include <openssl/rand.h>
#else
#include <sys/random.h>
#include <unistd.h>
#endif
typedef struct {
mpz_t result;
uint16_t n;
volatile bool found;
pthread_mutex_t *lock;
} ThreadedPrime;
typedef struct {
ThreadedPrime *shared;
uint16_t thread_index;
uint64_t *tries_slots;
bool perfect;
} ThreadArg;
#if defined(USE_MANUAL_SMALLPRIMES) || !defined(__GNU_MP_VERSION) || \
(__GNU_MP_VERSION < 6) || \
(__GNU_MP_VERSION == 6 && __GNU_MP_VERSION_MINOR < 2)
/* OLD GMP or version macros missing -> do manual small primes */
#define USE_MANUAL_SMALLPRIMES 1
#else
/* GMP >= 6.2 -> it already does small primes & Lucas/BPSW checks */
#define USE_MANUAL_SMALLPRIMES 0
#endif
#if defined(__GNUC__) || defined(__clang__)
#define HAS_BUILTIN_POPCOUNTLL 1
#else
#define HAS_BUILTIN_POPCOUNTLL 0
#endif
static inline double get_wall_time(void) {
#if defined(_WIN32)
LARGE_INTEGER freq, counter;
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&counter);
return (double)counter.QuadPart / (double)freq.QuadPart;
#elif defined(__unix__) || defined(__APPLE__)
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + tv.tv_usec * 1e-6;
#else
return (double)clock() / CLOCKS_PER_SEC; /* fallback */
#endif
}
/* Lightweight wrapper for GMP primality test mapping to bool */
static inline bool is_prime(const mpz_t n, uint16_t reps) {
#if USE_MANUAL_SMALLPRIMES
uint8_t idx;
/* Small primes for cheap trial division */
static const uint16_t small_primes[] = {
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107,
109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181,
191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263,
269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433,
439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521,
523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613,
617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701,
709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809,
811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887,
907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997};
static const uint8_t small_primes_count =
sizeof(small_primes) / sizeof(small_primes[0]);
for (idx = 0; idx < small_primes_count; ++idx) {
const uint16_t pr = small_primes[idx];
/* if p equals a small prime, treat as passing */
if (mpz_cmp_ui(n, pr) == 0) {
return true;
}
/* if divisible by pr, composite */
if (mpz_divisible_ui_p(n, pr)) {
return false;
}
}
#endif
return mpz_probab_prime_p(n, (int)reps) != 0;
}
/* Count set bits */
static inline uint16_t count_set_bits(const mpz_t n) {
return (uint16_t)mpz_popcount(n);
}
/* Count transitions 0->1 or 1->0 (using XOR trick) */
static inline uint16_t transitions(const mpz_t n, mpz_t tmp) {
mpz_fdiv_q_2exp(tmp, n, 1U); /* tmp = n >> 1 */
mpz_xor(tmp, n, tmp); /* tmp = n ^ tmp = n ^ (n >> 1) */
/* transitions = gray_code - 1 */
/* NOTE: For cryptographic primes the gray code is never 0 so we can assume
* no underflow. */
return (uint16_t)(mpz_popcount(tmp) - 1);
}
/* Helpers for thread worker */
static inline bool init_gmp_state_from_getrandom(gmp_randstate_t *gmp_state,
size_t seed_bytes) {
mpz_t seed;
uint8_t *seedbuf;
size_t total = 0;
mpz_init(seed);
seedbuf = malloc(seed_bytes);
if (!seedbuf) {
return false;
}
while (total < seed_bytes) {
#if HAS_OPENSSL
const int ret = RAND_bytes(seedbuf + total, (int)(seed_bytes - total));
#else
const ssize_t ret = getrandom(seedbuf + total, seed_bytes - total, 0);
#endif
if (ret < 0) {
if (errno == EINTR) {
continue;
}
free(seedbuf);
mpz_clear(seed);
return false;
}
total += (size_t)ret;
}
mpz_import(seed, seed_bytes, 1, sizeof(seedbuf[0]), 0, 0, seedbuf);
gmp_randinit_default(*gmp_state);
gmp_randseed(*gmp_state, seed);
free(seedbuf);
mpz_clear(seed);
return true;
}
/* Generate an n-bit candidate for prime */
static inline void generate_nbit_candidate(mpz_t p,
mpz_t tmp,
gmp_randstate_t gmp_state,
uint16_t nm1,
uint16_t halfbits,
bool perfect) {
do {
mpz_urandomb(p, gmp_state, nm1);
mpz_setbit(p, nm1); /* Ensure p is n bits long */
mpz_setbit(p, 0); /* Ensure p is odd */
/* Adjust p % 12 == 11 */
const unsigned long rem = mpz_fdiv_ui(p, 12UL);
if (rem != 11UL) {
mpz_add_ui(p, p, 11UL - rem);
}
} while (count_set_bits(p) != halfbits ||
(perfect ? (transitions(p, tmp) != halfbits)
: (transitions(p, tmp) < halfbits)));
}
/*
* Quick checks on p
* Compute exponent = (p-1)/2 and MR on exponent and 2p+1.
* Returns true on pass; exponent and tmp are filled for reuse.
*/
static inline bool quick_p_and_compute_exponent(mpz_t p,
mpz_t tmp,
mpz_t exponent,
uint16_t weak_reps) {
if (!is_prime(p, weak_reps)) {
return false;
}
/* Compute exponent = (p-1)/2 */
mpz_sub_ui(tmp, p, 1U); /* tmp = p - 1 */
mpz_fdiv_q_2exp(exponent, tmp, 1U); /* exponent = tmp >> 1 */
if (!is_prime(exponent, weak_reps)) {
return false;
}
/* Compute 2p + 1 */
mpz_mul_2exp(tmp, p, 1); /* tmp = p << 1 */
mpz_add_ui(tmp, tmp, 1); /* tmp = 2p + 1 */
if (!is_prime(tmp, weak_reps)) {
return false;
}
return true;
}
/* Mid-level checks reusing precomputed exponent and tmp */
static inline bool
mid_level_checks(mpz_t p, mpz_t exponent, mpz_t tmp, uint16_t mid_reps) {
if (!is_prime(p, mid_reps)) {
return false;
}
if (!is_prime(exponent, mid_reps)) {
return false;
}
if (!is_prime(tmp, mid_reps)) { /* reuse 2p+1 */
return false;
}
return true;
}
/* Strong-level checks reusing precomputed exponent and tmp */
static inline bool
strong_level_checks(mpz_t p, mpz_t exponent, mpz_t tmp, uint16_t strong_reps) {
if (!is_prime(p, strong_reps)) {
return false;
}
if (!is_prime(exponent, strong_reps)) {
return false;
}
if (!is_prime(tmp, strong_reps)) { /* reuse 2p+1 */
return false;
}
return true;
}
/* Thread worker */
static void *thread_generate_large_prime(void *arg) {
ThreadArg *targ = (ThreadArg *)arg;
ThreadedPrime *shared = targ->shared;
const uint16_t n = shared->n;
const uint16_t nm1 = n - 1;
const size_t bytes = (size_t)((n + 7ULL) / 8ULL);
const uint16_t halfbits = (uint16_t)(n / 2UL);
const uint16_t idx = targ->thread_index;
const bool perfect = targ->perfect;
/* Seed GMP RNG */
gmp_randstate_t gmp_state;
if (!init_gmp_state_from_getrandom(&gmp_state, bytes)) {
fprintf(stderr,
"Thread %u: failed to initialize GMP RNG from getrandom\n",
(unsigned)idx);
targ->tries_slots[idx] = 0;
return NULL;
}
/* Compute */
mpz_t p;
mpz_t tmp;
mpz_t exponent;
mpz_init(p);
mpz_init(tmp);
mpz_init(exponent);
uint64_t local_tries = 0;
const uint16_t weak_reps = 2U;
const uint16_t mid_reps =
(n >= 32U) ? (uint16_t)(((n / 8U) < 16U) ? (n / 8U) : 16U) : 4U;
const uint16_t strong_reps = (uint16_t)(n / 4U);
while (!shared->found) {
/* Get random number */
generate_nbit_candidate(p, tmp, gmp_state, nm1, halfbits, perfect);
/* Try count */
++local_tries;
/* Very quick prime filter + compute exponent + weak checks on exponent
* and 2p+1 */
if (!quick_p_and_compute_exponent(p, tmp, exponent, weak_reps)) {
continue;
}
/* Mid-level checks */
if (!mid_level_checks(p, exponent, tmp, mid_reps)) {
continue;
}
/* Final, very-sure checks */
if (!strong_level_checks(p, exponent, tmp, strong_reps)) {
continue;
}
/* JACKPOT! */
pthread_mutex_lock(shared->lock);
if (!shared->found) {
mpz_set(shared->result, p);
shared->found = true;
targ->tries_slots[idx] = local_tries;
local_tries = 0;
}
pthread_mutex_unlock(shared->lock);
}
/* Store remaining local tries for the main thread to sum */
targ->tries_slots[idx] = local_tries;
mpz_clear(p);
mpz_clear(tmp);
mpz_clear(exponent);
gmp_randclear(gmp_state);
return NULL;
}
int main(const int argc, const char *const *argv) {
uint16_t n;
uint16_t idx;
uint16_t num_threads;
bool perfect;
if (argc != 3 && argc != 4) {
fprintf(stderr, "Usage: %s <bits> <threads> <perfect? y|=n>\n",
argv[0]);
return 1;
}
n = (uint16_t)atoi(argv[1]);
num_threads = (uint16_t)atoi(argv[2]);
perfect = (argc == 4) && (argv[3][0] != 'n');
if (n < 8) {
fputs("Expected number of bits to be at least 8.\n", stderr);
return 1;
}
if (num_threads < 1) {
fputs("At least 1 thread is required\n", stderr);
return 1;
}
pthread_t *threads = (pthread_t *)malloc(sizeof(*threads) * num_threads);
if (!threads) {
perror("malloc");
return 1;
}
ThreadArg *targs = (ThreadArg *)malloc(sizeof(*targs) * num_threads);
if (!targs) {
perror("malloc");
free(threads);
return 1;
}
uint64_t *tries_slots = calloc((size_t)num_threads, sizeof(*tries_slots));
if (!tries_slots) {
perror("calloc");
free(threads);
free(targs);
return 1;
}
ThreadedPrime tp;
mpz_init(tp.result);
tp.n = n;
tp.found = false;
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
tp.lock = &lock;
const double start_time = get_wall_time();
/* create worker threads */
for (idx = 0; idx < num_threads; ++idx) {
targs[idx].shared = &tp;
targs[idx].thread_index = idx;
targs[idx].tries_slots = tries_slots;
targs[idx].perfect = perfect;
int ret = pthread_create(&threads[idx], NULL,
thread_generate_large_prime, &targs[idx]);
if (ret != 0) {
fprintf(stderr, "pthread_create failed: %s\n", strerror(ret));
while (idx-- > 0) {
pthread_join(threads[idx], NULL);
}
free(threads);
free(targs);
free(tries_slots);
mpz_clear(tp.result);
return 1;
}
}
/* join worker threads */
for (idx = 0; idx < num_threads; ++idx) {
pthread_join(threads[idx], NULL);
}
/* sum tries from per-thread slots */
uint64_t total_tries = 0;
for (idx = 0; idx < num_threads; ++idx) {
total_tries += tries_slots[idx];
}
if (tp.found) {
gmp_printf("%Zd\n", tp.result);
} else {
puts("0");
}
printf("[STATS] Took %ju tries to find this prime.\n",
(uintmax_t)total_tries);
{
const double end_time = get_wall_time();
const double elapsed = end_time - start_time;
const uintmax_t hours = (uintmax_t)(elapsed / 3600.0);
const uintmax_t minutes =
(uintmax_t)((elapsed - hours * 3600.0) / 60.0);
const uintmax_t secs =
(uintmax_t)(elapsed - hours * 3600.0 - minutes * 60.0);
const uintmax_t frac =
(uintmax_t)((elapsed - (uintmax_t)elapsed) * 100000.0);
printf("[STATS] Took %02ju:%02ju:%02ju.%05ju to compute this prime\n",
hours, minutes, secs, frac);
}
mpz_clear(tp.result);
pthread_mutex_destroy(&lock);
free(threads);
free(targs);
free(tries_slots);
return 0;
}
@ar1ja
Copy link
Author

ar1ja commented Dec 7, 2025

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment