Created
December 7, 2025 15:43
-
-
Save ar1ja/e60599fc52185043cf411e51bae84c86 to your computer and use it in GitHub Desktop.
Safe and Sophie Germain prime number generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/sh | |
| set -eux | |
| main() { | |
| rm -f default.profraw genprime.profdata | |
| clang -o genprime -fprofile-instr-generate -Wpedantic -flto=full -fno-trapping-math -fno-math-errno -fno-stack-check -fno-strict-overflow -funroll-loops -fno-stack-protector -fvisibility-inlines-hidden -mfancy-math-387 -fomit-frame-pointer -fstrict-overflow -Wshadow -fno-exceptions -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -Wall -Wextra -fno-signed-zeros -fno-strict-aliasing -pedantic -O3 -fvisibility=hidden -ffast-math -funsafe-math-optimizations -std=c99 -fno-asynchronous-unwind-tables -Werror -fdiscard-value-names -femit-all-decls -fmerge-all-constants -fno-use-cxa-atexit -fno-use-init-array -march=native -mtune=native -pedantic-errors genprime.c -lgmp -lpthread -s -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wl,--as-needed -Wl,--no-copy-dt-needed-entries | |
| llvm-strip --strip-debug --strip-sections --strip-unneeded -T --remove-section=.note.gnu.gold-version --remove-section=.note --strip-all --discard-locals --remove-section=.gnu.version --remove-section=.eh_frame --remove-section=.note.gnu.build-id --remove-section=.note.ABI-tag --strip-symbol=__gmon_start__ --strip-all-gnu --remove-section=.comment --remove-section=.eh_frame_ptr --discard-all genprime | |
| echo 'Generating optimisation profile. DO NOT interrupt! This could take anywhere between 1 and 10 minutes depending on your CPU. Or you can edit the ./genprime parameters in this script (such as 128)' | |
| ./genprime 512 "$(nproc)" | |
| llvm-profdata merge -o genprime.profdata default.profraw | |
| clang -o genprime -fprofile-instr-use=genprime.profdata -Wpedantic -flto=full -fno-trapping-math -fno-math-errno -fno-stack-check -fno-strict-overflow -funroll-loops -fno-stack-protector -fvisibility-inlines-hidden -mfancy-math-387 -fomit-frame-pointer -fstrict-overflow -Wshadow -fno-exceptions -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -Wall -Wextra -fno-signed-zeros -fno-strict-aliasing -pedantic -O3 -fvisibility=hidden -ffast-math -funsafe-math-optimizations -std=c99 -fno-asynchronous-unwind-tables -Werror -fdiscard-value-names -femit-all-decls -fmerge-all-constants -fno-use-cxa-atexit -fno-use-init-array -march=native -mtune=native -pedantic-errors genprime.c -lgmp -lpthread -s -Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wl,--as-needed -Wl,--no-copy-dt-needed-entries | |
| llvm-strip --strip-debug --strip-sections --strip-unneeded -T --remove-section=.note.gnu.gold-version --remove-section=.note --strip-all --discard-locals --remove-section=.gnu.version --remove-section=.eh_frame --remove-section=.note.gnu.build-id --remove-section=.note.ABI-tag --strip-symbol=__gmon_start__ --strip-all-gnu --remove-section=.comment --remove-section=.eh_frame_ptr --discard-all genprime | |
| rm -f default.profraw genprime.profdata | |
| } | |
| main "$@" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| * Generate prime numbers p in set K_n | |
| * | |
| * Set K_n is a special subset of numbers P where every number p in set K_n | |
| * meets: | |
| * | |
| * - p is prime | |
| * - 2p+1 is prime | |
| * - (p-1)/2 is prime | |
| * - p is n bits | |
| * - p transitions from 0 to 1 and 1 to 0 n/2 times (aka. gray_code - 1) | |
| * - p has n/2 bits set | |
| * | |
| * (c) 2024-2025 Arija A. <[email protected]> is licensed under CC BY-SA 4.0 | |
| * | |
| * Read more about CC BY-SA 4.0 here: | |
| * https://creativecommons.org/licenses/by-sa/4.0/ | |
| * | |
| * Configuration macros: | |
| * | |
| * - USE_MANUAL_SMALLPRIMES -- Use manual small prime checking | |
| * - USE_OPENSSL_RAND -- Use OpenSSL RAND() over getrandom() | |
| * | |
| * Usage: ./genprime bits(int) threads(int) perfect(y|n) | |
| * Last argument decides whether the prime is perfect: EXACTLY half transitions. | |
| */ | |
| #include <stdbool.h> | |
| #include <stdint.h> | |
| #include <stdlib.h> | |
| #include <unistd.h> | |
| #include <stdio.h> | |
| #include <gmp.h> | |
| #include <time.h> | |
| #include <errno.h> | |
| #include <string.h> | |
| #include <unistd.h> | |
| #include <pthread.h> | |
| #if defined(_WIN32) | |
| #include <windows.h> | |
| #elif defined(__unix__) || defined(__APPLE__) | |
| #include <sys/time.h> | |
| #endif | |
| /* Randomness support */ | |
| #if defined(USE_OPENSSL_RAND) | |
| #define HAS_OPENSSL 1 | |
| #else | |
| /* If Linux, getrandom() is available */ | |
| #if defined(__linux__) | |
| #define HAS_OPENSSL 0 | |
| #else | |
| /* Other platforms: fall back to OpenSSL */ | |
| #define HAS_OPENSSL 1 | |
| #endif | |
| #endif | |
| #if HAS_OPENSSL | |
| #include <openssl/rand.h> | |
| #else | |
| #include <sys/random.h> | |
| #include <unistd.h> | |
| #endif | |
| typedef struct { | |
| mpz_t result; | |
| uint16_t n; | |
| volatile bool found; | |
| pthread_mutex_t *lock; | |
| } ThreadedPrime; | |
| typedef struct { | |
| ThreadedPrime *shared; | |
| uint16_t thread_index; | |
| uint64_t *tries_slots; | |
| bool perfect; | |
| } ThreadArg; | |
| #if defined(USE_MANUAL_SMALLPRIMES) || !defined(__GNU_MP_VERSION) || \ | |
| (__GNU_MP_VERSION < 6) || \ | |
| (__GNU_MP_VERSION == 6 && __GNU_MP_VERSION_MINOR < 2) | |
| /* OLD GMP or version macros missing -> do manual small primes */ | |
| #define USE_MANUAL_SMALLPRIMES 1 | |
| #else | |
| /* GMP >= 6.2 -> it already does small primes & Lucas/BPSW checks */ | |
| #define USE_MANUAL_SMALLPRIMES 0 | |
| #endif | |
| #if defined(__GNUC__) || defined(__clang__) | |
| #define HAS_BUILTIN_POPCOUNTLL 1 | |
| #else | |
| #define HAS_BUILTIN_POPCOUNTLL 0 | |
| #endif | |
| static inline double get_wall_time(void) { | |
| #if defined(_WIN32) | |
| LARGE_INTEGER freq, counter; | |
| QueryPerformanceFrequency(&freq); | |
| QueryPerformanceCounter(&counter); | |
| return (double)counter.QuadPart / (double)freq.QuadPart; | |
| #elif defined(__unix__) || defined(__APPLE__) | |
| struct timeval tv; | |
| gettimeofday(&tv, NULL); | |
| return tv.tv_sec + tv.tv_usec * 1e-6; | |
| #else | |
| return (double)clock() / CLOCKS_PER_SEC; /* fallback */ | |
| #endif | |
| } | |
| /* Lightweight wrapper for GMP primality test mapping to bool */ | |
| static inline bool is_prime(const mpz_t n, uint16_t reps) { | |
| #if USE_MANUAL_SMALLPRIMES | |
| uint8_t idx; | |
| /* Small primes for cheap trial division */ | |
| static const uint16_t small_primes[] = { | |
| 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, | |
| 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, | |
| 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, | |
| 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, | |
| 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, | |
| 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, | |
| 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, | |
| 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, | |
| 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, | |
| 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, | |
| 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, | |
| 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997}; | |
| static const uint8_t small_primes_count = | |
| sizeof(small_primes) / sizeof(small_primes[0]); | |
| for (idx = 0; idx < small_primes_count; ++idx) { | |
| const uint16_t pr = small_primes[idx]; | |
| /* if p equals a small prime, treat as passing */ | |
| if (mpz_cmp_ui(n, pr) == 0) { | |
| return true; | |
| } | |
| /* if divisible by pr, composite */ | |
| if (mpz_divisible_ui_p(n, pr)) { | |
| return false; | |
| } | |
| } | |
| #endif | |
| return mpz_probab_prime_p(n, (int)reps) != 0; | |
| } | |
| /* Count set bits */ | |
| static inline uint16_t count_set_bits(const mpz_t n) { | |
| return (uint16_t)mpz_popcount(n); | |
| } | |
| /* Count transitions 0->1 or 1->0 (using XOR trick) */ | |
| static inline uint16_t transitions(const mpz_t n, mpz_t tmp) { | |
| mpz_fdiv_q_2exp(tmp, n, 1U); /* tmp = n >> 1 */ | |
| mpz_xor(tmp, n, tmp); /* tmp = n ^ tmp = n ^ (n >> 1) */ | |
| /* transitions = gray_code - 1 */ | |
| /* NOTE: For cryptographic primes the gray code is never 0 so we can assume | |
| * no underflow. */ | |
| return (uint16_t)(mpz_popcount(tmp) - 1); | |
| } | |
| /* Helpers for thread worker */ | |
| static inline bool init_gmp_state_from_getrandom(gmp_randstate_t *gmp_state, | |
| size_t seed_bytes) { | |
| mpz_t seed; | |
| uint8_t *seedbuf; | |
| size_t total = 0; | |
| mpz_init(seed); | |
| seedbuf = malloc(seed_bytes); | |
| if (!seedbuf) { | |
| return false; | |
| } | |
| while (total < seed_bytes) { | |
| #if HAS_OPENSSL | |
| const int ret = RAND_bytes(seedbuf + total, (int)(seed_bytes - total)); | |
| #else | |
| const ssize_t ret = getrandom(seedbuf + total, seed_bytes - total, 0); | |
| #endif | |
| if (ret < 0) { | |
| if (errno == EINTR) { | |
| continue; | |
| } | |
| free(seedbuf); | |
| mpz_clear(seed); | |
| return false; | |
| } | |
| total += (size_t)ret; | |
| } | |
| mpz_import(seed, seed_bytes, 1, sizeof(seedbuf[0]), 0, 0, seedbuf); | |
| gmp_randinit_default(*gmp_state); | |
| gmp_randseed(*gmp_state, seed); | |
| free(seedbuf); | |
| mpz_clear(seed); | |
| return true; | |
| } | |
| /* Generate an n-bit candidate for prime */ | |
| static inline void generate_nbit_candidate(mpz_t p, | |
| mpz_t tmp, | |
| gmp_randstate_t gmp_state, | |
| uint16_t nm1, | |
| uint16_t halfbits, | |
| bool perfect) { | |
| do { | |
| mpz_urandomb(p, gmp_state, nm1); | |
| mpz_setbit(p, nm1); /* Ensure p is n bits long */ | |
| mpz_setbit(p, 0); /* Ensure p is odd */ | |
| /* Adjust p % 12 == 11 */ | |
| const unsigned long rem = mpz_fdiv_ui(p, 12UL); | |
| if (rem != 11UL) { | |
| mpz_add_ui(p, p, 11UL - rem); | |
| } | |
| } while (count_set_bits(p) != halfbits || | |
| (perfect ? (transitions(p, tmp) != halfbits) | |
| : (transitions(p, tmp) < halfbits))); | |
| } | |
| /* | |
| * Quick checks on p | |
| * Compute exponent = (p-1)/2 and MR on exponent and 2p+1. | |
| * Returns true on pass; exponent and tmp are filled for reuse. | |
| */ | |
| static inline bool quick_p_and_compute_exponent(mpz_t p, | |
| mpz_t tmp, | |
| mpz_t exponent, | |
| uint16_t weak_reps) { | |
| if (!is_prime(p, weak_reps)) { | |
| return false; | |
| } | |
| /* Compute exponent = (p-1)/2 */ | |
| mpz_sub_ui(tmp, p, 1U); /* tmp = p - 1 */ | |
| mpz_fdiv_q_2exp(exponent, tmp, 1U); /* exponent = tmp >> 1 */ | |
| if (!is_prime(exponent, weak_reps)) { | |
| return false; | |
| } | |
| /* Compute 2p + 1 */ | |
| mpz_mul_2exp(tmp, p, 1); /* tmp = p << 1 */ | |
| mpz_add_ui(tmp, tmp, 1); /* tmp = 2p + 1 */ | |
| if (!is_prime(tmp, weak_reps)) { | |
| return false; | |
| } | |
| return true; | |
| } | |
| /* Mid-level checks reusing precomputed exponent and tmp */ | |
| static inline bool | |
| mid_level_checks(mpz_t p, mpz_t exponent, mpz_t tmp, uint16_t mid_reps) { | |
| if (!is_prime(p, mid_reps)) { | |
| return false; | |
| } | |
| if (!is_prime(exponent, mid_reps)) { | |
| return false; | |
| } | |
| if (!is_prime(tmp, mid_reps)) { /* reuse 2p+1 */ | |
| return false; | |
| } | |
| return true; | |
| } | |
| /* Strong-level checks reusing precomputed exponent and tmp */ | |
| static inline bool | |
| strong_level_checks(mpz_t p, mpz_t exponent, mpz_t tmp, uint16_t strong_reps) { | |
| if (!is_prime(p, strong_reps)) { | |
| return false; | |
| } | |
| if (!is_prime(exponent, strong_reps)) { | |
| return false; | |
| } | |
| if (!is_prime(tmp, strong_reps)) { /* reuse 2p+1 */ | |
| return false; | |
| } | |
| return true; | |
| } | |
| /* Thread worker */ | |
| static void *thread_generate_large_prime(void *arg) { | |
| ThreadArg *targ = (ThreadArg *)arg; | |
| ThreadedPrime *shared = targ->shared; | |
| const uint16_t n = shared->n; | |
| const uint16_t nm1 = n - 1; | |
| const size_t bytes = (size_t)((n + 7ULL) / 8ULL); | |
| const uint16_t halfbits = (uint16_t)(n / 2UL); | |
| const uint16_t idx = targ->thread_index; | |
| const bool perfect = targ->perfect; | |
| /* Seed GMP RNG */ | |
| gmp_randstate_t gmp_state; | |
| if (!init_gmp_state_from_getrandom(&gmp_state, bytes)) { | |
| fprintf(stderr, | |
| "Thread %u: failed to initialize GMP RNG from getrandom\n", | |
| (unsigned)idx); | |
| targ->tries_slots[idx] = 0; | |
| return NULL; | |
| } | |
| /* Compute */ | |
| mpz_t p; | |
| mpz_t tmp; | |
| mpz_t exponent; | |
| mpz_init(p); | |
| mpz_init(tmp); | |
| mpz_init(exponent); | |
| uint64_t local_tries = 0; | |
| const uint16_t weak_reps = 2U; | |
| const uint16_t mid_reps = | |
| (n >= 32U) ? (uint16_t)(((n / 8U) < 16U) ? (n / 8U) : 16U) : 4U; | |
| const uint16_t strong_reps = (uint16_t)(n / 4U); | |
| while (!shared->found) { | |
| /* Get random number */ | |
| generate_nbit_candidate(p, tmp, gmp_state, nm1, halfbits, perfect); | |
| /* Try count */ | |
| ++local_tries; | |
| /* Very quick prime filter + compute exponent + weak checks on exponent | |
| * and 2p+1 */ | |
| if (!quick_p_and_compute_exponent(p, tmp, exponent, weak_reps)) { | |
| continue; | |
| } | |
| /* Mid-level checks */ | |
| if (!mid_level_checks(p, exponent, tmp, mid_reps)) { | |
| continue; | |
| } | |
| /* Final, very-sure checks */ | |
| if (!strong_level_checks(p, exponent, tmp, strong_reps)) { | |
| continue; | |
| } | |
| /* JACKPOT! */ | |
| pthread_mutex_lock(shared->lock); | |
| if (!shared->found) { | |
| mpz_set(shared->result, p); | |
| shared->found = true; | |
| targ->tries_slots[idx] = local_tries; | |
| local_tries = 0; | |
| } | |
| pthread_mutex_unlock(shared->lock); | |
| } | |
| /* Store remaining local tries for the main thread to sum */ | |
| targ->tries_slots[idx] = local_tries; | |
| mpz_clear(p); | |
| mpz_clear(tmp); | |
| mpz_clear(exponent); | |
| gmp_randclear(gmp_state); | |
| return NULL; | |
| } | |
| int main(const int argc, const char *const *argv) { | |
| uint16_t n; | |
| uint16_t idx; | |
| uint16_t num_threads; | |
| bool perfect; | |
| if (argc != 3 && argc != 4) { | |
| fprintf(stderr, "Usage: %s <bits> <threads> <perfect? y|=n>\n", | |
| argv[0]); | |
| return 1; | |
| } | |
| n = (uint16_t)atoi(argv[1]); | |
| num_threads = (uint16_t)atoi(argv[2]); | |
| perfect = (argc == 4) && (argv[3][0] != 'n'); | |
| if (n < 8) { | |
| fputs("Expected number of bits to be at least 8.\n", stderr); | |
| return 1; | |
| } | |
| if (num_threads < 1) { | |
| fputs("At least 1 thread is required\n", stderr); | |
| return 1; | |
| } | |
| pthread_t *threads = (pthread_t *)malloc(sizeof(*threads) * num_threads); | |
| if (!threads) { | |
| perror("malloc"); | |
| return 1; | |
| } | |
| ThreadArg *targs = (ThreadArg *)malloc(sizeof(*targs) * num_threads); | |
| if (!targs) { | |
| perror("malloc"); | |
| free(threads); | |
| return 1; | |
| } | |
| uint64_t *tries_slots = calloc((size_t)num_threads, sizeof(*tries_slots)); | |
| if (!tries_slots) { | |
| perror("calloc"); | |
| free(threads); | |
| free(targs); | |
| return 1; | |
| } | |
| ThreadedPrime tp; | |
| mpz_init(tp.result); | |
| tp.n = n; | |
| tp.found = false; | |
| pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; | |
| tp.lock = &lock; | |
| const double start_time = get_wall_time(); | |
| /* create worker threads */ | |
| for (idx = 0; idx < num_threads; ++idx) { | |
| targs[idx].shared = &tp; | |
| targs[idx].thread_index = idx; | |
| targs[idx].tries_slots = tries_slots; | |
| targs[idx].perfect = perfect; | |
| int ret = pthread_create(&threads[idx], NULL, | |
| thread_generate_large_prime, &targs[idx]); | |
| if (ret != 0) { | |
| fprintf(stderr, "pthread_create failed: %s\n", strerror(ret)); | |
| while (idx-- > 0) { | |
| pthread_join(threads[idx], NULL); | |
| } | |
| free(threads); | |
| free(targs); | |
| free(tries_slots); | |
| mpz_clear(tp.result); | |
| return 1; | |
| } | |
| } | |
| /* join worker threads */ | |
| for (idx = 0; idx < num_threads; ++idx) { | |
| pthread_join(threads[idx], NULL); | |
| } | |
| /* sum tries from per-thread slots */ | |
| uint64_t total_tries = 0; | |
| for (idx = 0; idx < num_threads; ++idx) { | |
| total_tries += tries_slots[idx]; | |
| } | |
| if (tp.found) { | |
| gmp_printf("%Zd\n", tp.result); | |
| } else { | |
| puts("0"); | |
| } | |
| printf("[STATS] Took %ju tries to find this prime.\n", | |
| (uintmax_t)total_tries); | |
| { | |
| const double end_time = get_wall_time(); | |
| const double elapsed = end_time - start_time; | |
| const uintmax_t hours = (uintmax_t)(elapsed / 3600.0); | |
| const uintmax_t minutes = | |
| (uintmax_t)((elapsed - hours * 3600.0) / 60.0); | |
| const uintmax_t secs = | |
| (uintmax_t)(elapsed - hours * 3600.0 - minutes * 60.0); | |
| const uintmax_t frac = | |
| (uintmax_t)((elapsed - (uintmax_t)elapsed) * 100000.0); | |
| printf("[STATS] Took %02ju:%02ju:%02ju.%05ju to compute this prime\n", | |
| hours, minutes, secs, frac); | |
| } | |
| mpz_clear(tp.result); | |
| pthread_mutex_destroy(&lock); | |
| free(threads); | |
| free(targs); | |
| free(tries_slots); | |
| return 0; | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is from: