Replaced sieve with seg sieve (#166)

Chillee · web-flow · commit 924539612643 · 2020-06-25T21:51:34.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+*
+!*/
+!*.*
 /build/
 a.out
 header.tmp
diff --git a/content/number-theory/Eratosthenes.h b/content/number-theory/Eratosthenes.h
diff --git a/content/number-theory/FastEratosthenes.h b/content/number-theory/FastEratosthenes.h
@@ -0,0 +1,39 @@
+/**
+ * Author: Jakob Kogler, chilli, pajenegod
+ * Date: 2020-04-12
+ * License: CC0
+ * Description: Prime sieve for generating all primes smaller than LIM.
+ * Status: Stress-tested
+ * Time: LIM=1e9 $\approx$ 1.5s
+ * Details: Despite its n log log n complexity, segmented sieve is still faster
+ * than other options, including bitset sieves and linear sieves. This is
+ * primarily due to its low memory usage, which reduces cache misses. This
+ * implementation skips even numbers.
+ *
+ * Benchmark can be found here: https://ideone.com/e7TbX4
+ *
+ * The line `for (int i=idx; i<S+L; idx = (i += p))` is done on purpose for performance reasons.
+ * See https://github.com/kth-competitive-programming/kactl/pull/166#discussion_r408354338
+ */
+#pragma once
+
+const int LIM = 1e6; // exclusive upper bound: primes smaller than LIM are generated
+bitset<LIM> isPrime; // filled by eratosthenes(): isPrime[p] is set for every returned prime p
+vi eratosthenes() {
+	const int S = round(sqrt(LIM)), R = LIM / 2; // S: block width; R: bound on odd-number indices (index k <-> value 2k+1)
+	vi pr = {2}, sieve(S+1); pr.reserve(int(LIM/log(LIM)*1.1)); // 2 is seeded by hand because only odd numbers are sieved
+	vector<pii> cp; // per odd prime p <= S: (p, index of the next odd multiple of p still to cross off)
+	for (int i = 3; i <= S; i += 2) if (!sieve[i]) {
+		cp.push_back({i, i * i / 2}); // i*i is odd, so i*i/2 is the index k with 2k+1 == i*i
+		for (int j = i * i; j <= S; j += 2 * i) sieve[j] = 1; // step 2*i visits odd multiples only
+	}
+	for (int L = 1; L <= R; L += S) { // each pass handles indices [L, L+S)
+		array<bool, S> block{}; // block[i] set <=> 2*(L+i)+1 is composite. NOTE(review): S must be a constant expression here; presumably relies on the compiler folding round(sqrt(LIM)) -- confirm
+		for (auto &[p, idx] : cp) // idx is bound by reference, so the position carries over to the next block
+			for (int i=idx; i < S+L; idx = (i+=p)) block[i-L] = 1; // index step p == value step 2p
+		rep(i,0,min(S, R - L)) // the last block is cut short at R
+			if (!block[i]) pr.push_back((L + i) * 2 + 1); // map index back to the odd value
+	}
+	for (int i : pr) isPrime[i] = 1;
+	return pr;
+}
diff --git a/content/number-theory/chapter.tex b/content/number-theory/chapter.tex
@@ -10,7 +10,7 @@ \section{Modular arithmetic}
 	\kactlimport{ModSqrt.h}
 
 \section{Primality}
-	\kactlimport{eratosthenes.h}
+	\kactlimport{FastEratosthenes.h}
 	\kactlimport{MillerRabin.h}
 	\kactlimport{Factor.h}
 
diff --git a/stress-tests/number-theory/Eratosthenes.cpp b/stress-tests/number-theory/Eratosthenes.cpp
@@ -1,53 +1,38 @@
 #include "../utilities/template.h"
 
-struct prime_sieve {
-	typedef unsigned char uchar;
-	typedef unsigned int uint;
-	static const int pregen = 3*5*7*11*13;
-	uint n, sqrtn;
-	uchar *isprime;
-	int *prime, primes; // prime[i] is i:th prime
-
-	bool is_prime(int n) { // primality check
-		if(n%2==0 || n<=2) return n==2;
-		return isprime[(n-3)>>4] & 1 << ((n-3) >> 1&7);
+namespace dynamic { // variant of the segmented sieve that takes LIM at run time
+vi eratosthenes(int LIM) {
+	const int S = round(sqrt(LIM)), R = LIM / 2; // S: block width; R: bound on odd-number indices (index k <-> value 2k+1)
+	vi pr({2}), sieve(S + 1); pr.reserve(LIM / (int)log(LIM)); // NOTE(review): (int)log(LIM) is 0 for LIM of 1 or 2; main() in this file only passes LIM >= 121
+	vector<array<int, 2>> cp; // per odd prime p <= S: {p, index of the next odd multiple of p still to cross off}
+	for (int i = 3; i <= S; i += 2) if (!sieve[i]) {
+		cp.push_back({i, i * i / 2}); // i*i is odd, so i*i/2 is the index k with 2k+1 == i*i
+		for (int j = i * i; j <= S; j += 2 * i) sieve[j] = 1; // step 2*i visits odd multiples only
 	}
-
-	prime_sieve(int _n) : n(_n), sqrtn((int)ceil(sqrt(1.0*n))) {
-		int n0 = max(n>>4, (uint)pregen) + 1;
-		prime = new int[max(2775, (int)(1.12*n/log(1.0*n)))];
-		prime[0]=2; prime[1]=3; prime[2]=5;
-		prime[3]=7; prime[4]=11; prime[5]=13;
-		primes=6;
-		isprime = new uchar[n0];
-		memset(isprime, 255, n0);
-
-		for(int j=1,p=prime[j];j<6;p=prime[++j])
-			for(int i=(p*p-3)>>4,s=(p*p-3)>>1&7;
-				i<=pregen; i+= (s+=p)>>3, s&=7)
-					isprime[i] &= (uchar)~(1<<s);
-		for(int d=pregen, b=pregen+1; b<n0; b+=d,d<<=1)
-			memcpy(isprime+b,isprime+1,(n0<b+d)?n0-b:d);
-		for(uint p=17,i=0,s=7; p<n; p+=2, i+= ++s>>3, s&=7)
-			if(isprime[i]&1<<s) {
-				prime[primes++] = p;
-				if(p<sqrtn) {
-					int ii=i, ss=s+(p-1)*p/2;
-					for(uint pp=p*p; pp<n; pp+=p<<1, ss+=p) {
-						ii += ss>>3;
-						ss &=7;
-						isprime[ii] &= (uchar)~(1<<ss);
-}	}		}	}	};
+	for (int L = 1; L <= R; L += S) { // each pass handles indices [L, L+S)
+		vector<bool> block(S); // S is a runtime value here, so a vector stands in for the fixed-size array
+		// array<bool, S> block{}; (the declaration used in FastEratosthenes.h)
+		for (auto &[p, idx] : cp) // idx is bound by reference, so the position carries over to the next block
+			for (int i=idx; i < S+L; idx = (i+=p)) block[i-L] = 1; // index step p == value step 2p
+		rep(i,0,min(S, R - L)) // the last block is cut short at R
+			if (!block[i]) pr.push_back((L + i) * 2 + 1); // map index back to the odd value
+	}
+	return pr;
+}
+}
+#include "../../content/number-theory/FastEratosthenes.h"
+#include "../../content/number-theory/Eratosthenes.h"
 
 
-#include "../../content/number-theory/eratosthenes.h"
+int main() {
+	vi pr1 = eratosthenesSieve(LIM); // reference list; presumably the plain sieve from Eratosthenes.h -- confirm
+	vi pr2 = eratosthenes(); // segmented sieve under test, using the compile-time LIM
+	assert(pr1 == pr2);
 
-int main(int argc, char** argv) {
-	ll s = 0, s2 = 0;
-	prime_sieve ps(MAX_PR);
-	rep(i,0,ps.primes) s += ps.prime[i];
-	vi r = eratosthenesSieve(MAX_PR);
-	for(auto &x: r) s2 += x;
-	assert(s==s2);
+	for (int lim=121; lim<1000; lim++) { // cross-check the runtime-limit copy for every limit in [121, 1000)
+		vi pr = eratosthenesSieve(lim);
+		vi r = dynamic::eratosthenes(lim);
+		assert(pr == r);
+	}
 	cout<<"Tests passed!"<<endl;
 }
diff --git a/stress-tests/number-theory/MillerRabin.cpp b/stress-tests/number-theory/MillerRabin.cpp
@@ -1,12 +1,14 @@
 #include "../utilities/template.h"
 
 #include "../../content/number-theory/MillerRabin.h"
-#include "../../content/number-theory/eratosthenes.h"
+namespace sieve { // presumably keeps the header's globals (LIM, isPrime) from clashing with the isPrime() function this test calls -- confirm
+#include "../../content/number-theory/FastEratosthenes.h"
+}
 
 ull A[] = {2, 325, 9375, 28178, 450775, 9780504, 1795265022};
 int afactors[] = {2, 3, 5, 13, 19, 73, 193, 407521, 299210837};
 
-const ull LIM = 3ULL << 61;
+const ull MR_LIM = 1ULL << 62;
 
 // Accurate for arbitrary 64-bit numbers
 ull int128_mod_mul(ull a, ull b, ull m) { return (ull)((__uint128_t)a * b % m); }
@@ -47,11 +49,14 @@ void rec(ull div, ll num, int ind, int factors) {
 	}
 }
 
+const int MAXPR = 1e6;
 int main() {
-	eratosthenesSieve(MAX_PR);
+	auto prs = sieve::eratosthenes();
+	vector<bool> isprime(MAXPR);
+	for (auto i: prs) isprime[i] = true;
 	for(auto &a: A) rec(1, a, 0, 0);
 
-	rep(n,0,MAX_PR) {
+	rep(n,0,MAXPR) {
 		if (isPrime(n) != isprime[n]) {
 			cout << "fails for " << n << endl;
 			return 1;
@@ -62,7 +67,7 @@ int main() {
 	rep(i,0,1000000) {
 		n ^= (ull)rand();
 		n *= 1237618231ULL;
-		if (n < LIM && oldIsPrime(n) != isPrime(n)) {
+		if (n < MR_LIM && oldIsPrime(n) != isPrime(n)) {
 			cout << "differs from old for " << n << endl;
 			cout << "old says " << oldIsPrime(n) << endl;
 			cout << "new says " << isPrime(n) << endl;

-Original file line number
+Diff line change
@@ @@ -1,3 +1,6 @@ @@
 +*
 +!*/
 +!*.*
 /build/
 a.out
 header.tmp