using Base.Threads
@show nthreads()
versioninfo()
nthreads() = 6 Julia Version 1.6.0-rc1 Commit a58bdd9010 (2021-02-06 15:49 UTC) Platform Info: OS: Windows (x86_64-w64-mingw32) CPU: AMD Ryzen 5 3500 6-Core Processor WORD_SIZE: 64 LIBM: libopenlibm LLVM: libLLVM-11.0.1 (ORCJIT, znver2)
# @threads を削除すれば、シングルスレッドで動作する
# 6スレッドでシングルスレッドの約2倍しか速くならない
function sieve!(x::AbstractVector{Bool})
#all(x) || fill!(x, true)
n = length(x)
x[1] = false
@inbounds for p in 2:isqrt(n)
x[p] || continue
@threads for i in p^2:p:n
x[i] && (x[i] = false)
end
end
end
# Vector{Bool}
function sieve(n::Integer)
x = ones(Bool, n)
sieve!(x)
x
end
# BitVector
function sieve_conflict(n::Integer)
x = trues(n)
sieve!(x)
x
end
sieve(100) |> findall
25-element Vector{Int64}: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97
# 100 以下の素数は 25 のはずだが、 BitVector を使った場合は増えることがある
[count(sieve_conflict(100)) for _ in 1:300] |> print
[25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 28, 25, 25, 25, 26, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 28, 28, 25, 25, 25, 25, 28, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 28, 28, 25, 25, 25, 25, 26, 25, 26, 34, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 33, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 30, 25, 25, 28, 25, 25, 25, 25, 25, 28, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 28, 25, 25, 25, 30, 25, 25, 25, 25, 25, 25, 25, 28, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 26, 25]
function bench(f, ks=5:9)
println(f)
for k in ks
stats = @timed f(10^k)
println("n=10^$k\t$(stats.time) sec\t$(stats.bytes) bytes")
end
println()
end
bench(sieve)
bench(sieve_conflict)
sieve n=10^5 0.0017951 sec 311264 bytes n=10^6 0.0017002 sec 1541424 bytes n=10^7 0.0077115 sec 11436848 bytes n=10^8 0.2752849 sec 103977552 bytes n=10^9 3.3042045 sec 1011067280 bytes sieve_conflict n=10^5 0.0005514 sec 222256 bytes n=10^6 0.0044288 sec 670928 bytes n=10^7 0.0102306 sec 2690832 bytes n=10^8 0.0811478 sec 16472688 bytes n=10^9 2.1571622 sec 136007824 bytes
@time sieve(10^10) |> count |> ==(455052511)
42.134723 seconds (309.41 k allocations: 9.342 GiB, 0.13% gc time)
true
# Atomic
function sieve_atomic(n::Integer)
x = [Atomic{Bool}(true) for _ in 1:n]
x[1][] = false
@inbounds for p in 2:isqrt(n)
x[p][] || continue
@threads for i in p^2:p:n
atomic_cas!(x[i], true, false)
end
end
getindex.(x)
end
# 素直に配列を使ったほうが速い
bench(sieve_atomic, 5:8)
sieve_atomic n=10^5 0.0169946 sec 3769689 bytes n=10^6 0.4866018 sec 24686448 bytes n=10^7 0.4409641 sec 242715632 bytes n=10^8 3.9859158 sec 2416508816 bytes