In [1]:

using Base.Threads

@show nthreads()
versioninfo()

nthreads() = 6
Julia Version 1.6.0-rc1
Commit a58bdd9010 (2021-02-06 15:49 UTC)
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: AMD Ryzen 5 3500 6-Core Processor              
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-11.0.1 (ORCJIT, znver2)

In [2]:

# @threads を削除すれば、シングルスレッドで動作する
# ６スレッドでシングルスレッドの約２倍しか速くならない

function sieve!(x::AbstractVector{Bool})
    #all(x) || fill!(x, true)
    n = length(x)
    x[1] = false
    @inbounds for p in 2:isqrt(n)
        x[p] || continue
        @threads for i in p^2:p:n
            x[i] && (x[i] = false)
        end
    end
end

# Vector{Bool}
function sieve(n::Integer)
    x = ones(Bool, n)
    sieve!(x)
    x
end

# BitVector
function sieve_conflict(n::Integer)
    x = trues(n)
    sieve!(x)
    x
end

sieve(100) |> findall

Out[2]:

25-element Vector{Int64}:
  2
  3
  5
  7
 11
 13
 17
 19
 23
 29
 31
 37
 41
 43
 47
 53
 59
 61
 67
 71
 73
 79
 83
 89
 97

In [3]:

# 100 以下の素数は 25 のはずだが、 BitVector を使った場合は増えることがある
[count(sieve_conflict(100)) for _ in 1:300] |> print

[25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 28, 25, 25, 25, 26, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 28, 28, 25, 25, 25, 25, 28, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 28, 28, 25, 25, 25, 25, 26, 25, 26, 34, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 33, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 30, 25, 25, 28, 25, 25, 25, 25, 25, 28, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 28, 25, 25, 25, 30, 25, 25, 25, 25, 25, 25, 25, 28, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 28, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 28, 25, 25, 25, 26, 25]

In [4]:

function bench(f, ks=5:9)
    println(f)
    for k in ks
        stats = @timed f(10^k)
        println("n=10^$k\t$(stats.time) sec\t$(stats.bytes) bytes")
    end
    println()
end

bench(sieve)
bench(sieve_conflict)

sieve
n=10^5	0.0017951 sec	311264 bytes
n=10^6	0.0017002 sec	1541424 bytes
n=10^7	0.0077115 sec	11436848 bytes
n=10^8	0.2752849 sec	103977552 bytes
n=10^9	3.3042045 sec	1011067280 bytes

sieve_conflict
n=10^5	0.0005514 sec	222256 bytes
n=10^6	0.0044288 sec	670928 bytes
n=10^7	0.0102306 sec	2690832 bytes
n=10^8	0.0811478 sec	16472688 bytes
n=10^9	2.1571622 sec	136007824 bytes

In [5]:

@time sieve(10^10) |> count |> ==(455052511)

 42.134723 seconds (309.41 k allocations: 9.342 GiB, 0.13% gc time)

Out[5]:

true

In [6]:

# Atomic
function sieve_atomic(n::Integer)
    x = [Atomic{Bool}(true) for _ in 1:n]
    x[1][] = false
    @inbounds for p in 2:isqrt(n)
        x[p][] || continue
        @threads for i in p^2:p:n
            atomic_cas!(x[i], true, false)
        end
    end
    getindex.(x)
end

# 素直に配列を使ったほうが速い
bench(sieve_atomic, 5:8)

sieve_atomic
n=10^5	0.0169946 sec	3769689 bytes
n=10^6	0.4866018 sec	24686448 bytes
n=10^7	0.4409641 sec	242715632 bytes
n=10^8	3.9859158 sec	2416508816 bytes

In [ ]: