using BenchmarkTools, Base.Test, PyPlot function E₁_slow{T<:AbstractFloat}(z::Union{T,Complex{T}}) real(z) < 0 && error("real(z) < 0 not implemented") return quadgk(u -> exp(-z/u)/u, 0, 1, reltol=eps(T)*10)[1] end E₁_slow{T<:Real}(z::Union{T,Complex{T}}) = E₁_slow(float(z)) # compute the relative error of approx compared to exact relerr(approx,exact) = norm(approx - exact) / norm(exact) relerr(E₁_slow(1.2), 0.158408436851462561424955970710861738534157976840578963378) relerr(E₁_slow(1.2 + 3.4im), -0.0196798781439709839467398951111963946354437628483798953 + 0.0708764302707789307217865597073426047954413415009995511im) function E₁_taylor(z::Number) E₁ = -eulergamma - log(z) # iteratively compute the terms in the series, starting with k=1 term = oftype(E₁, z) # use oftype to ensure type stability E₁ += term for k=2:1000000 term = -term * z * (k-1) / (k * k) prev_E₁ = E₁ E₁ += term E₁ == prev_E₁ && return (E₁, k-1) end return E₁, typemax(Int) end relerr(E₁_taylor(1.2)[1], 0.158408436851462561424955970710861738534157976840578963378) relerr(E₁_taylor(1.2 + 3.4im)[1], -0.0196798781439709839467398951111963946354437628483798953 + 0.0708764302707789307217865597073426047954413415009995511im) x = linspace(0,10,200) pcolor(x',x, [E₁_taylor(x+y*im)[2] for y in x, x in x], cmap="hsv") colorbar() xlabel("real(z)") ylabel("imag(z)") title(L"number of Taylor-series terms for $E_1(z)$") # SOLUTION code # n coefficients of the Taylor series of E₁(z) + log(z), in type T: function E₁_taylor_coefficients{T<:Number}(::Type{T}, n::Integer) n < 0 && throw(ArgumentError("$n ≥ 0 is required")) n == 0 && return T[] n == 1 && return T[-eulergamma] # iteratively compute the terms in the series, starting with k=1 term::T = 1 terms = T[-eulergamma, term] for k=2:n term = -term * (k-1) / (k * k) push!(terms, term) end return terms end E₁_taylor_coefficients(Float64, 10) # SOLUTION code macro E₁_taylor64(z, n::Integer) c = E₁_taylor_coefficients(Float64, n) taylor = Expr(:macrocall, Symbol("@evalpoly"), :t, c...) quote let t = $(esc(z)) $taylor - log(t) end end end relerr(@E₁_taylor64(2.0, 30), E₁_taylor(2.0)[1]) # compute E₁ via n terms of the continued-fraction expansion, implemented # in the simplest way: function E₁_cf(z::Number, n::Integer) # starting with z seems to give many fewer terms for intermediate |z| ~ 3 cf::typeof(inv(z)) = z for i = n:-1:1 cf = z + (1+i)/cf cf = 1 + i/cf end return exp(-z) / (z + inv(cf)) end [relerr(E₁_cf(4, n), E₁_slow(4)) for n in 1:20] [relerr(E₁_cf(8, n), E₁_slow(8)) for n in 1:20] function E₁_cf_nterms(z::Number, reltol=1e-14) for n = 1:100 # give up after 100 terms doubled = E₁_cf(z, 2n) if abs(E₁_cf(z, n) - doubled) ≤ reltol*abs(doubled) return n end end return 101 end x = linspace(1,10,100) pcolor(x',x, [E₁_cf_nterms(x+y*im) for y in x, x in x], cmap="hsv") colorbar() xlabel("real(z)") ylabel("imag(z)") title(L"number of cf-series terms for $E_1(z)$") using SymPy display("text/latex", E₁_cf(Sym(:z), 4)) # SOLUTION code # for numeric-literal coefficients: simplify to a ratio of two polynomials: import Polynomials # return (p,q): the polynomials p(x) / q(x) corresponding to E₁_cf(x, a...), # but without the exp(-x) term function E₁_cfpoly{T<:Real}(n::Integer, ::Type{T}=BigInt) q = Polynomials.Poly(T[1]) p = x = Polynomials.Poly(T[0,1]) for i = n:-1:1 p, q = x*p+(1+i)*q, p # from cf = x + (1+i)/cf = x + (1+i)*q/p p, q = p + i*q, p # from cf = 1 + i/cf = 1 + i*q/p end # do final 1/(x + inv(cf)) = 1/(x + q/p) = p/(x*p + q) return p, x*p + q end macro E₁_cf64(x, n::Integer) p,q = E₁_cfpoly(n, BigInt) evalpoly = Symbol("@evalpoly") num_expr = Expr(:macrocall, evalpoly, :t, Float64.(Polynomials.coeffs(p))...) den_expr = Expr(:macrocall, evalpoly, :t, Float64.(Polynomials.coeffs(q))...) quote let t = $(esc(x)) exp(-t) * $num_expr / $den_expr end end end display("text/latex", cancel(E₁_cf(Sym(:z), 4))) E₁_cfpoly(4) display("text/latex", @E₁_cf64 Sym(:z) 4) semilogy(0:30, [relerr(@eval(@E₁_cf64(3.0, $n)), E₁_cf(3.0, n)) for n in 0:30], "ro") xlabel("order n") ylabel("relative error") function time_taylor(z) E₁, n = E₁_taylor(z) f = gensym() # generate a function name so that we can benchmark in a function @eval $f(z) = @E₁_taylor64 z $n b = @eval @benchmark $f($z) return time(minimum(b)) # time in ns end function time_cf(z) n = E₁_cf_nterms(z) f = gensym() # generate a function name so that we can benchmark in a function @eval $f(z) = @E₁_cf64 z $n b = @eval @benchmark $f($z) return time(minimum(b)) # time in ns end x = linspace(2,5, 10) semilogy(x, [time_taylor(x) for x in x], "ro-") semilogy(x, [time_cf(x) for x in x], "bs-") xlabel("real(z)") ylabel("time (ns)") legend(["taylor", "cf"]) title("timing comparison for inlined Taylor vs. cf (real z)") x = linspace(2,5, 10) semilogy(x, [time_taylor(x + 0im) for x in x], "ro-") semilogy(x, [time_cf(x + 0im) for x in x], "bs-") xlabel("real(z)") ylabel("time (ns)") legend(["taylor", "cf"]) title("timing comparison for inlined Taylor vs. cf (complex z)") x = linspace(3,8, 10) semilogy(x, [time_taylor(x*im) for x in x], "ro-") semilogy(x, [time_cf(x*im) for x in x], "bs-") xlabel("real(z)") ylabel("time (ns)") legend(["taylor", "cf"]) title("timing comparison for inlined Taylor vs. cf (imaginary z)") E₁_cf_nterms(3.0), E₁_cf_nterms(5.0im) E₁_taylor(3.0)[2], E₁_taylor(5.0im)[2] x = 1:0.2:1000 cf_cutoffs = hcat([ x[findfirst(x -> E₁_cf_nterms(x) < n, x)] for n in (30,15,8,4) ], [ x[findfirst(x -> E₁_cf_nterms(x*im) < n, x)] for n in (30,15,8,4) ]) hcat( (cf_cutoffs[:,1]./cf_cutoffs[:,2]).^2, cf_cutoffs[:,1].^2 ) x = logspace(-8,1,10000) taylor_cutoffs = hcat([ x[findlast(x -> E₁_taylor(x)[2] < n, x)] for n in (30,15,8,4) ], [ x[findlast(x -> E₁_taylor(x*im)[2] < n, x)] for n in (30,15,8,4) ]) hcat( (taylor_cutoffs[:,1]./taylor_cutoffs[:,2]).^2, taylor_cutoffs[:,1].^2 ) E₁_taylor(5.8*im)[2] x = linspace(1,2000,10000) x[findfirst(x -> E₁_cf(x,0) == 0.0, x)] E₁_cf(739 - 4im,0) 739^2 # SOLUTION: function E₁(z::Union{Float64,Complex{Float64}}) x² = real(z)^2 y² = imag(z)^2 if x² + 0.233*y² ≥ 7.84 # use cf expansion, ≤ 30 terms if (x² ≥ 546121) & (real(z) > 0) # underflow return zero(z) elseif x² + 0.401*y² ≥ 58.0 # ≤ 15 terms if x² + 0.649*y² ≥ 540.0 # ≤ 8 terms x² + y² ≥ 4e4 && return @E₁_cf z 4 return @E₁_cf64 z 8 end return @E₁_cf64 z 15 end return @E₁_cf64 z 30 else # use Taylor expansion, ≤ 37 terms r² = x² + y² return r² ≤ 0.36 ? (r² ≤ 2.8e-3 ? (r² ≤ 2e-7 ? @E₁_taylor64(z,4) : @E₁_taylor64(z,8)) : @E₁_taylor64(z,15)) : @E₁_taylor64(z,37) end end E₁{T<:Integer}(z::Union{T,Complex{T},Rational{T},Complex{Rational{T}}}) = E₁(float(z)) E₁_test(z::Union{Float64,Complex{Float64}}) = real(z)^2 + imag(z)^2 > 6 ? E₁_cf(z,100) : E₁_taylor(z)[1] x = linspace(1e-10,10,100) pcolor(x',x, [log10(1e-16 + relerr(E₁(x+y*im),E₁_test(x+y*im))) for y in x, x in x], cmap="cool") colorbar() xlabel("real(z)") ylabel("imag(z)") title(L"$\log_{10}$ relative error in optimized $E_1(z)$") @btime E₁(2+2im) @btime E₁_slow(2+2im) @btime E₁(10+2im) @btime E₁_slow(10+2im) @btime E₁(5) @btime E₁_slow(5) using PyCall exp1 = pyimport_conda("scipy.special", "scipy")["exp1"] relerr(E₁(2+2im), exp1(2+2im)) z = rand(10^6)*10 + rand(10^6)*10im @btime pycall($exp1, PyObject, $(PyObject(z))) evals=1; E₁(z::AbstractVector) = map(E₁, z) @btime E₁($z) evals=1; @btime pycall($exp1, PyObject, $(PyObject(real(z)))) evals=1; @btime E₁($(real(z))) evals=1; function preduce(⊕, lst::AbstractVector) if nprocs() == 1 return reduce(⊕, lst) end n = length(lst) queue = Base.copymutable(lst) @sync begin for p in workers() @async begin while length(queue) > 1 a = pop!(queue) b = pop!(queue) # compute results[p] = results[p] ⊕ lst[idx] push!(queue, remotecall_fetch(⊕, p, a, b)) end end end end if isempty(queue) return ⊕() # return empty call if it is defined else assert(length(queue) == 1) return queue[1] end end preduce(⊕, n::Integer) = preduce(⊕, 1:n) preduce(+, 1000) == 500500 function weirdplus(x,y) sleep(1e-3 / (x+y)) # wait for some number of seconds return x + y end preduce(weirdplus, 1000) @btime preduce(weirdplus, 1000) evals=1 addprocs(4) # parallelize over 4 workers workers() @everywhere function weirdplus(x,y) sleep(1e-3 / (x+y)) # wait for some number of seconds return x + y end @btime preduce(weirdplus, 1000) evals=1 1.376 / 0.434293