using BenchmarkTools
# Sample inputs used by the benchmark and introspection calls in this session:
# two 2-element tuples of Float64.
a = (1.0, 2.0)
b = (3.0, 5.0)
(3.0, 5.0)
# Broadcasting variant: element-wise difference `b - a` using the dotted
# operator. The arguments carry no type constraints.
F(a, b) = b .- a
# Benchmark the broadcasting variant on the same (a, b) pair as the other
# variants. Each argument is wrapped as `$(Ref(x))[]` so that it is read from a
# reference at run time, matching the form used for `g` and `h`.
@btime F($(Ref(a))[], $(Ref(b))[])
1.400 ns (0 allocations: 0 bytes)
(2.0, 3.0)
# `map`-over-`zip` variant: pairwise difference `j - i` for each `(i, j)` pair
# drawn from `zip(a, b)`, where both arguments are `NTuple`s sharing the length
# parameter `N`.
# The `map` result is converted back to a tuple by the `NTuple{N}` constructor.
# In this session `@code_warntype` infers that `map` result as `Vector{Float64}`,
# and the benchmark of this variant reports one heap allocation.
f(a::NTuple{N}, b::NTuple{N}) where N =
NTuple{N}(map(((i, j),) -> j - i, zip(a, b)))
# NOTE(review): this benchmark interpolates `a` and `b` directly, whereas the
# benchmarks of `F`, `g` and `h` use the `$(Ref(x))[]` form. Consider using the
# same form here so the four timings are measured like-for-like.
@btime f($a, $b)
31.080 ns (1 allocation: 96 bytes)
(2.0, 3.0)
# Generator variant: the same pairwise difference `j - i` over `zip(a, b)`, but
# written as a generator expression handed directly to the `NTuple{N}`
# constructor. The benchmark of this variant reports zero allocations.
g(a::NTuple{N}, b::NTuple{N}) where N =
NTuple{N}(j - i for (i, j) in zip(a, b))
@btime g($(Ref(a))[], $(Ref(b))[])
1.600 ns (0 allocations: 0 bytes)
(2.0, 3.0)
# Index-based variant: builds the result with `ntuple`, evaluating
# `b[i] - a[i]` for each index, with the tuple length taken from the static
# parameter `N` shared by both arguments.
h(a::NTuple{N}, b::NTuple{N}) where N =
ntuple(i -> b[i] - a[i], N)
@btime h($(Ref(a))[], $(Ref(b))[])
1.600 ns (0 allocations: 0 bytes)
(2.0, 3.0)
@code_warntype F(a, b)
Variables #self#::Core.Const(F) a::Tuple{Float64, Float64} b::Tuple{Float64, Float64} Body::Tuple{Float64, Float64} 1 ─ %1 = Base.broadcasted(Main.:-, b, a)::Base.Broadcast.Broadcasted{Base.Broadcast.Style{Tuple}, Nothing, typeof(-), Tuple{Tuple{Float64, Float64}, Tuple{Float64, Float64}}} │ %2 = Base.materialize(%1)::Tuple{Float64, Float64} └── return %2
@code_warntype g(a, b)
Variables #self#::Core.Const(g) a::Tuple{Float64, Float64} b::Tuple{Float64, Float64} #3::var"#3#4" Body::Tuple{Float64, Float64} 1 ─ %1 = Core.apply_type(Main.NTuple, $(Expr(:static_parameter, 1)))::Core.Const(Tuple{T, T} where T) │ (#3 = %new(Main.:(var"#3#4"))) │ %3 = #3::Core.Const(var"#3#4"()) │ %4 = Main.zip(a, b)::Base.Iterators.Zip{Tuple{Tuple{Float64, Float64}, Tuple{Float64, Float64}}} │ %5 = Base.Generator(%3, %4)::Base.Generator{Base.Iterators.Zip{Tuple{Tuple{Float64, Float64}, Tuple{Float64, Float64}}}, var"#3#4"} │ %6 = (%1)(%5)::Tuple{Float64, Float64} └── return %6
@code_warntype h(a, b)
Variables #self#::Core.Const(h) a::Tuple{Float64, Float64} b::Tuple{Float64, Float64} #5::var"#5#6"{Tuple{Float64, Float64}, Tuple{Float64, Float64}} Body::Tuple{Float64, Float64} 1 ─ %1 = Main.:(var"#5#6")::Core.Const(var"#5#6") │ %2 = Core.typeof(a)::Core.Const(Tuple{Float64, Float64}) │ %3 = Core.typeof(b)::Core.Const(Tuple{Float64, Float64}) │ %4 = Core.apply_type(%1, %2, %3)::Core.Const(var"#5#6"{Tuple{Float64, Float64}, Tuple{Float64, Float64}}) │ (#5 = %new(%4, a, b)) │ %6 = #5::var"#5#6"{Tuple{Float64, Float64}, Tuple{Float64, Float64}} │ %7 = Main.ntuple(%6, $(Expr(:static_parameter, 1)))::Tuple{Float64, Float64} └── return %7
@code_warntype f(a, b)
Variables #self#::Core.Const(f) a::Tuple{Float64, Float64} b::Tuple{Float64, Float64} #1::var"#1#2" Body::Tuple{Float64, Float64} 1 ─ %1 = Core.apply_type(Main.NTuple, $(Expr(:static_parameter, 1)))::Core.Const(Tuple{T, T} where T) │ (#1 = %new(Main.:(var"#1#2"))) │ %3 = #1::Core.Const(var"#1#2"()) │ %4 = Main.zip(a, b)::Base.Iterators.Zip{Tuple{Tuple{Float64, Float64}, Tuple{Float64, Float64}}} │ %5 = Main.map(%3, %4)::Vector{Float64} │ %6 = (%1)(%5)::Tuple{Float64, Float64} └── return %6
@code_llvm debuginfo=:none F(a, b)
; Function Attrs: uwtable define void @julia_F_1718([2 x double]* noalias nocapture sret %0, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %1, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %2) #0 { top: %3 = bitcast [2 x double]* %2 to <2 x double>* %4 = load <2 x double>, <2 x double>* %3, align 8 %5 = bitcast [2 x double]* %1 to <2 x double>* %6 = load <2 x double>, <2 x double>* %5, align 8 %7 = fsub <2 x double> %4, %6 %8 = bitcast [2 x double]* %0 to <2 x double>* store <2 x double> %7, <2 x double>* %8, align 8 ret void }
@code_llvm debuginfo=:none g(a, b)
; Function Attrs: uwtable define void @julia_g_1743([2 x double]* noalias nocapture sret %0, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %1, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %2) #0 { top: %3 = bitcast [2 x double]* %2 to <2 x double>* %4 = load <2 x double>, <2 x double>* %3, align 8 %5 = bitcast [2 x double]* %1 to <2 x double>* %6 = load <2 x double>, <2 x double>* %5, align 8 %7 = fsub <2 x double> %4, %6 %8 = bitcast [2 x double]* %0 to <2 x double>* store <2 x double> %7, <2 x double>* %8, align 8 ret void }
@code_llvm debuginfo=:none h(a, b)
; Function Attrs: uwtable define void @julia_h_1745([2 x double]* noalias nocapture sret %0, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %1, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %2) #0 { top: %3 = bitcast [2 x double]* %2 to <2 x double>* %4 = load <2 x double>, <2 x double>* %3, align 8 %5 = bitcast [2 x double]* %1 to <2 x double>* %6 = load <2 x double>, <2 x double>* %5, align 8 %7 = fsub <2 x double> %4, %6 %8 = bitcast [2 x double]* %0 to <2 x double>* store <2 x double> %7, <2 x double>* %8, align 8 ret void }
@code_llvm debuginfo=:none f(a, b)
; Function Attrs: uwtable define void @julia_f_1747([2 x double]* noalias nocapture sret %0, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %1, [2 x double]* nocapture nonnull readonly align 8 dereferenceable(16) %2) #0 { top: %3 = alloca {}*, align 8 %4 = alloca [2 x [2 x double]], align 8 %5 = alloca { [1 x [2 x [2 x double]]] }, align 8 %6 = bitcast [2 x [2 x double]]* %4 to i8* %7 = bitcast [2 x double]* %1 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(16) %6, i8* nonnull align 8 dereferenceable(16) %7, i64 16, i1 false) %8 = getelementptr inbounds [2 x [2 x double]], [2 x [2 x double]]* %4, i64 0, i64 1 %9 = bitcast [2 x double]* %8 to i8* %10 = bitcast [2 x double]* %2 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(16) %9, i8* nonnull align 8 dereferenceable(16) %10, i64 16, i1 false) %11 = bitcast { [1 x [2 x [2 x double]]] }* %5 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(32) %11, i8* nonnull align 8 dereferenceable(32) %6, i64 32, i1 false) %12 = call nonnull {}* @j_collect_1749({ [1 x [2 x [2 x double]]] }* nocapture readonly %5) #0 %13 = bitcast {}* %12 to { i8*, i64, i16, i16, i32 }* %14 = getelementptr inbounds { i8*, i64, i16, i16, i32 }, { i8*, i64, i16, i16, i32 }* %13, i64 0, i32 1 %15 = load i64, i64* %14, align 8 switch i64 %15, label %L43 [ i64 0, label %L23 i64 1, label %L40 ] L23: ; preds = %top store {}* inttoptr (i64 449532336 to {}*), {}** %3, align 8 %16 = call nonnull {}* @jl_invoke({}* inttoptr (i64 338795648 to {}*), {}** nonnull %3, i32 1, {}* inttoptr (i64 2150936944 to {}*)) call void @llvm.trap() unreachable L40: ; preds = %top store {}* inttoptr (i64 477628656 to {}*), {}** %3, align 8 %17 = call nonnull {}* @jl_invoke({}* inttoptr (i64 338795648 to {}*), {}** nonnull %3, i32 1, {}* inttoptr (i64 2150936944 to {}*)) call void @llvm.trap() unreachable L43: ; preds = %top %18 = bitcast {}* %12 to <2 x i64>** 
%19 = load <2 x i64>*, <2 x i64>** %18, align 8 %20 = load <2 x i64>, <2 x i64>* %19, align 8 %21 = bitcast [2 x double]* %0 to <2 x i64>* store <2 x i64> %20, <2 x i64>* %21, align 8 ret void }
@code_native debuginfo=:none F(a, b)
.text pushq %rbp movq %rsp, %rbp movq %rcx, %rax vmovupd (%r8), %xmm0 vsubpd (%rdx), %xmm0, %xmm0 vmovupd %xmm0, (%rax) popq %rbp retq nopw %cs:(%rax,%rax)
@code_native debuginfo=:none g(a, b)
.text pushq %rbp movq %rsp, %rbp movq %rcx, %rax vmovupd (%r8), %xmm0 vsubpd (%rdx), %xmm0, %xmm0 vmovupd %xmm0, (%rax) popq %rbp retq nopw %cs:(%rax,%rax)
@code_native debuginfo=:none h(a, b)
.text pushq %rbp movq %rsp, %rbp movq %rcx, %rax vmovupd (%r8), %xmm0 vsubpd (%rdx), %xmm0, %xmm0 vmovupd %xmm0, (%rax) popq %rbp retq nopw %cs:(%rax,%rax)
@code_native debuginfo=:none f(a, b)
.text pushq %rbp movq %rsp, %rbp pushq %rsi subq $104, %rsp movq %rcx, %rsi vmovups (%rdx), %xmm0 vmovaps %xmm0, -48(%rbp) vmovups (%r8), %xmm0 vmovaps %xmm0, -32(%rbp) vmovups -48(%rbp), %ymm0 vmovups %ymm0, -80(%rbp) movabsq $collect, %rax leaq -80(%rbp), %rcx vzeroupper callq *%rax movq 8(%rax), %rcx cmpq $1, %rcx je L96 testq %rcx, %rcx je L139 movq (%rax), %rax vmovups (%rax), %xmm0 vmovups %xmm0, (%rsi) movq %rsi, %rax addq $104, %rsp popq %rsi popq %rbp retq L96: movq $477628656, -16(%rbp) # imm = 0x1C7808F0 movabsq $jl_invoke, %rax leaq -16(%rbp), %rdx movl $338795648, %ecx # imm = 0x14319C80 movl $2150936944, %r9d # imm = 0x8034B170 movl $1, %r8d callq *%rax ud2 L139: movq $449532336, -16(%rbp) # imm = 0x1ACB51B0 movabsq $jl_invoke, %rax leaq -16(%rbp), %rdx movl $338795648, %ecx # imm = 0x14319C80 movl $2150936944, %r9d # imm = 0x8034B170 movl $1, %r8d callq *%rax ud2 nopw %cs:(%rax,%rax)