3

我正在尝试编写一个结构体来计算梯度(参考 https://www.youtube.com/watch?v=rZS2LGiurKY)。这是我目前的代码:

"""
    GRAD(f, ∇f)

Pair of `Float64` matrices: `f` stores a value and `∇f` stores the derivative
information carried alongside it. Both fields must be `Matrix{Float64}`
(i.e. `Array{Float64,2}`); the constructor does not convert other array types.
"""
struct GRAD{F<:Matrix{Float64},∇F<:Matrix{Float64}}
    f::F
    ∇f::∇F
end

begin 
    import Base: +,*,-,^,/,convert,promote_rule,size,reshape,promote
    # Sum and difference rules: values and derivative parts combine component-wise.
    +(x::GRAD,y::GRAD) = GRAD(x.f+y.f,x.∇f+y.∇f) 
    -(x::GRAD,y::GRAD) = GRAD(x.f-y.f,x.∇f-y.∇f) 

    # Multiplying by a real scalar scales both the value and the derivative part.
    *(y::Real,x::GRAD) = GRAD(x.f.*y,x.∇f.*y)
    # Same operation with the scalar on the right; forwards to the method above.
    *(x::GRAD,y::Real) = *(y::Real,x::GRAD) 
    # Product rule, applied element-wise (broadcast `.*`), not as a matrix product.
    # Note: no `*` method in this block accepts a plain `Array` together with a `GRAD`.
    *(x::GRAD,y::GRAD)  = GRAD(x.f.*y.f,x.f.*y.∇f+ x.∇f.*y.f)

    # Wrap a plain array as a GRAD whose derivative part is all zeros.
    convert(::Type{GRAD},x::Array) = GRAD(x,zero(x))    
    # A GRAD reports the size of its value array `f`.
    size(x::GRAD) = size(x.f)   
    # Intended to make GRAD the promotion target when mixed with an Array.
    # NOTE(review): `F` and `∇F` are not introduced by a `where` clause on this
    # method, so they are resolved as ordinary globals rather than type parameters.
    # NOTE(review): a promotion rule is only consulted by code that calls `promote`;
    # nothing in this block calls it for an `Array * GRAD` product — confirm.
    Base.promote_rule(::Type{GRAD{F,∇F}}, x::Type{<:Array}) = GRAD # bug is here!! 
end
# Sample data: a 5×5 random matrix and two 5×1 random column matrices.
A = rand(5, 5)
r = rand(5, 1)
b = rand(5, 1)
# Wrap `r` together with a 5×1 all-ones derivative part.
g = GRAD(r, ones(5, 1))

我想计算 A*g 的梯度(应该是 A*ones()),但是当我这样做时,却得到如下错误:

> A*g
MethodError: no method matching *(::Array{Float64,2}, ::Main.workspace2861.GRAD{Array{Float64,2},Array{Float64,2}})
Closest candidates are:
*(::Any, ::Any, !Matched::Any, !Matched::Any...) at operators.jl:538
*(!Matched::Real, ::Main.workspace2861.GRAD) at /var/folders/2s/p1vy6rx91lsfh9ltgzz6j_lmb6r7gr/T/Unexpected invention.jl#==#c23631c4-0646-11eb-13be-3b5fa3514823:6
*(::Union{StridedArray{T, 2}, LinearAlgebra.Adjoint{var"#s828",var"#s827"} where var"#s827"<:Union{StridedArray{T, 2}, LinearAlgebra.LowerTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UnitLowerTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UnitUpperTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UpperTriangular{T,S} where S<:AbstractArray{T,2}} where var"#s828", LinearAlgebra.LowerTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.Transpose{var"#s826",var"#s825"} where var"#s825"<:Union{StridedArray{T, 2}, LinearAlgebra.LowerTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UnitLowerTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UnitUpperTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UpperTriangular{T,S} where S<:AbstractArray{T,2}} where var"#s826", LinearAlgebra.UnitLowerTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UnitUpperTriangular{T,S} where S<:AbstractArray{T,2}, LinearAlgebra.UpperTriangular{T,S} where S<:AbstractArray{T,2}} where T, !Matched::LinearAlgebra.Adjoint{var"#s828",var"#s827"} where var"#s827"<:SparseArrays.AbstractSparseMatrixCSC where var"#s828") at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/SparseArrays/src/linalg.jl:147

而使用 convert(GRAD, A) * g 时,我得到了正确的结果。

我究竟做错了什么?

4

1 回答 1

4

我认为 Base 只会对 Number 类型的参数自动回退到 promote。因此,您必须手动定义相应的方法来进行分派,例如:

"""
    GRAD(f, ∇f)

Pair of `Float64` matrices: `f` stores a value and `∇f` stores the derivative
information carried alongside it. Arithmetic on `GRAD` values propagates both
parts; plain arrays mixed into `*` are promoted to a `GRAD` with a zero
derivative part.
"""
struct GRAD{F <: Array{Float64,2}, ∇F <:Array{Float64,2}}
    f::F
    ∇f::∇F
end

begin
    import Base: +,*,-,^,/,convert,promote_rule,size,reshape,promote
    # Sum and difference rules: values and derivative parts combine component-wise.
    +(x::GRAD, y::GRAD) = GRAD(x.f + y.f, x.∇f + y.∇f)
    -(x::GRAD, y::GRAD) = GRAD(x.f - y.f, x.∇f - y.∇f)

    # Multiplying by a real scalar scales both the value and the derivative part.
    *(y::Real, x::GRAD) = GRAD(x.f .* y, x.∇f .* y)
    *(x::GRAD, y::Real) = y * x
    # Product rule, applied element-wise (broadcast `.*`), not as a matrix product.
    *(x::GRAD, y::GRAD) = GRAD(x.f .* y.f, x.f .* y.∇f + x.∇f .* y.f)
    # Base's generic `*` only falls back to promotion for `Number` arguments, so
    # the mixed GRAD/array cases must promote explicitly before dispatching on
    # the GRAD * GRAD method above.
    *(x::GRAD, y::AbstractArray) = *(promote(x, y)...)
    *(x::AbstractArray, y::GRAD) = *(promote(x, y)...)

    # Wrap any two-dimensional array as a GRAD with a zero derivative part.
    # Converting to `Array{Float64,2}` first lets views, adjoints and integer
    # matrices satisfy the struct's field constraints; an `Array{Float64,2}`
    # argument is passed through without copying.
    function convert(::Type{GRAD}, x::AbstractArray)
        value = convert(Array{Float64,2}, x)
        return GRAD(value, zero(value))
    end
    # A GRAD reports the size of its value array `f`.
    size(x::GRAD) = size(x.f)
    # Any GRAD mixed with any array promotes to GRAD. The rule covers
    # `AbstractArray` so it matches every argument the `*` methods above accept.
    Base.promote_rule(::Type{<:GRAD}, ::Type{<:AbstractArray}) = GRAD
end

# Sample data: a 5×5 random matrix and two 5×1 random column matrices.
A = rand(5, 5)
r = rand(5, 1)
b = rand(5, 1)
# Wrap `r` together with a 5×1 all-ones derivative part.
g = GRAD(r, ones(5, 1))
julia> A*g
GRAD{Array{Float64,2},Array{Float64,2}}([0.22826090714985026 0.3029960652217887 … 0.04569934008285687 0.3480034221401326; 0.2263393729468651 0.09785205038459334 … 0.2354369234901423 0.03963994636800826; … ; 0.2465774394414207 0.04853374224132803 … 0.1316815422172956 0.41189932434750665; 0.07773901558602414 0.3714828548333624 … 0.07235526901207193 0.38751984258803623], [0.46212899620837633 0.6134351660317792 … 0.09252127498998997 0.7045554762696247; 0.7634551330528128 0.33006033892034314 … 0.7941416705740725 0.13370771569513296; … ; 0.40923528629708694 0.08054962346187167 … 0.21854689444181385 0.6836137900806378; 0.16866950942083414 0.8060023710186879 … 0.15698845214696422 0.8407976515709032])

julia> (A*g).∇f
5×5 Array{Float64,2}:
 0.462129  0.613435   0.833935  0.0925213  0.704555
 0.763455  0.33006    0.354147  0.794142   0.133708
 0.774017  0.347564   0.255648  0.725451   0.629586
 0.409235  0.0805496  0.1764    0.218547   0.683614
 0.16867   0.806002   0.21655   0.156988   0.840798

这有帮助吗?

于 2020-10-04T22:15:08.567 回答