python - 用 numpy 同时对角化矩阵

Question

我有一个m × n × n numpy.ndarray的m个同时可对角化的方阵，并想用它numpy来获得它们的同时特征值。

例如，如果我有

from numpy import einsum, diag, array, linalg, random
U = linalg.svd(random.random((3,3)))[2]

M = einsum(
    "ij, ajk, lk",
    U, [diag([2,2,0]), diag([1,-1,1])], U)

中的两个矩阵M同时可对角化，我正在寻找一种获取数组的方法

array([[2.,  1.],
       [2., -1.],
       [0.,  1.]])

（直到行的排列）从M. 有没有内置的或简单的方法来获得这个？

score 7 · Accepted Answer

Cardoso 和 Soulomiac 在 1996 年发表了一种基于 Givens 旋转的相当简单且非常优雅的同步对角化算法：

Cardoso, J. 和 Souloumiac, A. (1996)。用于同时对角化的雅可比角。SIAM 矩阵分析与应用杂志，17（1），161-164。doi:10.1137/S0895479893259546

我在此响应的末尾附上了算法的 numpy 实现。警告：事实证明，同时对角化是一个有点棘手的数值问题，没有算法（据我所知）可以保证全局收敛。然而，它不起作用的情况（见论文）是退化的，实际上我从来没有让雅可比角算法对我失败。

#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""
Routines for simultaneous diagonalization
Arun Chaganty <arunchaganty@gmail.com>
"""

import numpy as np
from numpy import zeros, eye, diag
from numpy.linalg import norm

def givens_rotate( A, i, j, c, s ):
    """
    Rotate A along axis (i,j) by c and s
    """
    Ai, Aj = A[i,:], A[j,:]
    A[i,:], A[j,:] = c * Ai + s * Aj, c * Aj - s * Ai 

    return A

def givens_double_rotate( A, i, j, c, s ):
    """
    Rotate A along axis (i,j) by c and s
    """
    Ai, Aj = A[i,:], A[j,:]
    A[i,:], A[j,:] = c * Ai + s * Aj, c * Aj - s * Ai 
    A_i, A_j = A[:,i], A[:,j]
    A[:,i], A[:,j] = c * A_i + s * A_j, c * A_j - s * A_i 

    return A

def jacobi_angles( *Ms, **kwargs ):
    r"""
    Simultaneously diagonalize using Jacobi angles
    @article{SC-siam,
       HTML =   "ftp://sig.enst.fr/pub/jfc/Papers/siam_note.ps.gz",
       author = "Jean-Fran\c{c}ois Cardoso and Antoine Souloumiac",
       journal = "{SIAM} J. Mat. Anal. Appl.",
       title = "Jacobi angles for simultaneous diagonalization",
       pages = "161--164",
       volume = "17",
       number = "1",
       month = jan,
       year = {1995}}

    (a) Compute Givens rotations for every pair of indices (i,j) i < j 
            - from eigenvectors of G = gg'; g = A_ij - A_ji, A_ij + A_ji
            - Compute c, s as \sqrt{x+r/2r}, y/\sqrt{2r(x+r)}
    (b) Update matrices by multiplying by the givens rotation R(i,j,c,s)
    (c) Repeat (a) until stopping criterion: sin theta < threshold for all ij pairs
    """

    assert len(Ms) > 0
    m, n = Ms[0].shape
    assert m == n

    sweeps = kwargs.get('sweeps', 500)
    threshold = kwargs.get('eps', 1e-8)
    rank = kwargs.get('rank', m)

    R = eye(m)

    for _ in xrange(sweeps):
        done = True
        for i in xrange(rank):
            for j in xrange(i+1, m):
                G = zeros((2,2))
                for M in Ms:
                    g = np.array([ M[i,i] - M[j,j], M[i,j] + M[j,i] ])
                    G += np.outer(g,g) / len(Ms)
                # Compute the eigenvector directly
                t_on, t_off = G[0,0] - G[1,1], G[0,1] + G[1,0]
                theta = 0.5 * np.arctan2( t_off, t_on + np.sqrt( t_on*t_on + t_off * t_off) )
                c, s = np.cos(theta), np.sin(theta)

                if abs(s) > threshold:
                    done = False
                    # Update the matrices and V
                    for M in Ms:
                        givens_double_rotate(M, i, j, c, s)
                        #assert M[i,i] > M[j, j]
                    R = givens_rotate(R, i, j, c, s)

        if done:
            break
    R = R.T

    L = np.zeros((m, len(Ms)))
    err = 0
    for i, M in enumerate(Ms):
        # The off-diagonal elements of M should be 0
        L[:,i] = diag(M)
        err += norm(M - diag(diag(M)))

    return R, L, err

score 5 · Accepted Answer

I am not aware of any direct solution. But why not just getting the eigenvalues and the eigenvectors of the first matrix, and using the eigenvectors to transform all other matrices to the diagonal form? Something like:

eigvals, eigvecs = np.linalg.eig(matrix1)
eigvals2 = np.diagonal(np.dot(np.dot(transpose(eigvecs), matrix2), eigvecs))

You can the add the columns to an array via hstack if you like.

UPDATE: As pointed out below, this is only valid if no degenerate eigenvalues occur. Otherwise one would have to check first for the degenerate eigenvalues, then transform the 2nd matrix to a blockdiagonal form, and diagonalize eventual blocks bigger than 1x1 separately.

score 1 · Accepted Answer

如果您事先知道两个矩阵的特征值的大小，则可以对两个矩阵的线性组合进行对角化，并选择系数来打破退化。例如，如果两者的特征值都在 -10 和 10 之间，您可以对角化 100*M1 + M2。精度略有下降，但对于许多用途来说已经足够了——而且快速简单！

score 0 · Accepted Answer

我确信我的解决方案还有很大的改进空间，但我提出了以下三个函数集，以一种半稳健的方式为我进行计算。

def clusters(array,
             orig_indices = None,
             start = 0,
             rtol=numpy.allclose.__defaults__[0],
             atol=numpy.allclose.__defaults__[1]):
    """For an array, return a permutation that sorts the numbers and the sizes of the resulting blocks of identical numbers."""
    array = numpy.asarray(array)
    if not len(array):
        return numpy.array([]),[]
    if orig_indices is None:
        orig_indices = numpy.arange(len(array))
    x = array[0]
    close = abs(array-x) <= (atol + rtol*abs(x))
    first = sum(close)
    r_perm, r_sizes = clusters(
        array[~close],
        orig_indices[~close],
        start+first,
        rtol, atol)
    r_sizes.insert(0, first)
    return numpy.concatenate((orig_indices[close], r_perm)), r_sizes

def permutation_matrix(permutation, dtype=dtype):
    n = len(permutation)
    P = numpy.zeros((n,n), dtype)
    for i,j in enumerate(permutation):
        P[j,i]=1
    return P

def simultaneously_diagonalize(tensor, atol=numpy.allclose.__defaults__[1]):
    tensor = numpy.asarray(tensor)
    old_shape = tensor.shape
    size = old_shape[-1]
    tensor = tensor.reshape((-1, size, size))
    diag_mask = 1-numpy.eye(size)

    eigvalues, diagonalizer = numpy.linalg.eig(tensor[0])
    diagonalization = numpy.dot(
        numpy.dot(
            matrix.linalg.inv(diagonalizer),
            tensor).swapaxes(0,-2),
        diagonalizer)
    if numpy.allclose(diag_mask*diagonalization, 0):
        return diagonalization.diagonal(axis1=-2, axis2=-1).reshape(old_shape[:-1])
    else:
        perm, cluster_sizes = clusters(diagonalization[0].diagonal())
        perm_matrix = permutation_matrix(perm)
        diagonalization = numpy.dot(
            numpy.dot(
                perm_matrix.T,
                diagonalization).swapaxes(0,-2),
            perm_matrix)
        mask = 1-scipy.linalg.block_diag(
            *list(
                numpy.ones((blocksize, blocksize))
                for blocksize in cluster_sizes))
        print(diagonalization)
        assert(numpy.allclose(
                diagonalization*mask,
                0)) # Assert that the matrices are co-diagonalizable
        blocks = numpy.cumsum(cluster_sizes)
        start = 0
        other_part = []
        for block in blocks:
            other_part.append(
                simultaneously_diagonalize(
                    diagonalization[1:, start:block, start:block]))
            start = block
        return numpy.vstack(
            (diagonalization[0].diagonal(axis1=-2, axis2=-1),
             numpy.hstack(other_part)))

python - 用 numpy 同时对角化矩阵

4 回答 4

Related

Reference