0

我正在尝试制作一个 python 脚本,它可以让我获得给定矩阵中最长的重复字符(水平和垂直)。

例子:

我有这个矩阵:

afaaf
rbaca
rlaff

给出这个矩阵作为输入,它应该得到:a 3

您可以看到矩阵的第 3 列充满了 a,而且它是矩阵中重复次数最多的字符。

我有的:

#!/bin/python2.7

#Longest string in matrix

#Given a matrix filled with letters. Find the longest string, containing only the same letter, which can be obtained by starting
#with any position and then moving horizontally and vertically (each cell can be visited no more than 1 time).


# Settings here
# -------------
string_matrix = """
afaaf
rbaca
rlaff
"""

pos = (0,0)
# -------------

import pdb
import time
import collections
from collections import defaultdict
import re


rows = 0
columns = 0
matrix = []
matrix2 = []
counter = 0
res_l = []
i = 0
c = ''

# if matrix2 is full of 1's, stop
def stop():
    for i in range(0, rows):
        for j in range(0, columns):
            if matrix2[i][j] == 0:
                return False
    return True

# checks the points, and returns the most repeated char and length
def check_points(points1, points2):
    r = []

    r.append(-1)
    r.append('')

    # create strings from matrix
    s1 = ''
    s2 = ''


    for point in points1:
        s1 += matrix[point[0]][point[1]]

    for point in points2:
        s2 += matrix[point[0]][point[1]]

    rr = {}

    for c in s1:
        rr[c] = 0

    for c in s2:
        rr[c] = 0

    for i in range(0, len(s1)):
        k = 1
        for j in range(i+1, len(s1)):
            if s1[i] == s1[j]:
                k += 1
            else:
                break
            if k > rr[s1[i]]:
                rr[s1[i]] = k


    for i in range(0, len(s2)):
        k = 1
        for j in range(i+1, len(s2)):
            if s2[i] == s2[j]:
                k += 1
            else:
                break
            if k > rr[s2[i]]:
                rr[s2[i]] = k


    m = -1
    c = ''
    for key,value in rr.iteritems():
        if value > m:
            m = value
            c = key

    return m, c


# Depth-first search, recursive
def search(pos):
    global res_l
    global matrix2
    global c

    counter = 0

    x = pos[0]
    y = pos[1]

    c = matrix[x][y]

    # base clause
    # when matrix2 is all checked
    if stop():
        return counter, c



    points1 = []
    points2 = []
    allpoints = []

    for i in range(0, columns):
            if matrix2[x][i] != 1:
                points1.append([x, i])
                allpoints.append([x, i])


    for i in range(0, rows):
            if matrix2[i][x] != 1:
                points2.append([i, x])
                allpoints.append([i, x])





    r = check_points(points1, points2)


    if r[0] > counter:
        counter = r[0]
        c = r[1]

    matrix2[x][y] = 1


    for point in allpoints:
        rr = search(point)
        if rr[0] > counter:
            counter = int(rr[0])
            c = rr[1]
            #print 'c: ' + str(c) + ' - k: ' + str(counter)



    return counter, c

def main():
    # create the matrix from string
    string_matrix_l = string_matrix.strip()
    splited = string_matrix_l.split('\n')


    global rows
    global columns
    global matrix
    global matrix2

    rows = len(splited)
    columns = len(splited[1])



    # initialize matrixes with 0
    matrix = [[0 for x in range(columns)] for x in range(rows)]
    matrix2 = [[0 for x in range(columns)] for x in range(rows)]


    # string to matrix
    i = 0
    for s in splited:
        s = s.strip()
        if s == '':
            continue

        j = 0

        for c in s:
            try:## Heading ##
                matrix[i][j] = c
                #print 'ok: ' + str(i) + ' ' + str(j) + ' ' +  c
            except:
                print 'fail: index out of range matrix[' + str(i) + '][' + str(j)+'] ' + c
            j = j + 1

        i = i + 1


    # print some info
    print 'Given matrix: ' + str(matrix) + '\n'
    print 'Start position: ' + str(pos)
    print 'Start character: ' + str(matrix[pos[0]][pos[1]])

    # get the result
    res = search(pos)
    print '-------------------------------------'
    print '\nChar: ' + str(res[1]) + '\nLength: ' + str(res[0])


if __name__ == "__main__":
    main()

这是我的源代码。上面给出的示例也在源代码中使用。给出的结果是:r 2这是错误的......再次,应该是3

它有 4 个功能:main、search、stop 和 check_points。

  • 主要是初始化事情,
  • search 是我的递归函数,它接受一个参数(起点),并且应该递归地检查最长的字符串。我有另一个矩阵,长度与原始矩阵相同,只有 1 和 0。1 表示访问过的位置,0,不是。搜索功能在某个位置被搜索功能处理后,在正确的位置上设置 1。
  • stop 正在检查 matrix2 是否全为 1,在这种情况下,矩阵已全部解析
  • check_points 接受 2 个参数,2 个点列表,并返回重复次数最多的字符以及这些点的长度

什么不起作用:

大多数时候结果是给我错误的字符,即使有时计数可能是正确的。有时它会水平工作,有时则不会。我确定我做错了什么,但是......现在已经超过 1 周了,因为我试图弄清楚如何做到这一点。在stackoverflow上问了另一个问题,走得更远了,但......仍然卡住了。

任何建议表示赞赏。

4

2 回答 2

2

您可以使用itertools.groupby快速查找某些字符的重复计数,并izip_longest(*matrix)转置矩阵(交换其行和列)。

from itertools import groupby, izip_longest

matrix_string = """
afaaf
rbaca
rlaff
"""

def longest_repetition(row): 
    return max((sum(1 for item in group), letter) 
               for letter, group in groupby(row) 
               if letter is not None)

def main():
    matrix = [[letter for letter in row.strip()] 
              for row in matrix_string.strip().split('\n')]

    count, letter = max(
        max(longest_repetition(row) for row in matrix),
        max(longest_repetition(col) for col in izip_longest(*matrix))
    )

    print letter, count

if __name__ == '__main__':
    main()

由于您已经更新了这里的要求,因此这里的代码是带有一些解释的递归版本。如果这不是一项任务,并且这个任务出现在一些现实生活中的问题中,那么你真的应该使用第一个版本。

matrix_string = """
afaaf
rbaca
rlaff
"""


def find_longest_repetition(matrix):
    rows = len(matrix)
    cols = len(matrix[0])

    # row, col - row and column of the current character.
    # direction - 'h' if we are searching for repetitions in horizontal direction (i.e., in a row).
    #             'v' if we are searching in vertical direction.
    # result - (count, letter) of the longest repetition we have seen by now.
    #          This order allows to compare results directly and use `max` to get the better one
    # current - (count, letter) of the repetition we have seen just before the current character.
    def recurse(row, col, direction, result, current=(0, None)):
        # Check if we need to start a new row, new column, 
        #   new direction, or finish the recursion.
        if direction == 'h':    # If we are moving horizontally
            if row >= rows:     # ...  and finished all rows
                return recurse( # restart from the (0, 0) position in vertical direction.
                    0, 0,
                    'v',
                    result
                )
            if col >= cols:     # ... and finished all columns in the current row
                return recurse( # start the next row.
                    row + 1, 0,
                    direction,
                    result
                )
        else:                   # If we are moving vertically
            if col >= cols:     # ... and finished all columns
                return result   # then we have analysed all possible repetitions. 
            if row >= rows:     # ... and finished all rows in the current column
                return recurse( # start the next column.
                    0, col + 1,
                    direction,
                    result
                )

        # Figure out where to go next in the current direction
        d_row, d_col = (0, 1) if direction == 'h' else (1, 0)

        # Try to add current character to the current repetition
        count, letter = current
        if matrix[row][col] == letter:
            updated_current = count + 1, letter
        else:
            updated_current = 1, matrix[row][col]

        # Go on with the next character in the current direction
        return recurse( 
            row + d_row, 
            col + d_col,
            direction,
            max(updated_current, result), # Update the result, if necessary
            updated_current
        )

    return recurse(0, 0, 'h', (0, None))


def main():
    matrix = [[letter for letter in row.strip()] 
              for row in matrix_string.strip().split('\n')]

    count, letter = find_longest_repetition(matrix)

    print letter, count


if __name__ == '__main__':
    main()
于 2015-04-08T09:05:59.400 回答
1

您还可以尝试使用collections.Counter(string).most_common()来获得最多的字符重复次数。

from collections import Counter

string_matrix = """
afaaf
rbaca
rlaff
"""

def GetMostRepetitions(pos):
    mc = []

    for ii in range(pos[0],len(working_mat)):
        mc.extend(Counter(working_mat[ii]).most_common(1))  

        for jj in range(pos[1],len(working_mat[0])):
            column = []           

            for kk in range(ii,len(working_mat)):
                column.append(working_mat[kk][jj])

            mc.extend(Counter(column).most_common(1))          

    m = 0           
    for item in mc: 
        if item[1] > m:
            m = item[1]
            k = item[0]


    print(k, m)                 

working_mat = string_matrix.strip().split('\n')

for ii in range(len(working_mat)):
    for jj in range(len(working_mat[0])):
        pos = (ii,jj)
        GetMostRepetitions(pos)

正如Kolmar所说,您还可以使用更好的方法来转置矩阵

于 2015-04-08T09:24:58.163 回答