2

我正在努力在 python 中实现自然连接。前两行显示表的属性,接下来的两行显示每个表的元组或行。

预期输出:

[['A', 1, 'A', 'a', 'A'], 
 ['A', 1, 'A', 'a', 'Y'], 
 ['A', 1, 'Y', 'a', 'A'], 
 ['A', 1, 'Y', 'a', 'Y'], 
 ['S', 2, 'B', 'b', 'S']]

我得到了什么:

[['A', 1, 'A', 'a', 'A', 'Y'], 
 ['A', 1, 'A', 'a', 'A', 'Y']]

我查看了代码,一切似乎都是正确的,我将不胜感激。

t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

t1tuples = [['A', 1, 'A', 'a'], 
            ['B', 2, 'Y', 'a'], 
            ['Y', 4, 'B', 'b'], 
            ['A', 1, 'Y', 'a'], 
            ['S', 2, 'B', 'b']]

t2tuples = [[1, 'a', 'A'], 
            [3, 'a', 'B'], 
            [1, 'a', 'Y'], 
            [2, 'b', 'S'], 
            [3, 'b', 'E']]

def findindices(t1atts, t2atts):
  t1index=[]
  t2index=[]
  for index, att in enumerate(t1atts):
    for index2, att2 in enumerate(t2atts):
      if att == att2:
        t1index.append(index)
        t2index.append(index2)
  return t1index, t2index

def main():
  tpl=0; tpl2=0; i=0; j=0; count=0; result=[]
  t1index, t2index = findindices(t1atts, t2atts)
  for tpl in t1tuples:
    while tpl2 in range(len(t2tuples)):
      i=0; j=0
      while (i in range(len(t1index))) and (j in range(len(t2index))):
          if tpl[t1index[i]] != t2tuples[tpl2][t2index[j]]:
            i=len(t1index)
            j=len(t1index)
          else:
            count+=1
          i+=1
          j+=1
      if count == len(t1index):
        extravals = [val for index, val in enumerate(t2tuples[tpl2]) if index not in t2index]
        temp = tpl
        tpl += extravals
        result.append(tpl)
        tpl = temp
      count=0
      tpl2+=1
  print result
4

2 回答 2

1

好的,这是解决方案,请验证并让我知道它是否适合您:

我改变了一点命名来理解自己:

#!/usr/bin/python

table1 = ('A', 'B', 'C', 'D')
table2 = ('B', 'D', 'E')

row1 = [['A', 1, 'A', 'a'],
        ['B', 2, 'Y', 'a'],
        ['Y', 4, 'B', 'b'],
        ['A', 1, 'Y', 'a'],
        ['S', 2, 'B', 'b']]

row2 = [[1, 'a', 'A'],
        [3, 'a', 'B'],
        [1, 'a', 'Y'],
        [2, 'b', 'S'],
        [3, 'b', 'E']]

def findindices(table1, table2):
    inter = set(table1).intersection(set(table2))
    tup_index1 = [table1.index(x) for x in inter]
    tup_index2 = [table2.index(x) for x in inter]]
    return tup_index1, tup_index2

def main():

    final_lol = list()

    tup_index1, tup_index2 = findindices(table1, table2)

    merge_tup = zip(tup_index1, tup_index2)

    for tup1 in row1:
        for tup2 in row2:
            for m in merge_tup:
                if tup1[m[0]] != tup2[m[1]]:
                    break
            else:
               ls = []
               ls.extend(tup1)
               ls.append(tup2[-1])
               final_lol.append(ls)
    return final_lol

if __name__ == '__main__':
    import pprint
    pprint.pprint(main())

输出:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]
于 2014-02-03T06:40:57.147 回答
1

这就是我想出的。在调用完成之前,我会进行更多重构等

import pprint

t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

t1tuples = [
    ['A', 1, 'A', 'a'],
    ['B', 2, 'Y', 'a'],
    ['Y', 4, 'B', 'b'],
    ['A', 1, 'Y', 'a'],
    ['S', 2, 'B', 'b']]

t2tuples = [
    [1, 'a', 'A'],
    [3, 'a', 'B'],
    [1, 'a', 'Y'],
    [2, 'b', 'S'],
    [3, 'b', 'E']]


t1columns = set(t1atts)
t2columns = set(t2atts)
t1map = {k: i for i, k in enumerate(t1atts)}
t2map = {k: i for i, k in enumerate(t2atts)}

join_on = t1columns & t2columns
diff = t2columns - join_on

def match(row1, row2):
   return all(row1[t1map[rn]] == row2[t2map[rn]] for rn in join_on)

results = []
for t1row in t1tuples:
    for t2row in t2tuples:
        if match(t1row, t2row):
            row = t1row[:]
            for rn in diff:
                row.append(t2row[t2map[rn]])
            results.append(row)

pprint.pprint(results)

我得到了预期的结果:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]
于 2014-02-03T07:00:54.857 回答