-1

我有一个文件,其中包含以下几行:

B99990001 1 2 3 4
B99990001 1 3 3 4
B99990002 1 2 3 4
B99990002 1 3 3 4
B99990003 1 2 3 4
B99990003 1 3 3 4

所以在这里我的目标是制作一个主列表,该列表应该基于行的第一列(B99990001、B99990002、B99990003)包含三个子列表:

Mainlist=[ 
          ['B99990001 1 2 3 4','B99990001 1 3 3 4'],#sublist1 has B99990001
          ['B99990002 1 2 3 4','B99990002 1 3 3 4'],#sublist2 has B99990002
          ['B99990003 1 2 3 4','B99990003 1 3 3 4'] #sublist3 has B99990003
                                                                                ]

希望我的问题表述得足够清楚。如果有人知道该怎么做,请帮我解决这个问题。

提前谢谢你

在这里查看我的真实示例:

import os
import re
# PDB model files to scan. Both entries point into the same E3P directory;
# the first path previously carried stray spaces inside a directory name.
pdbPathAndName = [
    '/Users/Mahesh/Documents/MAHESH_INTERNSHIP_2014/ENZOWP2/2WC5_090715_170128/E3P/E3P.B99990001.pdb',
    '/Users/Mahesh/Documents/MAHESH_INTERNSHIP_2014/ENZOWP2/2WC5_090715_170128/E3P/E3P.B99990002.pdb',
]

''' /Users/Mahesh/Documents/MAHESH_INTERNSHIP_2014/ENZOWP2/2WC5_090715_170128/E3P/E3P.B99990001.pdb=[
                    'ATOM    138  SG  CYS    19       4.499   4.286   8.260  1.00 71.96           S',
                    'ATOM    397  SG  CYS    50      14.897   3.238   9.338  1.00 34.60           S',
                    'ATOM    424  SG  CYS    54       5.649   5.914   8.639  1.00 42.68           S',
                    'ATOM    774  SG  CYS    97      12.114  -6.864  23.897  1.00 62.23           S',
                    'ATOM    865  SG  CYS   108      15.200   3.910  11.227  1.00 54.49           S'    ]

/Users/Mahesh/Documents/MAHESH_INTERNSHIP_2014/ENZOWP2/2WC5_090715_170128/E3P/E3P.B99990002.pdb=[
                    'ATOM    929  SG  CYS   117      13.649  -6.894  22.589  1.00106.90           S',
                    'ATOM    138  SG  CYS    19       4.499   4.286   8.260  1.00 71.96           S',
                    'ATOM    397  SG  CYS    50      14.897   3.238   9.338  1.00 34.60           S',
                    'ATOM    424  SG  CYS    54       5.649   5.914   8.639  1.00 42.68           S',
                    'ATOM    774  SG  CYS    97      12.114  -6.864  23.897  1.00 62.23           S',
                    'ATOM    865  SG  CYS   108      15.200   3.910  11.227  1.00 54.49           S',
                    'ATOM    929  SG  CYS   117      13.649  -6.894  22.589  1.00106.90           S'    ] '''


# Print one "<file>_<field1>:<field5>:<field6>:<field7>" line for every
# matching record of every listed PDB file.
for path in pdbPathAndName:
    # Records are labelled with the file's base name (text after the last "/").
    file_name = path.split("/")[-1]
    # The context manager closes the file; the handle used to be left open.
    with open(path, 'r') as pdb_file:
        for raw_line in pdb_file:
            line = raw_line.strip()
            # Keep only lines that mention "SG" and end with "S".
            if "SG" in line and line.endswith("S"):
                # Split once on whitespace runs (raw string avoids the
                # invalid-escape warning for "\s"). In the sample ATOM records
                # fields 1, 5, 6 and 7 are the atom serial number and the
                # three coordinate values.
                fields = re.split(r'\s+', line)
                print(file_name + "_" + fields[1] + ":" + fields[5] + ":" + fields[6] + ":" + fields[7])

#PRINTED OUTPUT
'''E3P.B99990001.pdb_138:6.923:0.241:6.116
   E3P.B99990001.pdb_397:15.856:3.506:8.144
   E3P.B99990001.pdb_424:8.558:1.315:6.627
   E3P.B99990001.pdb_774:14.204:-5.490:24.812
   E3P.B99990001.pdb_865:15.545:4.258:10.007
   E3P.B99990001.pdb_929:16.146:-6.081:24.770

   E3P.B99990002.pdb_138:4.499:4.286:8.260
   E3P.B99990002.pdb_397:14.897:3.238:9.338
   E3P.B99990002.pdb_424:5.649:5.914:8.639
   E3P.B99990002.pdb_774:12.114:-6.864:23.897
   E3P.B99990002.pdb_865:15.200:3.910:11.227
   E3P.B99990002.pdb_929:13.649:-6.894:22.589'''

  #MY EXPECTED OUTPUT 
''' mainlist=[
            ['E3P.B99990001.pdb_138:6.923:0.241:6.116',
            'E3P.B99990001.pdb_397:15.856:3.506:8.144',
            'E3P.B99990001.pdb_424:8.558:1.315:6.627',
            'E3P.B99990001.pdb_774:14.204:-5.490:24.812',
            'E3P.B99990001.pdb_865:15.545:4.258:10.007',
            'E3P.B99990001.pdb_929:16.146:-6.081:24.770'],#sublist1

            ['E3P.B99990002.pdb_138:4.499:4.286:8.260',
            'E3P.B99990002.pdb_397:14.897:3.238:9.338',
            'E3P.B99990002.pdb_424:5.649:5.914:8.639',
            'E3P.B99990002.pdb_774:12.114:-6.864:23.897',
            'E3P.B99990002.pdb_865:15.200:3.910:11.227',
            'E3P.B99990002.pdb_929:13.649:-6.894:22.589']#sublist2
                                                            ]'''
# Then use these sublists to make combinations.
# Pairs are drawn inside one sublist at a time, so entries of sublist1 are
# never combined with entries of sublist2.
# NOTE(review): `mainlist` is not defined in this snippet (it stands for the
# expected list of sublists shown above) and `itertools` must be imported.
Combinatedlist = []
for sublists in mainlist:
    # Each sublist is a plain list of strings, so it is passed directly to
    # combinations(); lists have no .iteritems().
    Combinatedlist.extend(itertools.combinations(sublists, 2))

#但我仍然没有得到正确的结果;如果你能建议你的方法就太好了

嗨,伙计们,我找到了答案:在每个块(每个文件的记录)之后插入一个特定的标记,再按该标记拆分以制作子列表,然后对各个子列表做组合

My code:

import fileinput
import os
import re
import itertools
import math
import sys

# PDB model files to scan.
pdbPathAndName = [
    '/Users/Mahesh/Documents/MAHESH_INTERNSHIP_2014/ENZOWP2/2WC5_090715_170128/E3P/E3P.B99990001.pdb',
    '/Users/Mahesh/Documents/MAHESH_INTERNSHIP_2014/ENZOWP2/2WC5_090715_170128/E3P/E3P.B99990002.pdb',
]

# Flat list of "<file>_<field1>:<field5>:<field6>:<field7>" records. A "foo"
# marker is appended after each file's records so the list can be cut into
# per-file sublists below.
ATOM_COORDINATE = []
for path in pdbPathAndName:
    # Records are labelled with the file's base name (text after the last "/").
    file_name = path.split("/")[-1]
    # The context manager closes the file once its lines have been read.
    with open(path, 'r') as pdb_file:
        for raw_line in pdb_file:
            line = raw_line.strip()
            # Keep only lines that mention "SG" and end with "S".
            if "SG" in line and line.endswith("S"):
                # Split once on whitespace runs instead of once per field.
                fields = re.split(r'\s+', line)
                ATOM_COORDINATE.append(file_name + "_" + fields[1] + ":" + fields[5] + ":" + fields[6] + ":" + fields[7])
    # The marker belongs inside the per-file loop: one "foo" per file.
    ATOM_COORDINATE.append("foo")

# Making the main list with sublists by splitting on the "foo" marker.
# The sublists go into a separate list; appending them to ATOM_COORDINATE
# would modify the list while it is being iterated.
MAINLIST = []
sub = []
for item in ATOM_COORDINATE:
    if item == 'foo':
        MAINLIST.append(sub)
        sub = []
    else:
        sub.append(item)

# Making combinations out of sublists; items from different sublists are
# never mixed.
COMBINATION = []
for sublists in MAINLIST:
    # NOTE(review): range(2, len(sublists), 4) yields only size 2 for the
    # six-record sample, but yields nothing for a two-record sublist and adds
    # size 6 for seven or more records -- confirm whether plain pairs
    # (size 2 only) are intended.
    for L in range(2, len(sublists), 4):
        COMBINATION.extend(itertools.combinations(sublists, L))

OUTPUT:
MainlistWithSublists:
[['E3P.B99990001.pdb_138:6.923:0.241:6.116', 'E3P.B99990001.pdb_397:15.856:3.506:8.144', 'E3P.B99990001.pdb_424:8.558:1.315:6.627', 'E3P.B99990001.pdb_774:14.204:-5.490:24.812', 'E3P.B99990001.pdb_865:15.545:4.258:10.007', 'E3P.B99990001.pdb_929:16.146:-6.081:24.770'], ['E3P.B99990002.pdb_138:4.499:4.286:8.260', 'E3P.B99990002.pdb_397:14.897:3.238:9.338', 'E3P.B99990002.pdb_424:5.649:5.914:8.639', 'E3P.B99990002.pdb_774:12.114:-6.864:23.897', 'E3P.B99990002.pdb_865:15.200:3.910:11.227', 'E3P.B99990002.pdb_929:13.649:-6.894:22.589']]
Combination out of sublists:
[('E3P.B99990001.pdb_138:6.923:0.241:6.116', 'E3P.B99990001.pdb_397:15.856:3.506:8.144'), ('E3P.B99990001.pdb_138:6.923:0.241:6.116', 'E3P.B99990001.pdb_424:8.558:1.315:6.627'), ('E3P.B99990001.pdb_138:6.923:0.241:6.116', 'E3P.B99990001.pdb_774:14.204:-5.490:24.812'), ('E3P.B99990001.pdb_138:6.923:0.241:6.116', 'E3P.B99990001.pdb_865:15.545:4.258:10.007'), ('E3P.B99990001.pdb_138:6.923:0.241:6.116', 'E3P.B99990001.pdb_929:16.146:-6.081:24.770'), ('E3P.B99990001.pdb_397:15.856:3.506:8.144', 'E3P.B99990001.pdb_424:8.558:1.315:6.627'), ('E3P.B99990001.pdb_397:15.856:3.506:8.144', 'E3P.B99990001.pdb_774:14.204:-5.490:24.812'), ('E3P.B99990001.pdb_397:15.856:3.506:8.144', 'E3P.B99990001.pdb_865:15.545:4.258:10.007'), ('E3P.B99990001.pdb_397:15.856:3.506:8.144', 'E3P.B99990001.pdb_929:16.146:-6.081:24.770'), ('E3P.B99990001.pdb_424:8.558:1.315:6.627', 'E3P.B99990001.pdb_774:14.204:-5.490:24.812'), ('E3P.B99990001.pdb_424:8.558:1.315:6.627', 'E3P.B99990001.pdb_865:15.545:4.258:10.007'), ('E3P.B99990001.pdb_424:8.558:1.315:6.627', 'E3P.B99990001.pdb_929:16.146:-6.081:24.770'), ('E3P.B99990001.pdb_774:14.204:-5.490:24.812', 'E3P.B99990001.pdb_865:15.545:4.258:10.007'), ('E3P.B99990001.pdb_774:14.204:-5.490:24.812', 'E3P.B99990001.pdb_929:16.146:-6.081:24.770'), ('E3P.B99990001.pdb_865:15.545:4.258:10.007', 'E3P.B99990001.pdb_929:16.146:-6.081:24.770'), ('E3P.B99990002.pdb_138:4.499:4.286:8.260', 'E3P.B99990002.pdb_397:14.897:3.238:9.338'), ('E3P.B99990002.pdb_138:4.499:4.286:8.260', 'E3P.B99990002.pdb_424:5.649:5.914:8.639'), ('E3P.B99990002.pdb_138:4.499:4.286:8.260', 'E3P.B99990002.pdb_774:12.114:-6.864:23.897'), ('E3P.B99990002.pdb_138:4.499:4.286:8.260', 'E3P.B99990002.pdb_865:15.200:3.910:11.227'), ('E3P.B99990002.pdb_138:4.499:4.286:8.260', 'E3P.B99990002.pdb_929:13.649:-6.894:22.589'), ('E3P.B99990002.pdb_397:14.897:3.238:9.338', 'E3P.B99990002.pdb_424:5.649:5.914:8.639'), ('E3P.B99990002.pdb_397:14.897:3.238:9.338', 'E3P.B99990002.pdb_774:12.114:-6.864:23.897'), 
('E3P.B99990002.pdb_397:14.897:3.238:9.338', 'E3P.B99990002.pdb_865:15.200:3.910:11.227'), ('E3P.B99990002.pdb_397:14.897:3.238:9.338', 'E3P.B99990002.pdb_929:13.649:-6.894:22.589'), ('E3P.B99990002.pdb_424:5.649:5.914:8.639', 'E3P.B99990002.pdb_774:12.114:-6.864:23.897'), ('E3P.B99990002.pdb_424:5.649:5.914:8.639', 'E3P.B99990002.pdb_865:15.200:3.910:11.227'), ('E3P.B99990002.pdb_424:5.649:5.914:8.639', 'E3P.B99990002.pdb_929:13.649:-6.894:22.589'), ('E3P.B99990002.pdb_774:12.114:-6.864:23.897', 'E3P.B99990002.pdb_865:15.200:3.910:11.227'), ('E3P.B99990002.pdb_774:12.114:-6.864:23.897', 'E3P.B99990002.pdb_929:13.649:-6.894:22.589'), ('E3P.B99990002.pdb_865:15.200:3.910:11.227', 'E3P.B99990002.pdb_929:13.649:-6.894:22.589')]

谢谢大家

4

2 回答 2

1

如果可以,只需使用字典:

from collections import defaultdict

# Sample input: one record per line; the first whitespace-separated token of
# each line is the grouping key.
s = """B99990001 1 2 3 4
B99990001 1 3 3 4
B99990002 1 2 3 4
B99990002 1 3 3 4
B99990003 1 2 3 4
B99990003 1 3 3 4"""

# Map each key to the remainders of its lines, in input order.
d = defaultdict(list)
for line in s.splitlines():
    # Cut off the first token only; the rest of the line stays one string.
    index, values = line.split(None, 1)
    d[index].append(values)

输出(字典d):

d = {
    'B99990003': ['1 2 3 4', '1 3 3 4'],
    'B99990001': ['1 2 3 4', '1 3 3 4'],
    'B99990002': ['1 2 3 4', '1 3 3 4'],
}

如果您确实需要使用列表列表而不是 dict,则可以将其转换回列表:

# Turn the key -> values mapping back into a list of sublists, re-attaching
# each key in front of every one of its values.
l = [
    ['%s %s' % (index, value) for value in values]
    for index, values in d.items()
]

如果您更喜欢排序后的版本,可以使用 sorted(l) 对其进行排序。

于 2015-07-12T23:20:35.193 回答
1

如果您想获得完全相同的输出:

from collections import OrderedDict

# Group the lines of file.txt by their first whitespace-separated token.
# The OrderedDict keeps keys in order of first appearance.
d = OrderedDict()
with open('file.txt') as f:
    for line in f:
        splitted = line.strip().split()
        if not splitted:
            # Blank line (e.g. a trailing empty line): nothing to group, and
            # splitted[0] would raise IndexError.
            continue
        key = splitted[0]
        # setdefault creates the key's list on first sight.
        d.setdefault(key, []).append(' '.join(splitted[1:]))

# Rebuild the "key rest" strings, one sublist per key.
mainList = [[key + ' ' + item for item in d[key]] for key in d]
# The function-call form prints the same text on Python 2 and Python 3.
print(mainList)

输出:

[['B99990001 1 2 3 4', 'B99990001 1 3 3 4'],
 ['B99990002 1 2 3 4', 'B99990002 1 3 3 4'],
 ['B99990003 1 2 3 4', 'B99990003 1 3 3 4']]
于 2015-07-12T23:24:50.897 回答