我正在尝试将以下 dict 转换为数据框:

# Nested mapping: city name -> {'x': [...], 'y': [...]}.
# The closing brace was missing from the original literal, which made it a SyntaxError.
city_data = {
    'San Francisco': {'x': [1, 2, 3], 'y': [4, 1, 2]},
    'Montreal': {'x': [1, 2, 3], 'y': [2, 4, 5]},
    'New York City': {'x': [1, 2, 3], 'y': [2, 2, 7]},
    'Cincinnati': {'x': [1, 2, 3], 'y': [1, 0, 2]},
    'Toronto': {'x': [1, 2, 3], 'y': [4, 7, 3]},
    'Ottawa': {'x': [1, 2, 3], 'y': [2, 3, 3]},
}


期望得到如下形式的数据框(每个城市的每个列表元素各占一行):

city            |  x  |  y
San Francisco   |  1  |  4
San Francisco   |  2  |  1
San Francisco   |  3  |  2

我参考了在此处找到的解决方案(“将带有列表的嵌套字典展开为 pandas DataFrame”),并尝试了以下代码:

# Bind the nested city dictionary to a second name, `data`.
data = city_data

def unroll(data):
    """Yield one flat row per list found at the bottom of a nested dict.

    A list is yielded as-is (it already is a complete row). For a dict,
    every row produced by a value is yielded again with that value's key
    prepended. Any other type produces no rows at all.
    """
    if isinstance(data, list):
        # Leaf of the structure: the list itself is the row.
        yield data
        return
    if not isinstance(data, dict):
        # Neither container type: nothing to emit.
        return
    for label, child in data.items():
        # Descend one level and prefix each resulting row with this key.
        for tail in unroll(child):
            yield [label, *tail]

# Materialise the unrolled rows into a frame. The original referenced the
# undefined name `nested_dict`; the dictionary is bound to `data` above.
df = pd.DataFrame(list(unroll(data)))
# rename() returns a new frame, so bind the result; positional column labels
# 0, 1, 2, ... become 'col1', 'col2', 'col3', ...
df = df.rename(columns=lambda i: 'col{}'.format(i+1))




# Build a frame indexed by city name whose 'x' and 'y' cells each hold a whole list.
df = pd.DataFrame.from_dict(city_data, orient='index')
# Expand the list cells into one row per element; the result is not assigned here.
unnesting(df, ['x', 'y'])

               x  y 
Cincinnati     1  1 
Cincinnati     2  0 
Cincinnati     3  2 
Montreal       1  2 
Montreal       2  4 
Montreal       3  5 
New York City  1  2 
New York City  2  2 
New York City  3  7 
Ottawa         1  2 
Ottawa         2  3 
Ottawa         3  3 
San Francisco  1  4 
San Francisco  2  1 
San Francisco  3  2 
Toronto        1  4 
Toronto        2  7 
Toronto        3  3 


def unnesting(df, explode):
    idx = df.index.repeat(df[explode[0]].str.len())
    df1 = pd.concat([
        pd.DataFrame({x: np.concatenate(df[x].values)}) for x in explode], axis=1)
    df1.index = idx

    return df1.join(df.drop(explode, 1), how='left')
于 2019-06-07T18:36:48.420 回答

在 @roganjosh 的提示下,我自己解决了这个问题。这是我最终使用的解决方案:

# Nested mapping: city name -> {'x': [...], 'y': [...]}.
# The closing brace was missing from the original literal, which made it a SyntaxError.
city_data = {
        'San Francisco': {'x': [1, 2, 3], 'y': [4, 1, 2]},
        'Montreal': {'x': [1, 2, 3], 'y': [2, 4, 5]},
        'New York City': {'x': [1, 2, 3], 'y': [2, 2, 7]},
        'Cincinnati': {'x': [1, 2, 3], 'y': [1, 0, 2]},
        'Toronto': {'x': [1, 2, 3], 'y': [4, 7, 3]},
        'Ottawa': {'x': [1, 2, 3], 'y': [2, 3, 3]},
}

## Prepare my data
# One record per city: the 'x' and 'y' lists are carried over unchanged and the
# city name (the outer dict key) is stored under 'city'.
data = [
    {'x': city_data[city]['x'], 'y': city_data[city]['y'], 'city': city}
    for city in city_data
]

### use function from linked SO question

def explode(df, lst_cols, fill_value='', preserve_index=False):
    """Expand list-valued columns so that every list element gets its own row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``lst_cols`` columns hold one list per cell.
    lst_cols : label or list-like of labels
        Column(s) to expand. Row lengths are taken from the first column
        only, so all listed columns are assumed to hold lists of the same
        length within a row (NOTE(review): not validated).
    fill_value : scalar, default ''
        Value written into the expanded columns for rows whose list is empty.
    preserve_index : bool, default False
        If True, keep the original index labels (repeated per element);
        otherwise return a fresh 0..n-1 index.

    Returns
    -------
    pandas.DataFrame
        The non-list columns (in sorted label order) followed by the
        expanded columns.
    """
    # make sure `lst_cols` is list-alike
    if (lst_cols is not None
            and len(lst_cols) > 0
            and not isinstance(lst_cols, (list, tuple, np.ndarray, pd.Series))):
        lst_cols = [lst_cols]
    # all columns except `lst_cols`
    idx_cols = df.columns.difference(lst_cols)
    # calculate lengths of lists
    lens = df[lst_cols[0]].str.len()
    # preserve original index values
    idx = np.repeat(df.index.values, lens)
    # create "exploded" DF: repeat the scalar columns, flatten the list columns
    # (rows with empty lists contribute nothing to either part)
    res = (pd.DataFrame(
                {col: np.repeat(df[col].values, lens) for col in idx_cols},
                index=idx)
             .assign(**{col: np.concatenate(df.loc[lens > 0, col].values)
                        for col in lst_cols}))
    # append those rows that have empty lists
    if (lens == 0).any():
        # at least one list in cells is empty; pd.concat replaces
        # DataFrame.append, which was removed in pandas 2.0
        res = (pd.concat([res, df.loc[lens == 0, idx_cols]], sort=False)
                 .fillna(fill_value))
    # revert the original index order; a stable sort keeps the elements of
    # each list in order, since the index now contains duplicate labels
    res = res.sort_index(kind='mergesort')
    # reset index if requested
    if not preserve_index:
        res = res.reset_index(drop=True)
    return res

# Each record in `data` becomes one row; the 'x' and 'y' cells still hold whole lists.
df = pd.DataFrame(data)
# Expand 'x' and 'y' together; fill_value='' is passed explicitly (it is also the default).
df = explode(df, ['x','y'], fill_value='')


    city            x   y
0   San Francisco   1   4
1   San Francisco   2   1
2   San Francisco   3   2
3   Montreal        1   2
4   Montreal        2   4


于 2019-06-07T18:37:52.313 回答


# Rows look like [city, 'x' or 'y', v1, v2, v3]; columns are labelled 0, 1, 2, ...
df = pd.DataFrame(list(unroll(city_data)))
# Index by (city, axis name), then transpose each city's two rows so that
# 'x' and 'y' become columns.
# NOTE(review): the original expression was never closed and had no grouping
# step; `.groupby(level=0)` is reconstructed from the printed output (cities
# sorted, columns named 1) -- confirm against the original answer.
new_df = (df.set_index([0, 1])
            .groupby(level=0)
            .apply(lambda x: x.reset_index(level=0, drop=True).T))

# Drop the inner index level left over from the transposed column labels,
# leaving the city as the only index. The result is not assigned here.
new_df.reset_index(level=1, drop=True)


1              x  y
Cincinnati     1  1
Cincinnati     2  0
Cincinnati     3  2
Montreal       1  2
Montreal       2  4
Montreal       3  5
New York City  1  2
New York City  2  2
New York City  3  7
Ottawa         1  2
Ottawa         2  3
Ottawa         3  3
San Francisco  1  4
San Francisco  2  1
San Francisco  3  2
Toronto        1  4
Toronto        2  7
Toronto        3  3
于 2019-06-07T18:39:59.400 回答


# One small frame per city (columns 'x' and 'y'), stacked under a two-level
# index whose outer level carries the city name.
per_city_frames = (pd.DataFrame(columns) for columns in city_data.values())
out = pd.concat(per_city_frames, keys=city_data.keys(),
                names=["city", ""], sort=False)
# Move the city level out of the index into a regular 'city' column ...
out = out.reset_index(level=0)
# ... then discard the remaining per-city positions for a plain 0..n-1 index.
out = out.reset_index(drop=True)


             city  x  y
0   San Francisco  1  4
1   San Francisco  2  1
2   San Francisco  3  2
3        Montreal  1  2
4        Montreal  2  4
5        Montreal  3  5
6   New York City  1  2
7   New York City  2  2
8   New York City  3  7
9      Cincinnati  1  1
10     Cincinnati  2  0
11     Cincinnati  3  2
12        Toronto  1  4
13        Toronto  2  7
14        Toronto  3  3
15         Ottawa  1  2
16         Ottawa  2  3
17         Ottawa  3  3
于 2019-06-07T20:35:37.693 回答