1

我有以下 JSON 输出,我正尝试使用 pandas 的 json_normalize 将其转换为数据框。我可以用 json_normalize(data, ['runners']) 展开到 runners 这一层,但我想进一步展开到 ex 这一层。

[{
    u'status': u'OPEN',
    u'isMarketDataDelayed': False,
    u'numberOfRunners': 9,
    u'complete': True,
    u'bspReconciled': False,
    u'runnersVoidable': False,
    u'betDelay': 0,
    u'marketId': u'1.123264244',
    u'crossMatching': False,
    u'totalMatched': 4.22,
    u'version': 1241856317,
    u'lastMatchTime': u'2016-02-25T10:32:25.704Z',
    u'numberOfWinners': 1,
    u'inplay': False,
    u'numberOfActiveRunners': 9,
    u'totalAvailable': 39.26,
    u'runners': [{
            u'status': u'ACTIVE',
            u'handicap': 0.0,
            u'selectionId': 10861647,
            u'totalMatched': 0.0,
            u'adjustmentFactor': 16.631,
            u'ex': {
                u'availableToBack': [{
                        u'price': 1.02,
                        u'size': 2.15
                    }
                ],
                u'availableToLay': [],
                u'tradedVolume': []
            }
        }, {
            u'status': u'ACTIVE',
            u'handicap': 0.0,
            u'selectionId': 10861648,
            u'totalMatched': 0.0,
            u'adjustmentFactor': 13.237,
            u'ex': {
                u'availableToBack': [{
                        u'price': 1.01,
                        u'size': 7.11
                    }
                ],
                u'availableToLay': [],
                u'tradedVolume': []
            }
        },

对于其他数据,我可以很容易地用 json_normalize(data, ['runners', 'ex']) 做到这一点,但在这个例子中这样做,我得到的结果是:

                   0
0    availableToBack
1     availableToLay
2       tradedVolume
3    availableToBack
4     availableToLay
5       tradedVolume
6    availableToBack
7     availableToLay
8       tradedVolume
9    availableToBack
10    availableToLay
11      tradedVolume
12   availableToBack
13    availableToLay
14      tradedVolume
15   availableToBack
16    availableToLay
17      tradedVolume
18   availableToBack
19    availableToLay
20      tradedVolume
21   availableToBack
22    availableToLay
23      tradedVolume
24   availableToBack
25    availableToLay
26      tradedVolume
27   availableToBack
28    availableToLay
29      tradedVolume
..               ...

结果应该包含 'availableToBack'、'availableToLay'、'tradedVolume' 这几列。

4

1 回答 1

0
  • data 是一个由 dict 和 list 嵌套组成的 list
  • 'ex.availableToBack' 是其中一个由 dict 组成的 list,这些 dict 会被规范化为 'price' 和 'size' 两列

规范化整个 data

import pandas as pd

# Flatten the top-level records, then give every entry of the 'runners'
# list its own row (the market-level values are repeated on each row).
df = pd.json_normalize(data).explode('runners').reset_index(drop=True)

# Move the column of runner dicts out of df and flatten it into its own
# frame; every list-valued column is then exploded (in the sample output
# the empty lists show up as NaN).
runners = pd.json_normalize(df.pop('runners')).apply(pd.Series.explode)

# 'ex.availableToBack' is removed from runners and its dicts are expanded
# into separate columns ('price' and 'size' for the sample data)
back_offers = pd.DataFrame(runners.pop('ex.availableToBack').tolist())
runners = runners.join(back_offers)

# mark every runner-level column with a 'runners_' prefix
runners = runners.add_prefix('runners_')

# attach the runner-level columns to the market-level frame (index join)
df = df.join(runners)

# the last four columns of the combined frame are the ones derived from 'ex'
ex_cols = df.iloc[:, -4:].copy()

# display(df)
   betDelay  bspReconciled  complete  crossMatching  inplay  isMarketDataDelayed             lastMatchTime     marketId  numberOfActiveRunners  numberOfRunners  numberOfWinners  runnersVoidable status  totalAvailable  totalMatched     version  runners_adjustmentFactor  runners_handicap  runners_selectionId runners_status  runners_totalMatched runners_ex.availableToLay runners_ex.tradedVolume  runners_price  runners_size
0         0          False      True          False   False                False  2016-02-25T10:32:25.704Z  1.123264244                      9                9                1            False   OPEN           39.26          4.22  1241856317                    16.631               0.0             10861647         ACTIVE                   0.0                       NaN                     NaN           1.02          2.15
1         0          False      True          False   False                False  2016-02-25T10:32:25.704Z  1.123264244                      9                9                1            False   OPEN           39.26          4.22  1241856317                    13.237               0.0             10861648         ACTIVE                   0.0                       NaN                     NaN           1.01          7.11

# display(ex_cols)
  runners_ex.availableToLay runners_ex.tradedVolume  runners_price  runners_size
0                       NaN                     NaN           1.02          2.15
1                       NaN                     NaN           1.01          7.11

仅规范化 'runners' 键

# Build the frame directly from the records stored under 'runners', explode
# every list-valued column, and renumber the rows from 0.
runners = (
    pd.json_normalize(data, record_path='runners')
    .apply(pd.Series.explode)
    .reset_index(drop=True)
)

# 'ex.availableToBack' is removed from runners and its dicts are expanded
# into separate columns ('price' and 'size' for the sample data)
back_offers = pd.DataFrame(runners.pop('ex.availableToBack').tolist())
runners = runners.join(back_offers)

# the last four columns are the ones derived from 'ex'
ex_cols = runners.iloc[:, -4:].copy()

# display(runners)
   adjustmentFactor  handicap  selectionId  status  totalMatched ex.availableToLay ex.tradedVolume  price  size
0            16.631       0.0     10861647  ACTIVE           0.0               NaN             NaN   1.02  2.15
1            13.237       0.0     10861648  ACTIVE           0.0               NaN             NaN   1.01  7.11

# display(ex_cols)
  ex.availableToLay ex.tradedVolume  price  size
0               NaN             NaN   1.02  2.15
1               NaN             NaN   1.01  7.11

data

# Sample input: a list holding one market record. The record's 'runners'
# key is a list of runner dicts, and each runner's 'ex' dict holds three
# lists. Key order is kept as-is because it determines column order once
# the data is normalized.
data = [
    {
        'betDelay': 0,
        'bspReconciled': False,
        'complete': True,
        'crossMatching': False,
        'inplay': False,
        'isMarketDataDelayed': False,
        'lastMatchTime': '2016-02-25T10:32:25.704Z',
        'marketId': '1.123264244',
        'numberOfActiveRunners': 9,
        'numberOfRunners': 9,
        'numberOfWinners': 1,
        'runners': [
            {
                'adjustmentFactor': 16.631,
                'ex': {
                    'availableToBack': [{'price': 1.02, 'size': 2.15}],
                    'availableToLay': [],
                    'tradedVolume': [],
                },
                'handicap': 0.0,
                'selectionId': 10861647,
                'status': 'ACTIVE',
                'totalMatched': 0.0,
            },
            {
                'adjustmentFactor': 13.237,
                'ex': {
                    'availableToBack': [{'price': 1.01, 'size': 7.11}],
                    'availableToLay': [],
                    'tradedVolume': [],
                },
                'handicap': 0.0,
                'selectionId': 10861648,
                'status': 'ACTIVE',
                'totalMatched': 0.0,
            },
        ],
        'runnersVoidable': False,
        'status': 'OPEN',
        'totalAvailable': 39.26,
        'totalMatched': 4.22,
        'version': 1241856317,
    },
]
于 2021-03-09T15:40:13.043 回答