0

我正在处理一个 JSON 结构,其输出如下:

{
    "time": "2015-10-20T20:15:00.847Z",
    "name": "meta.response.ean",
    "level": "info",
    "data1": {
        "HotelListResponse": {
            "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
            "numberOfRoomsRequested": 1,
            "moreResultsAvailable": true,
            "cacheKey": "-705f6d43:15086db3fd1:-4c58",
            "cacheLocation": "10.178.144.36:7300",
            "HotelList": {
                "@size": 2,
                "@activePropertyCount": 2,
                "HotelSummary": [{
                        "hotelId": 132684,
                        "city": "Seattle",
                        "highRate": 159.0,
                        "lowRate": 159.0,
                        "rateCurrencyCode": "USD",
                        "RoomRateDetailsList": {
                            "RoomRateDetails": {
                                "roomTypeCode": 10351,
                                "rateCode": 10351,
                                "roomDescription": "Standard Room, 1 Queen Bed",
                                "RateInfos": {
                                    "RateInfo": {
                                        "@promo": false,
                                        "ChargeableRateInfo": {
                                            "@averageBaseRate": 159.0,
                                            "@averageRate": 159.0,
                                            "@currencyCode": "USD",
                                            "@nightlyRateTotal": 159.0,
                                            "@surchargeTotal": 26.81,
                                            "@total": 185.81
                                        }
                                    }
                                }
                            }
                        }
                    }, {
                        "hotelId": 263664,
                        "city": "Las Vegas",
                        "highRate": 135.0,
                        "lowRate": 94.5,
                        "rateCurrencyCode": "USD",
                        "RoomRateDetailsList": {
                            "RoomRateDetails": {
                                "roomTypeCode": 373685,
                                "rateCode": 1238953,
                                "roomDescription": "Standard Room, 1 King Bed",
                                "RateInfos": {
                                    "RateInfo": {
                                        "@promo": true,
                                        "ChargeableRateInfo": {
                                            "@averageBaseRate": 135.0,
                                            "@averageRate": 94.5,
                                            "@currencyCode": "USD",
                                            "@nightlyRateTotal": 94.5,
                                            "@surchargeTotal": 9.45,
                                            "@total": 103.95
                                        }
                                    }
                                }
                            }
                        }
                    }
                ]
            }
        }
    },
    "context": {
        "X-Request-Id": "dca47992-b6cc-4b87-956c-90523c0bf3bb",
        "host": "getaways-search-app2",
        "thread": "http-nio-80-exec-12"
    }
}

如您所见,这些是嵌套数组。关于如何递归地展平这类结构已经有很多讨论,但我无法展平 HotelSummary 下的数组。有任何想法吗?

  • 我想将部分 JSON 展平为以下形式:
{  
   "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
   "numberOfRoomsRequested":1,
   "moreResultsAvailable":"true",
   "cacheKey":"-705f6d43:15086db3fd1:-4c58",
   "cacheLocation":"10.178.144.36:7300",
   "size":2,
   "activePropertyCount":2,
   "hotelId":132684,
   "city":"Seattle",
   "highRate":159.0,
   "lowRate":159.0,
   "rateCurrencyCode":"USD",
   "roomTypeCode":10351,
   "rateCode":10351,
   "roomDescription":"Standard Room, 1 Queen Bed",
   "promo":"false",
   "averageBaseRate":159.0,
   "averageRate":159.0,
   "currencyCode":"USD",
   "nightlyRateTotal":159.0,
   "surchargeTotal":26.81,
   "total":185.81
}


{  
   "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
   "numberOfRoomsRequested":1,
   "moreResultsAvailable":"true",
   "cacheKey":"-705f6d43:15086db3fd1:-4c58",
   "cacheLocation":"10.178.144.36:7300",
   "size":2,
   "activePropertyCount":2,
   "hotelId":263664,
   "city":"Las Vegas",
   "highRate":135.0,
   "lowRate":94.5,
   "rateCurrencyCode":"USD",
   "roomTypeCode":373685,
   "rateCode":1238953,
   "roomDescription":"Standard Room, 1 King Bed",
   "promo":"true",
   "averageBaseRate":135.0,
   "averageRate":94.5,
   "currencyCode":"USD",
   "nightlyRateTotal":94.5,
   "surchargeTotal":9.45,
   "total":103.95
}
  • 我试过使用 flattenDict 函数,但没有得到所需格式的输出。
def flattenDict(d, result=None):
    """Flatten a nested dict into a single-level dict with dot-joined keys.

    Nested dict keys are joined to their parent key with ".", e.g.
    ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``.

    Args:
        d: Dict to flatten. Keys must be strings, because they are combined
            with ``".".join``.
        result: Optional dict that receives the flattened items. A new dict
            is created when omitted.

    Returns:
        The ``result`` dict (the same object that was passed in, if any).

    Notes:
        * Dict elements inside a list/tuple are flattened under the parent
          key without an index, so when several elements share a key the
          last element wins.
        * Non-dict elements inside a list/tuple are ignored.
    """
    if result is None:
        result = {}
    for key, value in d.items():
        if isinstance(value, dict):
            # Push the parent key down one level, then recurse on that level.
            prefixed = {".".join([key, inner]): item
                        for inner, item in value.items()}
            flattenDict(prefixed, result)
        elif isinstance(value, (list, tuple)):
            for element in value:
                if isinstance(element, dict):
                    prefixed = {".".join([key, inner]): item
                                for inner, item in element.items()}
                    # A single recursive call is enough; repeating it once
                    # per key only rewrites the same entries.
                    flattenDict(prefixed, result)
        else:
            result[key] = value
    return result
4

1 回答 1

1

使用pandas& json_normalize

  • record_path 参数用于指定要展平的主要 key
  • meta 参数用于指定要附加到展平结果中的其他 keys
  • json_normalize 生成的列名包含通往目标 key 的所有上级 keys,因此列名会很长(例如 RoomRateDetailsList.RoomRateDetails.roomTypeCode)
    • 长列名需要重命名为较短的版本
    • 使用字典推导式来创建 rename 字典。
  • 以下代码利用了pathlib
    • .open 是 pathlib 的一个方法
    • 也适用于非 Windows 路径
import pandas as pd
import json
from pathlib import Path


# path to file (raw string so the Windows backslashes are kept literally)
p = Path(r'c:\some_path_to_file\test.json')

# read json file: json.load parses directly from the open file handle
with p.open('r', encoding='utf-8') as f:
    data = json.load(f)

# create dataframe:
#   record_path -> the list whose elements become the rows (one row per HotelSummary entry)
#   meta        -> values from the enclosing objects that are repeated on every row
df = pd.json_normalize(data,
                    record_path=['data1', 'HotelListResponse', 'HotelList', 'HotelSummary'],
                    meta=[['data1', 'HotelListResponse', 'customerSessionId'],
                          ['data1', 'HotelListResponse', 'numberOfRoomsRequested'],
                          ['data1', 'HotelListResponse', 'moreResultsAvailable'],
                          ['data1', 'HotelListResponse', 'cacheKey'],
                          ['data1', 'HotelListResponse', 'cacheLocation'],
                          ['data1', 'HotelListResponse', 'HotelList', '@size'],
                          ['data1', 'HotelListResponse', 'HotelList', '@activePropertyCount']])

# rename columns: keep only the last dot-separated segment of each column name
# and drop the '@' prefix, e.g. '...ChargeableRateInfo.@total' -> 'total'
rename = {value: value.split('.')[-1].replace('@', '') for value in df.columns}
df = df.rename(columns=rename)

# dataframe view
 hotelId       city  highRate  lowRate rateCurrencyCode  roomTypeCode  rateCode             roomDescription  promo  averageBaseRate  averageRate currencyCode  nightlyRateTotal  surchargeTotal   total                     customerSessionId numberOfRoomsRequested moreResultsAvailable                     cacheKey       cacheLocation size activePropertyCount
  132684    Seattle     159.0    159.0              USD         10351     10351  Standard Room, 1 Queen Bed  False            159.0        159.0          USD             159.0           26.81  185.81  0AB29024-F6D4-3915-0862-DB3FD1904C5A                      1                 True  -705f6d43:15086db3fd1:-4c58  10.178.144.36:7300    2                   2
  263664  Las Vegas     135.0     94.5              USD        373685   1238953   Standard Room, 1 King Bed   True            135.0         94.5          USD              94.5            9.45  103.95  0AB29024-F6D4-3915-0862-DB3FD1904C5A                      1                 True  -705f6d43:15086db3fd1:-4c58  10.178.144.36:7300    2                   2

# save to JSON: orient='records' writes a list with one object per dataframe row
df.to_json('out.json', orient='records')

最终 JSON 输出:

[{
        "hotelId": 132684,
        "city": "Seattle",
        "highRate": 159.0,
        "lowRate": 159.0,
        "rateCurrencyCode": "USD",
        "roomTypeCode": 10351,
        "rateCode": 10351,
        "roomDescription": "Standard Room, 1 Queen Bed",
        "promo": false,
        "averageBaseRate": 159.0,
        "averageRate": 159.0,
        "currencyCode": "USD",
        "nightlyRateTotal": 159.0,
        "surchargeTotal": 26.81,
        "total": 185.81,
        "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested": 1,
        "moreResultsAvailable": true,
        "cacheKey": "-705f6d43:15086db3fd1:-4c58",
        "cacheLocation": "10.178.144.36:7300",
        "size": 2,
        "activePropertyCount": 2
    }, {
        "hotelId": 263664,
        "city": "Las Vegas",
        "highRate": 135.0,
        "lowRate": 94.5,
        "rateCurrencyCode": "USD",
        "roomTypeCode": 373685,
        "rateCode": 1238953,
        "roomDescription": "Standard Room, 1 King Bed",
        "promo": true,
        "averageBaseRate": 135.0,
        "averageRate": 94.5,
        "currencyCode": "USD",
        "nightlyRateTotal": 94.5,
        "surchargeTotal": 9.45,
        "total": 103.95,
        "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested": 1,
        "moreResultsAvailable": true,
        "cacheKey": "-705f6d43:15086db3fd1:-4c58",
        "cacheLocation": "10.178.144.36:7300",
        "size": 2,
        "activePropertyCount": 2
    }
]
于 2019-10-30T18:23:17.513 回答