26

我正在使用适用于 DynamoDB 的 AWS 命令​​行界面

当我们查询一个项目时,会得到非常详细的 JSON 输出。输出类似下面这样(它是根据 aws 命令行帮助中 get-item 的示例构建的,以便尽量详尽;其中省略了 NULL 类型):

{
    "Count": 1, 
    "Items": [
        {
            "Id": {
                "S": "app1"
            }, 
            "Parameters": {
                "M": {
                    "nfs": {
                        "M": {
                            "IP" : {
                                "S" : "172.16.0.178"
                            }, 
                            "defaultPath": {
                                "S": "/mnt/ebs/"
                            },
                            "key": {
                                "B": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
                            },
                            "activated": {
                                "BOOL": true 
                            }
                        }
                    },
                    "ws" : {
                        "M" : {
                            "number" : {
                                "N" : "5"
                            },
                            "values" : {
                                "L" : [
                                    { "S" : "12253456346346"},
                                    { "S" : "23452353463464"},
                                    { "S" : "23523453461232"},
                                    { "S" : "34645745675675"},
                                    { "S" : "46456745757575"}
                                ]
                            }
                        } 
                    }
                }
            },
            "Oldtypes": {
                "typeSS" : {"SS" : ["foo", "bar", "baz"]},
                "typeNS" : {"NS" : ["0", "1", "2", "3", "4", "5"]},
                "typeBS" : {"BS" : ["VGVybWluYXRvcgo=", "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK", "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=", "VGVybWluYXRvciA0OiBTYWx2YXRpb24K","VGVybWluYXRvciA1OiBHZW5lc2lzCg=="]}
            }
        }
    ], 
    "ScannedCount": 1, 
    "ConsumedCapacity": null
}

有什么方法可以让 Items 部分得到更简单的输出吗?像这样:

{
    "ConsumedCapacity": null,
    "Count": 1,
    "Items": [
        {
            "Id": "app1",
            "Parameters": {
                "nfs": {
                    "IP": "172.16.0.178",
                    "activated": true,
                    "defaultPath": "/mnt/ebs/",
                    "key": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
                },
                "ws": {
                    "number": 5,
                    "values": ["12253456346346","23452353463464","23523453461232","34645745675675","46456745757575"]
                }
            },
            "Oldtypes": {
                "typeBS": ["VGVybWluYXRvcgo=", "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK", "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=", "VGVybWluYXRvciA0OiBTYWx2YXRpb24K", "VGVybWluYXRvciA1OiBHZW5lc2lzCg=="],
                "typeNS": [0, 1, 2, 3, 4, 5],
                "typeSS": ["foo","bar","baz"]
            }
        }
    ],
    "ScannedCount": 1
}

dynamodb-AWS CLI 1.7.10 文档中没有任何帮助。

我们必须从命令行获取结果。如有必要,我愿意使用 jq 之类的其他命令行工具,但这样的 jq 映射对我来说似乎很复杂。


更新 1:基于 jq 的解决方案(在 DanielH 的回答帮助下)

使用 jq 很容易做到,只是不太优雅。您可以执行以下操作:

$> aws dynamodb query --table-name ConfigCatalog --key-conditions '{ "Id" : {"AttributeValueList": [{"S":"app1"}], "ComparisonOperator": "EQ"}}' | jq -r '.Items[0].Parameters.M."nfs#IP".S'

结果将是:172.16.0.178

jq -r选项为您提供原始输出。


更新 2:基于 jq 的解决方案(在 @jeff-mercado 的帮助下)

这是 Jeff Mercado 用于解组 DynamoDB 输出的 jq 函数的更新和注释版本。它会给你预期的输出:

$> cat unmarshal_dynamodb.jq
# Recursively replaces DynamoDB attribute-value descriptors ({"S": "x"},
# {"N": "5"}, {"M": {...}}, ...) with the plain JSON values they describe.
# Everything that is not a descriptor (the response envelope with "Count",
# "Items", "ScannedCount", "ConsumedCapacity", plain scalars) is kept, with
# its children decoded.
#
# The dispatch is an explicit if/elif chain rather than a chain of `//`
# alternatives: `//` discards results that are false or null, which left
# {"BOOL": false} undecoded.
def unmarshal_dynamodb:
  # True when the input is a one-key object whose only key is $tag and whose
  # payload has the JSON type $type. The payload check keeps an item that merely
  # has a single attribute *named* like a type tag from being mistaken for a
  # descriptor when its value has the wrong shape.
  def tagged($tag; $type): keys == [$tag] and (.[$tag] | type == $type);

  if type == "object" then
    # DynamoDB string type
    if   tagged("S"; "string")     then .S
    # DynamoDB blob type (left as the base64 string the CLI prints)
    elif tagged("B"; "string")     then .B
    # DynamoDB number type (transmitted as a string)
    elif tagged("N"; "string")     then .N | tonumber
    # DynamoDB boolean type, including false
    elif tagged("BOOL"; "boolean") then .BOOL
    # DynamoDB null type
    elif tagged("NULL"; "boolean") then null
    # DynamoDB map type, recursion on each item
    elif tagged("M"; "object")     then .M | map_values(unmarshal_dynamodb)
    # DynamoDB list type, recursion on each item
    elif tagged("L"; "array")      then .L | map(unmarshal_dynamodb)
    # DynamoDB typed list type SS, string set
    elif tagged("SS"; "array")     then .SS
    # DynamoDB typed list type NS, number set
    elif tagged("NS"; "array")     then .NS | map(tonumber)
    # DynamoDB typed list type BS, blob set
    elif tagged("BS"; "array")     then .BS
    # any other object: "Count", "Items", "ScannedCount", "ConsumedCapacity", items
    else map_values(unmarshal_dynamodb)
    end
  elif type == "array" then
    map(unmarshal_dynamodb)
  else
    # leaf values
    .
  end
  ;
unmarshal_dynamodb

如果您将 DynamoDB 查询输出保存到文件中,比如说 ddb-query-result.json,您可以执行以下命令以获得所需的结果:

$> jq -f unmarshal_dynamodb.jq ddb-query-result.json
4

6 回答 6

20

您可以使用精心设计的函数递归地解码这些值。看起来键名对应于一种类型:

S -> string
N -> number
M -> map

如果可能,处理您要解码的每种情况,否则将其过滤掉。您可以使用各种类型过滤器和替代运算符(//)来执行此操作。

$ cat input.json
{
  "Count": 1,
  "Items": [
    {
      "Id": { "S": "app1" },
      "Parameters": {
        "M": {
          "nfs#IP": { "S": "192.17.0.13" },
          "maxCount": { "N": "1" },
          "nfs#defaultPath": { "S": "/mnt/ebs/" }
        }
      }
    }
  ],
  "ScannedCount": 1,
  "ConsumedCapacity": null
}
$ cat ~/.jq
# Recursively decodes DynamoDB attribute-value descriptors into plain JSON.
#
# Each alternative wraps its result in { value: ... } and the final `.value`
# unwraps it again. The wrapper is always an object, so `//` (which skips
# results that are false or null) never mistakes a decoded false/null for
# "this alternative did not match".
#
# There is no branch for the NULL type: {"NULL": true} is handled by the
# "non-conforming objects" alternative and comes out unchanged.
def decode_ddb:
    def _sprop($key): select(keys == [$key])[$key];                 # single property objects only
       ((objects | { value: _sprop("S") })                          # string (from string)
    // (objects | { value: _sprop("B") })                           # blob (from string)
    // (objects | { value: _sprop("N") | tonumber })                # number (from string)
    // (objects | { value: _sprop("BOOL") })                        # boolean (from boolean)
    // (objects | { value: _sprop("M") | map_values(decode_ddb) })  # map (from object)
    // (objects | { value: _sprop("L") | map(decode_ddb) })         # list (from encoded array)
    // (objects | { value: _sprop("SS") })                          # string set (from string array)
    // (objects | { value: _sprop("NS") | map(tonumber) })          # number set (from string array)
    // (objects | { value: _sprop("BS") })                          # blob set (from string array)
    // (objects | { value: map_values(decode_ddb) })                # all other non-conforming objects
    // (arrays | { value: map(decode_ddb) })                        # all other non-conforming arrays
    // { value: . }).value                                          # everything else
    ;
$ jq 'decode_ddb' input.json
{
  "Count": 1,
  "Items": [
    {
      "Id": "app1",
      "Parameters": {
        "nfs#IP": "192.17.0.13",
        "maxCount": 1,
        "nfs#defaultPath": "/mnt/ebs/"
      }
    }
  ],
  "ScannedCount": 1,
  "ConsumedCapacity": null
}
于 2015-02-21T03:05:59.897 回答
1

实现帖子目标的另一种方法是使用 node-dynamodb 或 dynamodb-marshaler 之类的 node.js 扩展,并构建一个 node 命令行工具。

使用 commander 包构建 node.js 命令行应用程序的有趣教程:创建您的第一个 Node.js 命令行应用程序


这是一个快速而肮脏的 oneliner,它从标准输入读取一条记录并以简化形式打印出来:

node -e 'console.log(JSON.stringify(require("aws-sdk").DynamoDB.Converter.unmarshall(JSON.parse(require("fs").readFileSync(0, "utf-8")))))'
于 2015-02-23T19:46:28.077 回答
0

据我所知,除了您发布的“详细”输出之外,没有其他输出格式。因此我认为,您无法避免使用像 jq 或 sed 这样的中间工具。

这篇文章中有几个关于转换原始 DynamoDB 数据的建议:

从 DynamoDB 导出数据

也许您可以将这些脚本之一与 jq 或 sed 结合使用。

于 2015-02-19T15:02:50.470 回答
0

这是另一种方法。这可能有点残酷,但它显示了基本思想。

# Flattens DynamoDB-typed JSON by rewriting the *paths* of its leaf values:
# the input is streamed as [path, leaf] events, the type-tag components are
# removed from each path, and the leaves are written into a fresh object.

# Type tags that are simply removed from a path. "N" and "NS" are not listed
# here because fixnum needs to see them to know which leaves to convert.
def unwanted:    ["B","BOOL","M","S","L","BS","SS"];

# Keeps only the components of path p that do not occur in `unwanted`
# (the lookup `unwanted[[.]]` comes back as [] for those).
# NOTE(review): every component is filtered, so an attribute whose own name
# equals one of the tags (e.g. an attribute called "S") is dropped from the
# path as well.
def fixpath(p):  [ p[] | select( unwanted[[.]]==[] ) ];

# Handles the numeric tags on an already-filtered path p with leaf v and
# returns a [path, value] pair:
#   [..., "NS", i] -> the "NS" component is removed, the index is kept, v becomes a number
#   [..., "N"]     -> the "N" component is removed, v becomes a number
#   anything else  -> path and value are returned unchanged
def fixnum(p;v):
    if   p[-2]=="NS" then [p[:-2]+p[-1:],(v|tonumber)]
    elif p[-1]=="N" then [p[:-1], (v|tonumber)]
    else [p,v] end;

# Only the two-element stream events carry a leaf value ([path, leaf]); each
# one is re-inserted at its cleaned-up path, starting from an empty object.
reduce (tostream|select(length==2)) as [$p,$v] (
    {}
  ; fixnum(fixpath($p);$v) as [$fp,$fv]      
  | setpath($fp;$fv)
)

在线尝试!

示例运行(假设过滤器保存在 filter.jq 中,数据保存在 data.json 中):

$ jq -M -f filter.jq data.json
{
  "ConsumedCapacity": null,
  "Count": 1,
  "Items": [
    {
      "Id": "app1",
      "Oldtypes": {
        "typeBS": [
          "VGVybWluYXRvcgo=",
          "VGVybWluYXRvciAyOiBKdWRnbWVudCBEYXkK",
          "VGVybWluYXRvciAzOiBSaXNlIG9mIHRoZSBNYWNoaW5lcwo=",
          "VGVybWluYXRvciA0OiBTYWx2YXRpb24K",
          "VGVybWluYXRvciA1OiBHZW5lc2lzCg=="
        ],
        "typeNS": [
          0,
          1,
          2,
          3,
          4,
          5
        ],
        "typeSS": [
          "foo",
          "bar",
          "baz"
        ]
      },
      "Parameters": {
        "nfs": {
          "IP": "172.16.0.178",
          "activated": true,
          "defaultPath": "/mnt/ebs/",
          "key": "dGhpcyB0ZXh0IGlzIGJhc2U2NC1lbmNvZGVk"
        },
        "ws": {
          "number": 5,
          "values": [
            "12253456346346",
            "23452353463464",
            "23523453461232",
            "34645745675675",
            "46456745757575"
          ]
        }
      }
    }
  ],
  "ScannedCount": 1
}
于 2017-10-12T02:15:23.997 回答
0

这是 jq 解决方案的更新版本,可以处理空值。

$> cat unmarshal_dynamodb.jq
# Recursively replaces DynamoDB attribute-value descriptors ({"S": "x"},
# {"N": "5"}, {"NULL": true}, {"M": {...}}, ...) with the plain JSON values
# they describe. Everything that is not a descriptor (the response envelope
# with "Count", "Items", "ScannedCount", "ConsumedCapacity", plain scalars)
# is kept, with its children decoded.
#
# The dispatch is an explicit if/elif chain rather than a chain of `//`
# alternatives: `//` discards results that are false or null, which left
# {"BOOL": false} undecoded. NULL is handled as one more branch, so no
# separate walk over the whole subtree is needed at each recursion level.
def unmarshal_dynamodb:
  # True when the input is a one-key object whose only key is $tag and whose
  # payload has the JSON type $type. The payload check keeps an item that merely
  # has a single attribute *named* like a type tag from being mistaken for a
  # descriptor when its value has the wrong shape.
  def tagged($tag; $type): keys == [$tag] and (.[$tag] | type == $type);

  if type == "object" then
    # DynamoDB null type
    if   tagged("NULL"; "boolean") then null
    # DynamoDB string type
    elif tagged("S"; "string")     then .S
    # DynamoDB blob type (left as the base64 string the CLI prints)
    elif tagged("B"; "string")     then .B
    # DynamoDB number type (transmitted as a string)
    elif tagged("N"; "string")     then .N | tonumber
    # DynamoDB boolean type, including false
    elif tagged("BOOL"; "boolean") then .BOOL
    # DynamoDB map type, recursion on each item
    elif tagged("M"; "object")     then .M | map_values(unmarshal_dynamodb)
    # DynamoDB list type, recursion on each item
    elif tagged("L"; "array")      then .L | map(unmarshal_dynamodb)
    # DynamoDB typed list type SS, string set
    elif tagged("SS"; "array")     then .SS
    # DynamoDB typed list type NS, number set
    elif tagged("NS"; "array")     then .NS | map(tonumber)
    # DynamoDB typed list type BS, blob set
    elif tagged("BS"; "array")     then .BS
    # any other object: "Count", "Items", "ScannedCount", "ConsumedCapacity", items
    else map_values(unmarshal_dynamodb)
    end
  elif type == "array" then
    map(unmarshal_dynamodb)
  else
    # leaf values
    .
  end
  ;
unmarshal_dynamodb
$> jq -f unmarshal_dynamodb.jq ddb-query-result.json

感谢 @jeff-mercado 和 @herve 的原始版本。

于 2021-12-14T00:23:16.050 回答
0

这是一个用 Node.js 编写的脚本,可以完成此操作。

我命名了文件reformat.js,但您可以随意命名

'use strict';

/**
 * This script will parse the AWS dynamo CLI JSON response into JS.
 * This parses out the type keys in the objects.
 */

const fs = require('fs');

// Load the response that was saved from the DynamoDB CLI query. The file name is
// fixed and relative, so the script has to be run from the directory containing it.
const rawData = fs.readFileSync('response.json'); // Raw bytes of the saved CLI response (no encoding is passed).
const response = JSON.parse(rawData); // Parse to a JS object to make it easier to work with.

/**
 * Replace every DynamoDB attribute descriptor (`{ S: 'x' }`, `{ M: {...} }`, ...)
 * in `data` with the plain value it describes.
 *
 * The conversion is done in place, and the same object is returned, so both
 * `shallowFormatData(item)` and the mutated `item` hold the result.
 *
 * Decoding rules:
 *   - NULL            -> null
 *   - M               -> object, decoded recursively
 *   - L               -> array, each element decoded recursively
 *   - SS / NS / BS    -> the array of strings exactly as DynamoDB sent it
 *   - S / N / B / BOOL -> the value as sent (numbers stay strings, so no precision is lost)
 *
 * @param {Object|Array} data - An item (attribute name -> descriptor) or an array of descriptors.
 * @returns {Object|Array} `data` itself, with every descriptor replaced by its value.
 */
function shallowFormatData(data) {
  // Decode one `{ TYPE: value }` descriptor into its plain value.
  const formatAttribute = (attribute) => {
    // Values that are not descriptors (already plain, or null) are passed through.
    if (attribute === null || typeof attribute !== 'object') {
      return attribute;
    }

    const [type] = Object.keys(attribute);
    if (type === undefined) {
      return attribute; // empty object: nothing to unwrap
    }

    const value = attribute[type];
    switch (type) {
      case 'NULL':
        // Null attributes arrive as { NULL: true }; the payload carries no information.
        return null;
      case 'M':
      case 'L':
        // Only maps and lists contain further descriptors.
        return shallowFormatData(value);
      default:
        // Scalars and sets (SS/NS/BS) are already plain. Sets in particular must not be
        // recursed into: their elements are bare strings, not descriptors.
        return value;
    }
  };

  for (const key of Object.keys(data)) {
    data[key] = formatAttribute(data[key]);
  }
  return data;
}

// this only gets the Items and not the meta data.
const result = response.Items.map(item => {
  return shallowFormatData(item)
})

console.dir(result, {'maxArrayLength': null}); // There is a default limit on how big a console.log can be, this removes that limit.

步骤 1) 通过 CLI 运行 dynamoDB 查询并将其保存到 JSON 文件。要保存来自 CLI 的响应,只需添加> somefile.json. 为方便起见,我将其保存在与重新格式化文件相同的目录中

// Example: Run in CLI

$ aws dynamodb query --table-name stage_requests-service_FoxEvents \
 --key-condition-expression "PK = :v1" \
 --expression-attribute-values file://expression-attributes.json > response.json

expression-attributes.json

{
  ":v1": {"S": "SOMEVAL"}
}

如果您需要有关我如何查询 DynamoDB 的更多信息,请查看文档https://docs.aws.amazon.com/cli/latest/reference/dynamodb/query.html#examples中的这些示例

现在您有了需要重新格式化的 JSON 数据文件,请从终端运行 reformat.js 脚本

第2步)

// Run this in your terminal
$ node reformat.js > formatted.js 

这样您应该会得到干净的 JS 对象输出。如果您想要 JSON 输出,只需在脚本末尾的 console.dir 中放入 JSON.stringify(result)

于 2020-05-19T23:27:38.830 回答