0

我需要提取嵌在某些 HTML 标签中的 JSON。如何使用正则表达式从下面的 JSON 中提取名称(键)的值?

<div id="gwt_products_display_results" class="gwt_products_display_results">
                <span class="JSON" style="display: none;">
{
    "products": [
        {
            "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
            "listIndex": "0",
            "minimumPrice": 20,
            "categoryOnSale": "false",
            "mfPartNumber": "FF010ATM",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [],
            "partNumber": "b_FF010ATM",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
            "iapAttributeCode": "",
            "beanType": "bundle",
            "name": "Athena Mineral Fabric by the Yard",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "262682",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 20,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        },
        {
            "targetURL": "/athena-slate-fabric-by-the-yard/262683",
            "listIndex": "1",
            "minimumPrice": 20,
            "categoryOnSale": "false",
            "mfPartNumber": "FF010ATS",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [],
            "partNumber": "b_FF010ATS",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
            "iapAttributeCode": "",
            "beanType": "bundle",
            "name": "Athena Slate Fabric by the Yard",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "262683",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 20,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        },
        {
            "targetURL": "/typewriter-keys-giclee/261307",
            "listIndex": "2",
            "minimumPrice": 259,
            "categoryOnSale": "false",
            "mfPartNumber": "WD813",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [
                {
                    "S7 - Overlay 1": "blank"
                }
            ],
            "partNumber": "p_WD813",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/typewriter-keys-giclee/261307",
            "iapAttributeCode": "",
            "beanType": "product",
            "name": "Typewriter Keys Giclee",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "261307",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 259,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        }
    ]
}
</span>
</div>

到目前为止我尝试过的是

<span class="JSON" style="display: none;">([\s\S]+?)<\/span>
4

3 回答 3

4

You can convert it to an array and then get the names using array_keys();

// Decode into associative arrays (second argument true). Without it,
// json_decode() returns stdClass objects and $array['products'] raises an error.
$array = json_decode($json, true);

// The keys of a single product entry are the JSON property names.
// array_keys($array['products']) would only return the list indexes (0, 1, 2, ...).
$keys = array_keys($array['products'][0]);
于 2013-05-31T12:28:47.253 回答
1

为什么要用正则表达式?正如这里的其他人提到的,您可以使用 json_decode 将其解析为数组再进行处理。

但是,如果您坚持使用正则表达式,只要您的 JSON 格式与所示完全一致,/"(.+?)":/ 就能匹配所有的键。

更新

所以您是从一段 HTML 字符串中获取它的。假设该变量为 $html,既然您坚持使用正则表达式,可以像下面这样先用正则表达式提取出 JSON,然后再解码。要获取键,请使用 array_keys()。

// Capture the text between the opening <span ... class="JSON" ...> tag and the
// next </span>. The /s modifier lets "." match newlines, which the multi-line
// JSON payload requires.
if (preg_match('/<span.*?class="JSON".*?>(.+?)<\/span>/s', $html, $matches) !== 1) {
    // Without this guard $matches[1] would be undefined and json_decode() would get null.
    throw new RuntimeException('No <span class="JSON"> element found in $html.');
}

// true => decode JSON objects as associative arrays so array_keys() can be used.
$decoded_array = json_decode($matches[1], true);

if (!is_array($decoded_array)) {
    throw new RuntimeException('Captured text is not valid JSON: ' . json_last_error_msg());
}

print_r($decoded_array);

if (!isset($decoded_array['products'][0]) || !is_array($decoded_array['products'][0])) {
    throw new RuntimeException('Decoded JSON contains no product entries.');
}

// Property names of the first product entry.
$keys = array_keys($decoded_array['products'][0]);

print_r($keys);
于 2013-05-31T12:38:29.383 回答
0

您可以使用 DOMDocument 和 DOMXPath 来查找包含 JSON 的 span 元素,然后对其内容调用 json_decode。下面是一个粗略的示例,可以帮助您入门:

<?php
// Example: locate the <span class="JSON"> element with DOMXPath, decode its text
// content with json_decode(), and print the name of every product it lists.

// Sample page; the JSON payload sits inside a hidden <span class="JSON">.
$html = '
<html>
    <head>
        <title>Example</title>
    </head>
    <body>
        <div id="gwt_products_display_results" class="gwt_products_display_results">
            <span class="JSON" style="display: none;">
            {
                "products": [
                    {
                        "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "listIndex": "0",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATM",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATM",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Mineral Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262682",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    },
                    {
                        "targetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "listIndex": "1",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATS",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATS",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Slate Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262683",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    }
                ]
            }
            </span>
        </div>
    </body>    
</html>
';

// loadHTML() is an instance method: calling it statically is deprecated in
// PHP 5/7 and is a fatal Error in PHP 8, so create the document first.
$document   = new DOMDocument();
$document->loadHTML($html);
$xpath      = new DOMXPath($document);
$spans      = $xpath->query('//div/span[@class="JSON"]');

foreach ($spans as $span) {
    // nodeValue is the span's text content, i.e. the raw JSON string.
    $catalog = json_decode($span->nodeValue);

    // Skip spans whose content is not JSON with a "products" list.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }

    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // $index is zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}

/*
    We found 2 products.
    Product #1 - Athena Mineral Fabric by the Yard.
    Product #2 - Athena Slate Fabric by the Yard.
*/
于 2013-05-31T13:33:41.593 回答