0

我通过 Clearbit 提供程序使用 API 调用将一些数据拉入本地服务器上的数据库。使用 SQL Server 2017 解析数据一切正常,直到遇到问题。

为了便于理解,我将直接上示例。

这是 JSON 格式的 API 调用输出示例

{
  "id": "384dfe0d-5bba-445e-a390-2d946dc84a12",
  "name": "Honeywell",
  "legalName": "Honeywell International Inc",
  "domain": "honeywell.com",
  "domainAliases": [
    "honeywell.at",
    "honeywell.it",
    "evohome.info",
    "wifithermostat.com",
    "emsaviation.com",
    "mytotalconnect.com",
    "honeywell.nl",
    "honeywell.co.za",
    "honeywell.com.au",
    "honeywell.ca",
    "alliedsignal.com",
    "emsdss.com",
    "primusepic.com",
    "alarmnet-me.com",
    "lebow.com",
    "honeywell.ie",
    "honeywell.jp",
    "honeywell.com.br",
    "trendcontrol.co.uk",
    "honeywellforjaguar.co.uk",
    "aviaso.com",
    "skyforce.co.uk",
    "newenglandinstruments.com",
    "honeywell.fi",
    "alarmnet.com",
    "skyconnect.com",
    "skyforceuk.com",
    "securitex.com",
    "missionready.com",
    "honeywellaerospace.com",
    "formation.com",
    "aclon.com",
    "electrocorp.com",
    "ultrak.com",
    "satcom1.com",
    "hsmpats.com",
    "myaerospace.com",
    "emsglobaltracking.com",
    "fascocontrols.com",
    "honeywellnow.com",
    "bendixbrakes.com",
    "elmwoodsensors.com",
    "ovationselect.com",
    "honeywellbusinessaviation.com",
    "iflyaspire.com",
    "btrinc.com",
    "honeywellspecialtymaterials.com",
    "magneticsensors.com",
    "activeye.com",
    "egarrett.com",
    "novar-eds.com",
    "aviaso.co.uk",
    "chadwick-helmuth.com",
    "datainstruments.com",
    "lebowproducts.com",
    "honeywell-produktkatalog.de",
    "honeywellforjaguar.com",
    "hobbs-corp.com",
    "emsgt.com",
    "honeywellaes.com",
    "honeywellbuildingsolutions.com",
    "satcom1.aero",
    "honeywell-building-solutions.de",
    "lifesafetydistribution.com",
    "godirect.com",
    "garrettbulletin.com",
    "yourhomeexpert.com",
    "aerospacetrading.com",
    "sensorsystems.com",
    "wifithermostat.info",
    "honeywell-fachseminare.de",
    "hobbscorporation.com",
    "kcl.hu",
    "honeywell.sk",
    "esser.info",
    "inertialsensor.com",
    "sensotec.com",
    "notifier.com",
    "honeywellgreer.com",
    "smartact.de",
    "honeywellfire.com",
    "iris-systems.com",
    "honeywell.ru",
    "lxei.com",
    "thermalswitch.com",
    "hightempsolutions.com",
    "aubetech.com",
    "honeywell-haustechnik.de",
    "careersathoneywell.com",
    "garrettbyhoneywell.com",
    "honeywell.in",
    "honeywell.cn",
    "honeywell.com.mx",
    "kcp.com",
    "satamatics.com",
    "myflite.com"
  ],
  "site": {
    "title": "Honeywell",
    "h1": null,
    "metaDescription": " We are blending products with software solutions to link people and businesses to the information they need to be more efficient, safer and connected. ",
    "metaAuthor": null,
    "phoneNumbers": [
      "+1 877-271-8620",
      "+1 800-633-3991",
      "+1 877-841-2840",
      "+1 480-353-3020",
      "+1 973-455-3388",
      "+1 973-204-9621",
      "+32 2 728 20 45",
      "+32 476 20 90 19",
      "+44 7794 007289",
      "+86 21 2219 6509"
    ],
    "emailAddresses": [
      "domains@honeywell.com",
      "HoneywellPrivacy@honeywell.com",
      "rob.ferris@honeywell.com",
      "ilse.schouteden@honeywell.com",
      "chris.martin2@honeywell.com",
      "Anahi.Espinosa@honeywell.com",
      "lydia.lu@honeywell.com",
      "madhavi.jha@Honeywell.com",
      "Steven.Brecken@Honeywell.com",
      "Steve.Brecken@Honeywell.com",
      "Eugene.Tan@Honeywell.com"
    ]
  },
  "category": {
    "sector": "Consumer Discretionary",
    "industryGroup": "Automobiles & Components",
    "industry": "Automotive",
    "subIndustry": "Automotive",
    "sicCode": "3714",
    "naicsCode": null
  },
  "tags": [
    "Automotive",
    "Enterprise",
    "B2B",
    "Electrical"
  ],
  "description": " We are blending products with software solutions to link people and businesses to the information they need to be more efficient, safer and connected. ",
  "foundedYear": 1936,
  "location": "115 Tabor Rd, Morris Plains, NJ 07950, USA",
  "timeZone": "America/New_York",
  "utcOffset": -4,
  "geo": {
    "streetNumber": "115",
    "streetName": "Tabor Road",
    "subPremise": null,
    "city": "Morris Plains",
    "postalCode": "07950",
    "state": "New Jersey",
    "stateCode": "NJ",
    "country": "United States",
    "countryCode": "US",
    "lat": 40.8358456,
    "lng": -74.4771042
  },
  "logo": "https://logo.clearbit.com/honeywell.com",
  "facebook": {
    "handle": "293855263965203",
    "likes": null
  },
  "linkedin": {
    "handle": "company/honeywell"
  },
  "twitter": {
    "handle": "HoneywellNow",
    "id": "257492733",
    "bio": "Please visit us over at @Honeywell.",
    "followers": 2322,
    "following": 1,
    "location": "Morris Plains, NJ",
    "site": "https:",
    "avatar": null
  },
  "crunchbase": {
    "handle": "organization/honeywell"
  },
  "emailProvider": false,
  "type": "public",
  "ticker": "HON",
  "phone": "+1 973-455-2000",
  "metrics": {
    "alexaUsRank": 6045,
    "alexaGlobalRank": 18053,
    "googleRank": null,
    "employees": 51779,
    "employeesRange": "1000+",
    "marketCap": 102920000000,
    "raised": null,
    "annualRevenue": 39302000000,
    "fiscalYearEnd": 12
  },
  "indexedAt": "2017-07-11T23:00:41.115Z",
  "tech": [
    "crazy_egg",
    "google_analytics",
    "google_tag_manager",
    "asp_net",
    "mouseflow",
    "marketo",
    "go_squared",
    "microsoft_exchange_online",
    "outlook",
    "recaptcha"
  ],
  "parent": {
    "domain": null
  },
  "similarDomains": [
    "abb-livingspace.com",
    "alerton.com",
    "gereports.com",
    "honeywellprocess.com",
    "honeywelluk.com",
    "johnsoncontrols.com",
    "jpinstruments.com",
    "lenel.com",
    "maxitrol.com",
    "nucalgon.com",
    "schneider-electric.us",
    "siemens.com"
  ]
}

如果您查看此处的示例,您会看到"domainAliases": [...] 这是我仍然需要解析的 JSON 的一部分。

这是我已经拥有的 SQL 解析查询:

-- Flattens selected fields of the JSON payload stored in Domains.JSONData
-- into named columns, returned alongside every existing column of Domains.
SELECT *
     , JSON_VALUE(JSONData, '$.name') AS CompanyName
     , JSON_VALUE(JSONData, '$.category.sector') AS CategorySector
     , JSON_VALUE(JSONData, '$.category.industryGroup') AS CategoryIndustryGroup
     , JSON_VALUE(JSONData, '$.category.industry') AS CategoryIndustry
     , JSON_VALUE(JSONData, '$.category.subIndustry') AS CategorySubIndustry
     , JSON_VALUE(JSONData, '$.category.sicCode') AS CategorySicCode
     , JSON_VALUE(JSONData, '$.category.naicsCode') AS CategoryNaicsCode
     , JSON_VALUE(JSONData, '$.metrics.employees') AS EmployeesNumber
     , JSON_VALUE(JSONData, '$.metrics.employeesRange') AS EmployeesRange
     , JSON_VALUE(JSONData, '$.metrics.marketCap') AS MarketCap
     , JSON_VALUE(JSONData, '$.metrics.annualRevenue') AS AnnualRevenue
     -- $.similarDomains is an array. JSON_VALUE extracts scalars only and
     -- yields NULL for an array in the default lax mode, so JSON_QUERY is
     -- used to return the array as a JSON text fragment instead.
     , JSON_QUERY(JSONData, '$.similarDomains') AS SimilarDomains
FROM Domains;

我希望将此数据(“domainAliases”)作为上层查询中的数据存储在其他表中(我知道我已经拥有的解析查询只是一个 SELECT 查询,但我也有一个 UPDATE 版本的查询)。

这是一个示例图片,展示了新表中成品的外观,相同的数据库。左列称为Company Name,第二列称为Domain Aliases

左列称为公司名称,第二列称为域别名。

现在 JSON 数据存储在哪里?我将它存储在表 Domains 中名为 JSONData 的列里,所有这些都存储在名为 Domainbank 的数据库中。JSONData 的数据类型是 nvarchar(max)。

我需要按公司名称对数据进行分组,并且公司名称旁边应该列出别名域,就像图片示例所示。请注意,我将针对 10k+ 条 JSONData 记录运行此查询,因此要创建的新表会非常大,但只要全部按公司名称和别名域分组就可以。有些 JSONData 没有以正确的格式返回,因为 API 调用要么没有找到数据,要么出现了其他问题;所以如果查询在 "domainAliases": [...] 下找不到任何内容,或者根本找不到 "domainAliases": [...],那么我不需要该公司出现在新表中。

所以简短回顾一下:创建一个新表(我们称之为 AliasDomains),找到 "domainAliases": [...] 下的数据,同时取出公司名称 JSON_VALUE(JSONData,'$.name') AS CompanyName,将数据按帖子上方图片示例的形式存储到新表中,然后按 CompanyName 分组。

4

1 回答 1

1

因此,从您的帖子中,我并不完全清楚您的问题是什么,但我认为这是如何编写一些 SQL 语句来完成上述操作?

首先,我想说你不必在插入数据时关心 GROUP BY;GROUP BY 是在从表中检索数据时才使用的。

话虽如此,您可以很容易地对 Domains 表使用 SELECT 和 CROSS APPLY OPENJSON 语句来完成您想要的,如下所示:

-- Populates AliasDomains with one row per (company name, alias domain) pair.
-- OPENJSON expands the $.domainAliases array into one row per element, and
-- CROSS APPLY drops source rows for which that array is missing or empty,
-- so companies without aliases never reach the target table.
--
-- OPENJSON raises an error when its input is not valid JSON, which would
-- abort the whole INSERT. The CASE expression passes NULL for such rows;
-- OPENJSON returns no rows for NULL input, so malformed payloads are skipped.
INSERT INTO AliasDomains (CompanyName, DomainAliases)
SELECT
    JSON_VALUE(d.JSONData, '$.name'),
    alias.[value]
FROM Domains AS d
CROSS APPLY OPENJSON(
    CASE WHEN ISJSON(d.JSONData) = 1 THEN d.JSONData END,
    '$.domainAliases'
) AS alias;

编辑:或许应该补充一点,上面语句中的 value 是由 OPENJSON 返回的列,即它引用的是您所需路径(此处为 domainAliases)下的值。

希望这可以帮助?!

尼尔斯

于 2017-08-10T03:19:01.487 回答