0

我已将我的 Java 应用程序从 API 的 V2Beta 版本迁移到 V2,但返回的结果似乎不如 V2Beta 版本“准确”。

姓名、地址、邮政编码、年龄等根本没有被去识别。我使用 V2 API 看到的结果与使用 V2Beta API 得到的结果大不相同。也许我做错了什么?给定输入"Hello Mr. John S. Smith! This is Mr. Jones writing back with my SSN: 911-87-9111",唯一被去识别的是 SSN 数字。我本来希望这些名字也会被去识别。

我正在使用 Spring 注入凭据等内容,并且用了一些 Lombok 注解来简化代码,但是大部分代码应该非常简单:

import com.google.api.gax.core.CredentialsProvider;
import com.google.cloud.ProjectName;
import com.google.cloud.dlp.v2.DlpServiceClient;
import com.google.cloud.dlp.v2.DlpServiceSettings;
import com.google.privacy.dlp.v2.CharacterMaskConfig;
import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyConfig;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.DeidentifyContentResponse;
import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.InfoTypeTransformations;
import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation;
import com.google.privacy.dlp.v2.PrimitiveTransformation;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Table.Row;
import com.google.privacy.dlp.v2.Value;
import lombok.AccessLevel;
import lombok.Setter;
import lombok.SneakyThrows;
import lombok.experimental.FieldDefaults;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

import java.util.Collection;
import java.util.LinkedList;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.springframework.util.CollectionUtils.isEmpty;

/**
 * Service that masks sensitive content in free-text strings by sending them to
 * the Cloud DLP V2 {@code deidentifyContent} API as a single-column table.
 *
 * <p>NOTE(review): the request built here carries only a {@code DeidentifyConfig};
 * no {@code InspectConfig} / info types are set. According to the DLP V2 guidance
 * this class was written against, V2 no longer applies a default detector list,
 * so the info types to detect (e.g. {@code PERSON_NAME}) should be specified
 * explicitly on the request — confirm and add as needed.
 */
@Service("DeIdentifyTest")
@FieldDefaults(level = AccessLevel.PRIVATE)
@Setter
@Slf4j
public class DeIdentifyTest {
    final DlpServiceSettings settings;
    final String projectId;

    /**
     * Builds the DLP client settings from the supplied credentials.
     *
     * <p>{@code @SneakyThrows} lets any checked exception raised while building
     * the settings propagate without being declared.
     *
     * @param credentialsProvider credentials used for every DLP call
     * @param projectId           project id used to build the request parent
     */
    @SneakyThrows
    public DeIdentifyTest(CredentialsProvider credentialsProvider, String projectId) {
        this.settings = DlpServiceSettings.newBuilder().setCredentialsProvider(credentialsProvider).build();
        this.projectId = projectId;
    }

    /**
     * Asynchronously de-identifies the given strings.
     *
     * <p>The work is submitted with {@link CompletableFuture#supplyAsync(java.util.function.Supplier)},
     * i.e. without an explicit executor.
     *
     * @param input strings to de-identify; blank or {@code null} entries are
     *              skipped, so the result may contain fewer elements than the input
     * @param mask  masking character passed to the DLP character-mask transformation
     * @return a future holding the de-identified strings in table row order; a
     *         {@code null} or empty {@code input} is returned as-is
     */
    public CompletableFuture<Collection<String>> redact(final Collection<String> input,
                                                            final String mask) {
        return CompletableFuture.supplyAsync(() -> redactContent(input, mask));
    }

    /**
     * Performs the blocking DLP call for {@link #redact(Collection, String)}.
     *
     * @param input strings to de-identify
     * @param mask  masking character for the character-mask transformation
     * @return the string value of every cell in the response table, or an empty
     *         collection when {@code input} holds no non-blank entry
     */
    @SneakyThrows
    private Collection<String> redactContent(Collection<String> input, String mask) {
        if (isEmpty(input)) {
            return input;
        }

        // Log only the size: the raw input is exactly the sensitive text this
        // service exists to redact, so it must not end up in the logs.
        log.debug("Redacting {} input item(s)", input.size());

        ContentItem contentItem = createContentItemWithTable(input);
        if (contentItem.getTable().getRowsCount() == 0) {
            // Every entry was blank/null: nothing to send, and blank entries are
            // never part of the output anyway.
            return new LinkedList<>();
        }

        CharacterMaskConfig characterMaskConfig =
                CharacterMaskConfig.newBuilder().setMaskingCharacter(mask).build();

        PrimitiveTransformation primitiveTransformation =
                PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build();

        InfoTypeTransformation infoTypeTransformationObject =
                InfoTypeTransformation.newBuilder().setPrimitiveTransformation(primitiveTransformation).build();

        InfoTypeTransformations infoTypeTransformationArray =
                InfoTypeTransformations.newBuilder().addTransformations(infoTypeTransformationObject).build();

        DeidentifyConfig deidentifyConfig =
                DeidentifyConfig.newBuilder().setInfoTypeTransformations(infoTypeTransformationArray).build();

        // A client is created (and closed) per call, as in the original design.
        try (DlpServiceClient dlpClient = DlpServiceClient.create(settings)) {
            // Create the deidentification request object
            DeidentifyContentRequest request =
                    DeidentifyContentRequest.newBuilder()
                            .setParent(ProjectName.of(projectId).toString())
                            .setDeidentifyConfig(deidentifyConfig)
                            .setItem(contentItem)
                            .build();

            // Execute the deidentification request
            DeidentifyContentResponse response = dlpClient.deidentifyContent(request);
            Table table = response.getItem().getTable();

            // Flatten rows -> cells -> string values, preserving row order.
            return table.getRowsList().stream()
                    .flatMap(row -> row.getValuesList().stream())
                    .map(Value::getStringValue)
                    .collect(Collectors.toCollection(LinkedList::new));
        }
    }

    /**
     * Wraps the non-blank input strings in a one-column table, one row per string.
     *
     * <p>The single header is named {@code "unused"}. When no entry is non-blank
     * the returned item contains a table with zero rows instead of failing.
     *
     * @param input strings to place in the table; blank and {@code null} entries are skipped
     * @return a content item holding the table
     */
    private ContentItem createContentItemWithTable(Collection<String> input) {
        Table.Builder tableBuilder =
                Table.newBuilder().addHeaders(FieldId.newBuilder().setName("unused").build());

        for (String text : input) {
            if (isNotBlank(text)) {
                Value cell = Value.newBuilder().setStringValue(text).build();
                tableBuilder.addRows(Row.newBuilder().addValues(cell).build());
            }
        }

        return ContentItem.newBuilder().setTable(tableBuilder.build()).build();
    }
}
4

1 回答 1

0

您的示例未能向我们展示您选择检测的 InfoType。V2 中的主要变化是不再有默认的检测器列表。您必须具体说明您要查找的内容。

有关完整列表,请参阅 https://cloud.google.com/dlp/docs/infotypes-reference。

如果我发送这个

 {
 "item": {
  "value": "Hello Mr. John S. Smith! This is Mr. Jones writing back with my SSN: 509-03-2530"
 },
 "inspectConfig": {
  "includeQuote": true,
  "infoTypes": [
   {
    "name": "PERSON_NAME"
   },
   {
    "name": "US_SOCIAL_SECURITY_NUMBER"
   }
  ]
 }
}

我得到的结果是

{
 "result": {
  "findings": [
   {
    "quote": "Mr. John S. Smith",
    "infoType": {
     "name": "PERSON_NAME"
    },
    "likelihood": "LIKELY",
    "location": {
     "byteRange": {
      "start": "6",
      "end": "23"
     },
     "codepointRange": {
      "start": "6",
      "end": "23"
     }
    },
    "createTime": "2018-05-21T16:11:54.449Z"
   },
   {
    "quote": "Jones",
    "infoType": {
     "name": "PERSON_NAME"
    },
    "likelihood": "POSSIBLE",
    "location": {
     "byteRange": {
      "start": "37",
      "end": "42"
     },
     "codepointRange": {
      "start": "37",
      "end": "42"
     }
    },
    "createTime": "2018-05-21T16:11:54.449Z"
   },
   {
    "quote": "509-03-2530",
    "infoType": {
     "name": "US_SOCIAL_SECURITY_NUMBER"
    },
    "likelihood": "LIKELY",
    "location": {
     "byteRange": {
      "start": "69",
      "end": "80"
     },
     "codepointRange": {
      "start": "69",
      "end": "80"
     }
    },
    "createTime": "2018-05-21T16:11:54.425Z"
   }
  ]
 }
}
于 2018-05-21T16:13:44.437 回答