0

我在 Big Query 中有数据,我想在 Spark 集群中运行分析。根据文档,如果我实例化一个 Spark 集群,它应该带有一个 Big Query 连接器。我正在寻找任何示例代码来执行此操作,在pyspark中找到了一个。找不到任何 c# 示例。还找到了一些关于 DataProc APIs nuget 包中的函数的文档。

正在寻找使用 c# 在 Google Cloud 中启动 Spark 集群的示例。

4

1 回答 1

2

安装 Google.Apis.Dataproc.v1 版本 1.10.0.40(或更高版本)后:

以下是用于在 C# 中创建 Dataproc 集群的快速示例控制台应用:

using Google.Apis.Auth.OAuth2; 
using Google.Apis.Services;
using Google.Apis.Dataproc.v1; 
using Google.Apis.Dataproc.v1.Data;

using System; 
using System.Threading;

namespace DataprocSample {
    class Program
    {
        static void Main(string[] args)
        {
            string project = "YOUR PROJECT HERE";
            string dataprocGlobalRegion = "global";
            string zone = "us-east1-b";
            string machineType = "n1-standard-4";
            string clusterName = "sample-cluster";
            int numWorkers = 2;
            // See the docs for Application Default Credentials:
            // https://developers.google.com/identity/protocols/application-default-credentials
            // In general, a previous 'gcloud auth login' will suffice if running as yourself.
            // If running from a VM, ensure the VM was started such that the service account has
            // the CLOUD_PLATFORM scope. 
            GoogleCredential credential = GoogleCredential.GetApplicationDefaultAsync().Result;
            if (credential.IsCreateScopedRequired)
            {
                credential = credential.CreateScoped(new[] { DataprocService.Scope.CloudPlatform });
            }

            DataprocService service = new DataprocService(
                new BaseClientService.Initializer()
                {
                    HttpClientInitializer = credential,
                    ApplicationName = "Dataproc Sample",
                });

            // Create a new cluster:
            Cluster newCluster = new Cluster
            {
                ClusterName = clusterName,
                Config = new ClusterConfig
                {
                    GceClusterConfig = new GceClusterConfig
                    {
                        ZoneUri = String.Format(
                            "https://www.googleapis.com/compute/v1/projects/{0}/zones/{1}",
                            project, zone),
                    },
                    MasterConfig = new InstanceGroupConfig
                    {
                        NumInstances = 1,
                        MachineTypeUri = String.Format(
                            "https://www.googleapis.com/compute/v1/projects/{0}/zones/{1}/machineTypes/{2}",
                            project, zone, machineType),
                    },
                    WorkerConfig = new InstanceGroupConfig
                    {
                        NumInstances = numWorkers,
                        MachineTypeUri = String.Format(
                            "https://www.googleapis.com/compute/v1/projects/{0}/zones/{1}/machineTypes/{2}",
                            project, zone, machineType),
                    },
                },
            };
            Operation createOperation = 
                service.Projects.Regions.Clusters.Create(newCluster, project, dataprocGlobalRegion).Execute();
            // Poll the operation:
            while (!IsDone(createOperation))
            {
                Console.WriteLine("Polling operation {0}", createOperation.Name);
                createOperation =
                    service.Projects.Regions.Operations.Get(createOperation.Name).Execute();
                Thread.Sleep(1000);
            }

            Console.WriteLine("Done creating cluster {0}", newCluster.ClusterName);
        }
        static bool IsDone(Operation op)
        {
            return op.Done ?? false;
        }
    }
 }
于 2016-02-29T21:51:32.727 回答