0

我需要一些帮助。先交代一下背景:我是 PowerShell 的新手,手头有一个任务,简单来说,就是把一个包含 200 多万条记录(来自 BigFix)和很多列的 CSV,通过选择特定的列拆分成多个 CSV。下面的代码是我为此所做的尝试,生成的 CSV 最后会被压缩。问题是,仅处理 20 万条记录就需要大约 4 小时。所以首先我想知道,有没有办法只导入一次 CSV,而不是每次需要选择不同的列进行输出时都重新导入?另外,除了一开始的复制任务(必须最先执行)和压缩(必须在所有 CSV 创建完成之后执行)之外,其余步骤可以同时运行(但我不知道该怎么做)。谢谢你的帮助

# Local working copy of the input CSV; every export step reads from this file.
$filePath = "C:\location2\powerShellTesting\Input\bigFixDataNew.csv"

# Fetch the source file from the network share into the local working copy.
# This must complete before any of the export steps run.
Copy-Item -Path "\\location1\20191213_BFI_SAMPLE_DATA_csv.csv" -Destination $filePath

# Destination files, one per output table. The paths are relative, so they
# resolve against the current location at the time the script runs.
$System                = "..\Output\System.csv"
$AddRemove             = "..\Output\AddRemove.csv"
$GS_PC_BIOS            = "..\Output\GS_PC_BIOS.csv"
$GS_PROCESSOR          = "..\Output\GS_PROCESSOR.csv"
$GS_LOGICAL_DISK       = "..\Output\GS_LOGICAL_DISK.csv"
$GS_X86_PC_MEMORY      = "..\Output\GS_X86_PC_MEMORY.csv"
$GS_COMPUTER_SYSTEM    = "..\Output\GS_COMPUTER_SYSTEM.csv"
$GS_OPERATING_SYSTEM   = "..\Output\GS_OPERATING_SYSTEM.csv"
$GS_WORKSTATION_STATUS = "..\Output\GS_WORKSTATION_STATUS.csv"



# Calculated-property lists, one per output table. Each entry reads one input
# column ('expression') and emits it under a new header ('label').

# AddRemove
$desiredColumnsAddRemove = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Component Name'}; label ='DISPLAYNAME'},
    @{ expression = {$_.'Product Version'}; label = 'VERSION'},
    @{ expression = {$_.'Publisher Name'}; label = 'PUBLISHER'},
    @{ expression = {$_.'Creation'}; label = 'INSTALLDATE'}
)

# GS_COMPUTER_SYSTEM
$desiredColumnsGS_COMPUTER_SYSTEM = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Server Vendor'}; label = 'MANUFACTURER0'},
    @{ expression = {$_.'Server Model'}; label = 'MODEL0'},
    @{ expression = {$_.'Partition Virtual Processors'}; label = 'NUMBEROFPROCESSORS0'}
)

# GS_OPERATING_SYSTEM -- the same input column feeds both NAME0 and CAPTION0.
$desiredColumnsGS_OPERATING_SYSTEM = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Operating System'}; label = 'NAME0'},
    @{ expression = {$_.'Operating System'}; label = 'CAPTION0'}
)

# GS_WORKSTATION_STATUS
$desiredColumnsGS_WORKSTATION_STATUS = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID'},
    @{ expression = {$_.'Last Scan Attempt'}; label = 'LASTHWSCAN'}
)

# System
$desiredColumnsSystem = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'DNS Name'}; label = 'NAME'},
    @{ expression = {$_.'User Name'}; label = 'USER_NAME'}
)

# GS_X86_PC_MEMORY -- only the computer ID is exported.
$desiredColumnsGS_X86_PC_MEMORY = @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' }

# GS_PROCESSOR
$desiredColumnsGS_PROCESSOR = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Vendor'}; label = 'MANUFACTURER0'},
    @{ expression = {$_.'Processor Brand String'}; label = 'NAME0'}
)

# GS_PC_BIOS
$desiredColumnsGS_PC_BIOS = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Server Vendor'}; label = 'MANUFACTURER0'},
    @{ expression = {$_.'Server Serial Number'}; label = 'SERIALNUMBER0'}
)

# GS_LOGICAL_DISK -- only the computer ID is exported.
$desiredColumnsGS_LOGICAL_DISK = @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' }




# Write one CSV per output table. Every statement below re-reads and re-parses
# the whole input file before projecting it onto that table's columns.
#
# Parameter names are written with a plain ASCII hyphen rather than a
# typographic en dash: when the script file is saved as UTF-8 without a BOM,
# Windows PowerShell decodes it with the ANSI code page, the en dash turns into
# several unrelated characters, and the switch is no longer recognised.

# Tables that are de-duplicated with -Unique.
Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_X86_PC_MEMORY -Unique |
    Export-Csv -Path $GS_X86_PC_MEMORY -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_PROCESSOR -Unique |
    Export-Csv -Path $GS_PROCESSOR -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_PC_BIOS -Unique |
    Export-Csv -Path $GS_PC_BIOS -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_LOGICAL_DISK -Unique |
    Export-Csv -Path $GS_LOGICAL_DISK -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_OPERATING_SYSTEM -Unique |
    Export-Csv -Path $GS_OPERATING_SYSTEM -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_WORKSTATION_STATUS -Unique |
    Export-Csv -Path $GS_WORKSTATION_STATUS -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsSystem -Unique |
    Export-Csv -Path $System -NoTypeInformation

Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsGS_COMPUTER_SYSTEM -Unique |
    Export-Csv -Path $GS_COMPUTER_SYSTEM -NoTypeInformation

# AddRemove is the only table written without -Unique, so every row is kept.
Import-Csv -Path $filePath |
    Select-Object -Property $desiredColumnsAddRemove |
    Export-Csv -Path $AddRemove -NoTypeInformation



# Creating the Zip File
# The archive settings are gathered in a hashtable and splatted onto
# Compress-Archive; -Force replaces an archive left over from an earlier run.
$compress = @{
    Path             = @(
        "..\Output\AddRemove.csv"
        "..\Output\GS_COMPUTER_SYSTEM.csv"
        "..\Output\GS_OPERATING_SYSTEM.csv"
        "..\Output\GS_WORKSTATION_STATUS.csv"
        "..\Output\System.csv"
        "..\Output\GS_X86_PC_MEMORY.csv"
        "..\Output\GS_PROCESSOR.csv"
        "..\Output\GS_PC_BIOS.csv"
        "..\Output\GS_LOGICAL_DISK.csv"
    )
    CompressionLevel = "Fastest"
    DestinationPath  = "..\Output\BigFix.Zip"
}
Compress-Archive @compress -Force
4

2 回答 2

0

当然,问题在于您为每个输出文件都读取并解析了一次 $filePath,而理想情况下,它总共只应被读取和解析一次。您可能很想把 Import-Csv $filePath 的结果存储在一个变量中以供重用,但是,正如您所发现的,这会让您碰到 .NET 数组的最大大小限制。即使不是这种情况,在脚本运行期间您仍然会占用大量内存。

我们可以逐条记录地把数据写入每一个输出文件,而不是一次只处理一个输出文件,这样 $filePath 只需读取一次。首先,让我们整理一下定义哪些列输出到哪些文件的代码......

# Local working copy of the input CSV that the export loop reads from.
$filePath = "C:\location2\powerShellTesting\Input\bigFixDataNew.csv"

# Fetch the source file from the network share into the local working copy.
Copy-Item -Path "\\location1\20191213_BFI_SAMPLE_DATA_csv.csv" -Destination $filePath

# One descriptor per output file:
#   Path    - where the CSV is written (relative to the current location)
#   Columns - calculated properties mapping an input column ('expression')
#             to its output header ('label')
# Each descriptor is created directly as a [PSCustomObject] literal, so the
# table needs no line-continuation backtick or extra conversion pipeline, and
# the properties keep the order in which they are written here.
$outputFileDescriptors = @(
    [PSCustomObject]@{
        Path = "..\Output\System.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
            @{ expression = {$_.'DNS Name'}; label = 'NAME'},
            @{ expression = {$_.'User Name'}; label = 'USER_NAME'}
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\AddRemove.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
            @{ expression = {$_.'Component Name'}; label ='DISPLAYNAME'},
            @{ expression = {$_.'Product Version'}; label = 'VERSION'},
            @{ expression = {$_.'Publisher Name'}; label = 'PUBLISHER'},
            @{ expression = {$_.'Creation'}; label = 'INSTALLDATE'}
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_PC_BIOS.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
            @{ expression = {$_.'Server Vendor'}; label = 'MANUFACTURER0'},
            @{ expression = {$_.'Server Serial Number'}; label = 'SERIALNUMBER0'}
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_PROCESSOR.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
            @{ expression = {$_.'Vendor'}; label = 'MANUFACTURER0'},
            @{ expression = {$_.'Processor Brand String'}; label = 'NAME0'}
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_LOGICAL_DISK.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' }
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_X86_PC_MEMORY.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' }
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_COMPUTER_SYSTEM.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
            @{ expression = {$_.'Server Vendor'}; label = 'MANUFACTURER0'},
            @{ expression = {$_.'Server Model'}; label = 'MODEL0'},
            @{ expression = {$_.'Partition Virtual Processors'}; label = 'NUMBEROFPROCESSORS0'}
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_OPERATING_SYSTEM.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
            @{ expression = {$_.'Operating System'}; label = 'NAME0'},
            @{ expression = {$_.'Operating System'}; label = 'CAPTION0'}
        )
    },
    [PSCustomObject]@{
        Path = "..\Output\GS_WORKSTATION_STATUS.csv"
        Columns = @(
            @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID'},
            @{ expression = {$_.'Last Scan Attempt'}; label = 'LASTHWSCAN'}
        )
    }
)

$outputFileDescriptors 将包含一个 [PSCustomObject] 实例数组,每个实例都具有定义对应输出文件的 Path 和 Columns 属性。此时,您可以简单地把脚本的结尾重写为......

# One full pass over the input per descriptor: read the CSV, keep only that
# descriptor's columns (de-duplicated), and write its output file.
$outputFileDescriptors | ForEach-Object -Process {
    $descriptor = $_
    Import-Csv -Path $filePath |
        Select-Object -Property $descriptor.Columns -Unique |
        Export-Csv -Path $descriptor.Path -NoTypeInformation
}

# Creating the Zip File
# The list of files to archive comes straight from the descriptors, so the
# archive always contains exactly the files the descriptors name.
$archiveArguments = @{
    Path             = ($outputFileDescriptors).Path
    DestinationPath  = "..\Output\BigFix.Zip"
    CompressionLevel = "Fastest"
    Force            = $true
}
Compress-Archive @archiveArguments

......但这与您的原始脚本相比并没有性能上的改进;我们仍然为每个输出文件调用一次 Import-Csv。

相反,让我们像这样修改那个循环......

# Export-Csv -Append adds to whatever is already on disk, so remove output
# files left over from a previous run before writing the first record.
foreach ($outputFileDescriptor in $outputFileDescriptors)
{
    if (Test-Path -Path $outputFileDescriptor.Path)
    {
        Remove-Item -Path $outputFileDescriptor.Path
    }
}

# Stream the input through the pipeline so only the current record is held.
# A foreach *statement* over Import-Csv would collect every record before the
# first iteration, which defeats the purpose of reading the file only once.
Import-Csv $filePath | ForEach-Object -Process {
    $record = $_

    # Fan the current record out to every output file, keeping only the
    # columns that file's descriptor asks for.
    foreach ($outputFileDescriptor in $outputFileDescriptors)
    {
        $record | Select $outputFileDescriptor.Columns |
            Export-Csv -Path $outputFileDescriptor.Path -NoTypeInformation -Append
    }
}

现在我们只调用一次 Import-Csv,并且对于每条输入记录,把相应的列输出到每个文件。更好的是,我们在任一时刻只持有对一条记录的变量引用,从而减少了内存使用。

这里还有两个值得注意的变化。首先,我们向 Export-Csv 传递了 -Append,这样就不会在写入每条记录时覆盖整个文件。其次,我们没有向 Select-Object 传递 -Unique。我们可以传,但它不会起任何作用,因为在这种情况下 Select 在判断唯一性时只会看到单条记录,而不是整个输入数据集。

不幸的是,Select ... -Unique 不能用于这种流式输出的场景,因为它会等到处理完所有输入数据之后才向管道输出任何内容(看起来它完全可以在第一次遇到某个值时就将其输出,但显然它没有这样做)。如果您确实有需要过滤掉的重复输出数据,可以自己跟踪已经见过的数据......但在内存中收集这些数据几乎会让我们回到起点,除非唯一数据的数量只占整个数据集的一小部分,和/或只有个别输出文件才真正需要去除重复数据。

于 2019-12-24T01:10:23.070 回答
0
# Local working copy of the input CSV; it is parsed once further down.
$filePath = "C:\location2\powerShellTesting\Input\bigFixDataNew.csv"

# Fetch the source file from the network share into the local working copy.
Copy-Item -Path "\\location1\20191213_BFI_SAMPLE_DATA_csv.csv" -Destination $filePath

# Destination files, one per output table. The paths are relative, so they
# resolve against the current location at the time the script runs.
$System                = "..\Output\System.csv"
$AddRemove             = "..\Output\AddRemove.csv"
$GS_PC_BIOS            = "..\Output\GS_PC_BIOS.csv"
$GS_PROCESSOR          = "..\Output\GS_PROCESSOR.csv"
$GS_LOGICAL_DISK       = "..\Output\GS_LOGICAL_DISK.csv"
$GS_X86_PC_MEMORY      = "..\Output\GS_X86_PC_MEMORY.csv"
$GS_COMPUTER_SYSTEM    = "..\Output\GS_COMPUTER_SYSTEM.csv"
$GS_OPERATING_SYSTEM   = "..\Output\GS_OPERATING_SYSTEM.csv"
$GS_WORKSTATION_STATUS = "..\Output\GS_WORKSTATION_STATUS.csv"



# Calculated-property lists, one per output table. Each entry reads one input
# column ('expression') and emits it under a new header ('label').

# AddRemove
$desiredColumnsAddRemove = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Component Name'}; label ='DISPLAYNAME'},
    @{ expression = {$_.'Product Version'}; label = 'VERSION'},
    @{ expression = {$_.'Publisher Name'}; label = 'PUBLISHER'},
    @{ expression = {$_.'Creation'}; label = 'INSTALLDATE'}
)

# GS_COMPUTER_SYSTEM
$desiredColumnsGS_COMPUTER_SYSTEM = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Server Vendor'}; label = 'MANUFACTURER0'},
    @{ expression = {$_.'Server Model'}; label = 'MODEL0'},
    @{ expression = {$_.'Partition Virtual Processors'}; label = 'NUMBEROFPROCESSORS0'}
)

# GS_OPERATING_SYSTEM -- the same input column feeds both NAME0 and CAPTION0.
$desiredColumnsGS_OPERATING_SYSTEM = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Operating System'}; label = 'NAME0'},
    @{ expression = {$_.'Operating System'}; label = 'CAPTION0'}
)

# GS_WORKSTATION_STATUS
$desiredColumnsGS_WORKSTATION_STATUS = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID'},
    @{ expression = {$_.'Last Scan Attempt'}; label = 'LASTHWSCAN'}
)

# System
$desiredColumnsSystem = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'DNS Name'}; label = 'NAME'},
    @{ expression = {$_.'User Name'}; label = 'USER_NAME'}
)

# GS_X86_PC_MEMORY -- only the computer ID is exported.
$desiredColumnsGS_X86_PC_MEMORY = @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' }

# GS_PROCESSOR
$desiredColumnsGS_PROCESSOR = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Vendor'}; label = 'MANUFACTURER0'},
    @{ expression = {$_.'Processor Brand String'}; label = 'NAME0'}
)

# GS_PC_BIOS
$desiredColumnsGS_PC_BIOS = @(
    @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' },
    @{ expression = {$_.'Server Vendor'}; label = 'MANUFACTURER0'},
    @{ expression = {$_.'Server Serial Number'}; label = 'SERIALNUMBER0'}
)

# GS_LOGICAL_DISK -- only the computer ID is exported.
$desiredColumnsGS_LOGICAL_DISK = @{ expression = {$_.'Internal Computer ID'}; label = 'RESOURCEID' }


# Parse the input file once and keep every record in $csvfile, so that all of
# the exports below reuse the same parsed data instead of re-reading the file.
$csvfile = Import-Csv -Path $filePath

# Parameter names are written with a plain ASCII hyphen rather than a
# typographic en dash: when the script file is saved as UTF-8 without a BOM,
# Windows PowerShell decodes it with the ANSI code page, the en dash turns into
# several unrelated characters, and the switch is no longer recognised.

# Tables that are de-duplicated with -Unique.
$csvfile |
    Select-Object -Property $desiredColumnsGS_X86_PC_MEMORY -Unique |
    Export-Csv -Path $GS_X86_PC_MEMORY -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsGS_PROCESSOR -Unique |
    Export-Csv -Path $GS_PROCESSOR -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsGS_PC_BIOS -Unique |
    Export-Csv -Path $GS_PC_BIOS -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsGS_LOGICAL_DISK -Unique |
    Export-Csv -Path $GS_LOGICAL_DISK -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsGS_OPERATING_SYSTEM -Unique |
    Export-Csv -Path $GS_OPERATING_SYSTEM -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsGS_WORKSTATION_STATUS -Unique |
    Export-Csv -Path $GS_WORKSTATION_STATUS -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsSystem -Unique |
    Export-Csv -Path $System -NoTypeInformation

$csvfile |
    Select-Object -Property $desiredColumnsGS_COMPUTER_SYSTEM -Unique |
    Export-Csv -Path $GS_COMPUTER_SYSTEM -NoTypeInformation

# AddRemove is the only table written without -Unique, so every row is kept.
$csvfile |
    Select-Object -Property $desiredColumnsAddRemove |
    Export-Csv -Path $AddRemove -NoTypeInformation



# Creating the Zip File
# The archive settings are gathered in a hashtable and splatted onto
# Compress-Archive; -Force replaces an archive left over from an earlier run.
$compress = @{
    Path             = @(
        "..\Output\AddRemove.csv"
        "..\Output\GS_COMPUTER_SYSTEM.csv"
        "..\Output\GS_OPERATING_SYSTEM.csv"
        "..\Output\GS_WORKSTATION_STATUS.csv"
        "..\Output\System.csv"
        "..\Output\GS_X86_PC_MEMORY.csv"
        "..\Output\GS_PROCESSOR.csv"
        "..\Output\GS_PC_BIOS.csv"
        "..\Output\GS_LOGICAL_DISK.csv"
    )
    CompressionLevel = "Fastest"
    DestinationPath  = "..\Output\BigFix.Zip"
}
Compress-Archive @compress -Force

与其多次导入它,不如将其导入一个变量,然后只操作该变量。

于 2019-12-19T18:59:00.503 回答