感谢@Michael 完成了超过 99% 的工作!
这是 Michael 脚本的 PowerShell 版本,供有需要的人使用。它同样采用了 @Qubei 建议的 Windows-1252
代码页/编码来解决 É
问题;不过这些编码都可以通过参数修改,以应对您的数据因其他编码组合而损坏的情况。
#based on c# in question: https://stackoverflow.com/questions/10484833/detecting-bad-utf-8-encoding-list-of-bad-characters-to-sniff
function Convert-CorruptCodePageString {
    <#
    .SYNOPSIS
    Detects and repairs text that was written in one encoding but read back in another (mojibake).

    .DESCRIPTION
    For every character in -DodgyChars the function computes what that character looks like
    when its -DestinationEncoding bytes are misread using -SourceEncoding. Input text that
    contains any of those misread sequences is flagged as likely corrupted and is repaired by
    re-encoding it with -SourceEncoding and decoding the bytes with -DestinationEncoding.
    Text that is not flagged is passed through unchanged.

    .PARAMETER InputText
    The string to check (pipeline input, parameter set 'ByInputText').

    .PARAMETER InputObject
    An object carrying the string to check (pipeline input, parameter set 'ByInputObject').

    .PARAMETER Property
    Name of the property on -InputObject that holds the string to check.

    .PARAMETER SourceEncoding
    The encoding the text was wrongly read with. Defaults to Windows-1252.

    .PARAMETER DestinationEncoding
    The encoding the text was really written in. Defaults to UTF-8.

    .PARAMETER DodgyChars
    The characters whose misread form is searched for when deciding whether text is corrupted.

    .OUTPUTS
    One PSCustomObject per input with the properties InputString, OutputString,
    InputObject ($null for plain string input) and IsLikelyCorrupted.
    #>
    [CmdletBinding(DefaultParameterSetName = 'ByInputText')]
    param (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, ParameterSetName = 'ByInputText')]
        [string]$InputText
        ,
        [Parameter(Mandatory = $true, ValueFromPipeline = $true, ParameterSetName = 'ByInputObject')]
        [PSObject]$InputObject
        ,
        [Parameter(Mandatory = $true, ParameterSetName = 'ByInputObject')]
        [string]$Property
        ,
        [Parameter()]
        [System.Text.Encoding]$SourceEncoding = [System.Text.Encoding]::GetEncoding('Windows-1252')
        ,
        [Parameter()]
        [System.Text.Encoding]$DestinationEncoding = [system.Text.Encoding]::UTF8
        ,
        [Parameter()]
        [string]$DodgyChars = 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö'
    )
    begin {
        # Build one regex alternative per dodgy character: the text that character turns into
        # when its destination-encoding bytes are decoded with the source encoding.
        $misreadPatterns = @(
            foreach ($dodgyChar in $DodgyChars.ToCharArray()) {
                [byte[]]$dodgyCharBytes = $DestinationEncoding.GetBytes($dodgyChar.ToString())
                $misread = $SourceEncoding.GetString($dodgyCharBytes, 0, $dodgyCharBytes.Length).Trim()
                # An empty alternative would match every input, so drop sequences that trim to nothing.
                if ($misread.Length -gt 0) {
                    # Escape so that regex metacharacters in a misread sequence are matched literally.
                    [regex]::Escape($misread)
                }
            }
        )
        [string]$InvalidCharRegex = $misreadPatterns -join '|'
    }
    process {
        if ($PSCmdlet.ParameterSetName -eq 'ByInputText') {
            $InputObject = $null
        } else {
            $InputText = $InputObject."$Property"
        }
        # Case-sensitive match: the misread sequences are exact characters, and a case-insensitive
        # match would flag (and then damage) valid text, e.g. 'ã' matching the 'Ã' alternative.
        # With no usable pattern nothing is treated as corrupted.
        [bool]$IsLikelyCorrupted = ($InvalidCharRegex.Length -gt 0) -and ($InputText -cmatch $InvalidCharRegex)
        if ($IsLikelyCorrupted) {
            # Only re-decode when the text looks corrupted; re-decoding healthy text would damage it.
            [byte[]]$bytes = $SourceEncoding.GetBytes($InputText)
            [string]$outputText = $DestinationEncoding.GetString($bytes, 0, $bytes.Length)
        } else {
            [string]$outputText = $InputText
        }
        [pscustomobject]@{
            InputString       = $InputText
            OutputString      = $outputText
            InputObject       = $InputObject
            IsLikelyCorrupted = $IsLikelyCorrupted
        }
    }
}
演示
# Demo 1: repair a single string by hand, without the function.
# This skips the "does it look corrupted?" check, so running it on text that is already
# correct is likely to damage it.
$x = 'Strømmen'
$bytes = [System.Text.Encoding]::GetEncoding('Windows-1252').GetBytes($x)
[System.Text.Encoding]::UTF8.GetString($bytes, 0, $bytes.Length)

# Demo 2: the same string, this time through the function.
Convert-CorruptCodePageString -InputText $x

# Demo 3: scan every record of a table and report only the rows that look corrupted.
# Adjust the SQL query, $MyDatabaseInstance and $MyDatabaseCatalog to point at your own database / table.
Invoke-SQLQuery -DbInstance $MyDatabaseInstance -DbCatalog $MyDatabaseCatalog -Query 'Select [Description], [RecId] from [DimensionFinancialTag] where [Description] is not null and [Description] > ''''' |
    Convert-CorruptCodePageString -Property 'Description' |
    Where-Object { $_.IsLikelyCorrupted } |
    Format-Table -Property @{N='RecordId';E={$_.InputObject.RecId}}, InputString, OutputString
我的演示中使用的附加函数
我不太喜欢 Invoke-SqlCmd
cmdlet,所以自己写了一个。
function Invoke-SQLQuery {
    <#
    .SYNOPSIS
    Runs a SQL query through System.Data.SqlClient and emits the rows of every result set.

    .DESCRIPTION
    Opens a connection to the given instance and catalog, executes the query text (passed
    directly or read from a file), and writes each row of each returned result set to the
    pipeline with the DataRow bookkeeping properties removed. The reader, command and
    connection are released when the call finishes, fails, or is stopped by a downstream command.

    .PARAMETER DbInstance
    Server / instance to connect to.

    .PARAMETER DbCatalog
    Database (initial catalog) to connect to.

    .PARAMETER Query
    The SQL text to execute (parameter set 'ByQuery').

    .PARAMETER Path
    Path of a file whose whole content is the SQL text to execute (parameter set 'ByPath').

    .PARAMETER Params
    Hashtable of query parameters; each key is passed as '@<key>'. A $null value is sent as DBNull.

    .PARAMETER CommandTimeoutSeconds
    Command timeout in seconds. Defaults to 30.

    .PARAMETER Credential
    SQL login to use. When omitted (empty credential) integrated security is used.

    .OUTPUTS
    One object per result row, carrying the row's columns as properties.
    #>
    [CmdletBinding(DefaultParameterSetName = 'ByQuery')]
    param (
        [Parameter(Mandatory = $true)]
        [string]$DbInstance
        ,
        [Parameter(Mandatory = $true)]
        [string]$DbCatalog
        ,
        [Parameter(Mandatory = $true, ParameterSetName = 'ByQuery')]
        [string]$Query
        ,
        [Parameter(Mandatory = $true, ParameterSetName = 'ByPath')]
        [string]$Path
        ,
        [Parameter(Mandatory = $false)]
        [hashtable]$Params = @{}
        ,
        [Parameter(Mandatory = $false)]
        [int]$CommandTimeoutSeconds = 30 #this is the SQL default
        ,
        [Parameter(Mandatory = $false)]
        [System.Management.Automation.Credential()]
        [System.Management.Automation.PSCredential]$Credential=[System.Management.Automation.PSCredential]::Empty
    )
    begin {
        Write-Verbose "Call to 'Invoke-SQLQuery'"
        # Use the connection string builder so that values containing ';', '=' or quotes
        # (for example in a password) are escaped instead of corrupting the connection string.
        $connectionStringBuilder = New-Object -TypeName 'System.Data.SqlClient.SqlConnectionStringBuilder'
        $connectionStringBuilder['Data Source'] = $DbInstance
        $connectionStringBuilder['Initial Catalog'] = $DbCatalog
        if ($Credential -eq [System.Management.Automation.PSCredential]::Empty) {
            $connectionStringBuilder['Integrated Security'] = $true
        } else {
            $connectionStringBuilder['User ID'] = $Credential.UserName
            $connectionStringBuilder['Password'] = $Credential.GetNetworkCredential().Password
        }
        $connectionString = $connectionStringBuilder.ToString()
    }
    process {
        $connection = New-Object -TypeName 'System.Data.SqlClient.SqlConnection' -ArgumentList $connectionString
        $cmd = $null
        $reader = $null
        try {
            $connection.Open()

            #create the command & assign the connection
            $cmd = New-Object -TypeName 'System.Data.SqlClient.SqlCommand'
            $cmd.Connection = $connection
            $cmd.CommandTimeout = $CommandTimeoutSeconds

            #load in our query
            switch ($PSCmdlet.ParameterSetName) {
                'ByQuery' {$cmd.CommandText = $Query; break;}
                'ByPath' {$cmd.CommandText = Get-Content -Path $Path -Raw; break;}
                default {throw "ParameterSet $($PSCmdlet.ParameterSetName) not recognised by Invoke-SQLQuery"}
            }

            #assign parameters as required
            #NB: these don't need declare statements in our query; so a query of 'select @demo myDemo' would be sufficient for us to pass in a parameter with name @demo and have it used
            #we can also pass in parameters that don't exist; they're simply ignored (sometimes useful if writing generic code that has optional params)
            foreach ($paramName in @($Params.Keys)) {
                $paramValue = $Params[$paramName]
                if ($null -eq $paramValue) {
                    # A parameter whose value is null is not sent to the server; DBNull represents SQL NULL.
                    $paramValue = [System.DBNull]::Value
                }
                $cmd.Parameters.AddWithValue("@$paramName", $paramValue) | Out-Null
            }

            $reader = $cmd.ExecuteReader()
            # DataTable.Load moves the reader on to the next result set and closes it after the last one,
            # so this loop emits every result set the query returns.
            while (-not ($reader.IsClosed)) {
                $table = New-Object -TypeName 'System.Data.DataTable'
                $table.Load($reader)
                Write-Verbose "TableName: $($table.TableName)" #NB: table names aren't always available
                # -Property * is required for -ExcludeProperty to take effect on Windows PowerShell 5.1 and earlier.
                $table | Select-Object -Property * -ExcludeProperty RowError, RowState, Table, ItemArray, HasErrors
            }
        } finally {
            # Runs on success, on error, and when a downstream command stops the pipeline.
            if ($null -ne $reader) { $reader.Dispose() }
            if ($null -ne $cmd) { $cmd.Dispose() }
            $connection.Dispose()
        }
    }
}