我正在尝试移植一些将数据发送到 Universe 数据库的 Delphi 代码。为了使 DB 中的文本清晰可见,我们需要在 OEM 中对其进行编码。
在 Delphi 中是这样完成的:
procedure TForm1.GenerarTablasNLS;
var
i: integer;
begin
for i := 0 to 255 do
begin
TablaUV_NLS[i] := AnsiChar(i);
TablaNLS_UV[i] := AnsiChar(i);
end;
// Nulo final
TablaUV_NLS[256] := #0;
TablaNLS_UV[256] := #0;
OemToCharA(@TablaUV_NLS[1], @TablaUV_NLS[1]);
CharToOemA(@TablaNLS_UV[1], @TablaNLS_UV[1]);
然后我们像这样简单地翻译我们的文本
function StringToUniverse(const Value: string): AnsiString;
var
p: PChar;
q: PAnsiChar;
begin
SetLength(Result, Length(Value));
if Value = '' then Exit;
p := Pointer(Value);
q := Pointer(Result);
while p^ <> #0 do
begin
q^ := TablaNLS_UV[Ord(AnsiChar(p^))];
Inc(p);
Inc(q);
end;
end;
我使用存储每个字符翻译的字典在 Python 中遵循相同的逻辑
class StringUniverseDict(dict):
def __missing__(self, key):
return key
TablaString2UV = StringUniverseDict()
def rellenar_tablas_codificacion():
TablaString2UV['á'] = ' ' # chr(225) = chr(160)
TablaString2UV['é'] = '‚' # chr(233) = chr(130)
TablaString2UV['í'] = '¡' # chr(237) = chr(161)
TablaString2UV['ó'] = '¢' # chr(243) = chr(162)
TablaString2UV['ú'] = '£' # chr(250) = chr(163)
TablaString2UV['ñ'] = '¤' # chr(241) = chr(164)
TablaString2UV['ç'] = '‡' # chr(231) = chr(135)
TablaString2UV['Á'] = 'µ' # chr(193) = chr(181)
TablaString2UV['É'] = chr(144) # chr(201) = chr(144)
TablaString2UV['Í'] = 'Ö' # chr(205) = chr(214)
TablaString2UV['Ó'] = 'à' # chr(211) = chr(224)
TablaString2UV['Ñ'] = '¥' # chr(209) = chr(165)
TablaString2UV['Ç'] = '€' # chr(199) = chr(128)
TablaString2UV['ü'] = chr(129) # chr(252) = chr(129)
TablaString2UV[chr(129)] = '_' # chr(129) = chr(095)
TablaString2UV[chr(141)] = '_' # chr(141) = chr(095)
TablaString2UV['•'] = chr(007) # chr(149) = chr(007)
TablaString2UV['Å'] = chr(143) # chr(197) = chr(143)
TablaString2UV['Ø'] = chr(157) # chr(216) = chr(157)
TablaString2UV['ì'] = chr(141) # chr(236) = chr(141)
只要我使用可打印字符进行翻译,它就可以“正常”工作。例如,字符串
"á é í ó ú ñ ç Á Í Ó Ú Ñ Ç"
在 Delphi 中被翻译成以下字节:
0xa0 0x20 0x82 0x20 0xa1 0x20 0xa2 0x20 0xa3 0x20 0xa4 0x20 0x87 0x20 0xb5 0x20 0xd6 0x20 0xe0 0x20 0xe9 0x20 0xa5 0x20 0x80 0xfe 0x73 0x64 0x73
(á 转换为 ' ',即 chr(160) 或 0xA0 十六进制。é 是 '‚' 或 chr(130),0x82 十六进制,í 是 '¡',char(161) 或 0xA1 十六进制等等)
在 Python 中,当我尝试将其编码为 OEM 时,我执行以下操作:
def convertir_string_a_universe(cadena_python):
resultado = ''
for letra in cadena_python:
resultado += TablaString2UV[letra]
return resultado
然后,获取字节
txt_registro = convertir_string_a_universe(txt_orig)
datos = bytes(txt_registro, 'cp1252')
有了这个,我得到以下字节:
b'\xa0 \x82 \xa1 \xa2 \xa3 \xa4 \x87 \xb5 \xd6 \xe0 \xe9 \xa5 \x80 \x9a'
我的问题是这个 OEM 编码使用了不可打印的字符,比如 'É' = chr(144) (0x90 in hexa)。如果我尝试使用一个数组调用 bytes(txt_registro, 'cp1252') ,我将 'É' 翻译成 chr(0x90) 我会收到此错误:
caracteres_mal = 'Éü'
txt_registro = convertir_string_a_universe(txt_orig)
datos = bytes(txt_registro, 'cp1252')
File "C:\Users\Hector\PyCharmProjects\pyuniverse\pyuniverse\UniverseRegister.py", line 138, in reconstruir_registro_universe
datos = bytes(txt_registro, 'cp1252')
File "C:\Users\Hector\AppData\Local\Programs\Python\Python36-32\lib\encodings\cp1252.py", line 12, in encode
return codecs.charmap_encode(input,errors,encoding_table)
UnicodeEncodeError: 'charmap' codec can't encode character '\x90' in position 0: character maps to <undefined>
如何在不引发此 UnicodeEncodeError 的情况下进行此 OEM 编码?