类似的问题:
QDir 和 QDirIterator 忽略具有非 ASCII 文件名的文件
和
UnicodeEncodeError:“latin-1”编解码器无法编码字符
关于上面的第二个链接,我在下面添加了 test0()。我的理解是 utf-8 是我正在寻找的解决方案,但是尝试对文件名进行编码失败了。
def test0():
    """Probe a hard-coded file name that contains a lone surrogate.

    The name is first handed to ``test`` and is then encoded as UTF-8; the
    encoded bytes and the result of ``QtCore.QFile.decodeName`` on them are
    printed.
    """
    print("test0...using unicode literal")
    literal_name = u"123c\udcb4.wav"
    test("test0b", literal_name)

    # '\udcb4' is a lone surrogate, so the strict UTF-8 codec raises
    # UnicodeEncodeError ("surrogates not allowed") on the next line.
    # NOTE(review): os.fsencode() is presumably the round-trip-safe
    # alternative for such names -- confirm before relying on it.
    utf8_bytes = literal_name.encode('utf-8')
    print(utf8_bytes)
    qt_name = QtCore.QFile.decodeName(utf8_bytes)
    print(qt_name)
    # From http://docs.python.org/release/3.0.1/howto/unicode.html
    # This will indeed overwrite the correct file!
    # f = open(literal_name, 'w')
    # f.write('blah\n')
    # f.close()
测试0结果...
test0...using unicode literal
test0b QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test0b QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test0b os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test0b os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
Traceback (most recent call last):
File "unicode.py", line 157, in <module>
test0()
File "unicode.py", line 42, in test0
n = name.encode('utf-8')
UnicodeEncodeError: 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed
编辑
进一步阅读 https://www.rfc-editor.org/rfc/rfc3629 后我了解到:“UTF-8 的定义禁止对 U+D800 到 U+DFFF 之间的字符编号进行编码”。因此,如果 UTF-8 不允许这些字符,那么应该如何处理以这种方式命名的文件?Python 可以创建这些文件,也可以检测它们是否存在。所以,这是否说明问题出在我对 Qt API 的用法上,或者出在 Qt API 本身?!
我正在努力解决在 Python3 中正确处理 unicode 文件名的问题。最终,我正在开发基于 Phonon 的音乐播放器。我试图尽可能地将问题与此隔离开来。从下面的代码中,您会看到我已经尝试了尽可能多的替代方案。我最初的反应是这里有错误......也许是我的......也许在一个或多个库中。任何帮助将非常感激!
我有一个包含 3 个 unicode 文件名 123[abc]U.wav 的目录。前两个文件处理得当……主要是……第三个 123c 是错误的。
from PyQt4 import QtGui, QtCore
import sys, os
def test(_name, _file):
    """Check whether ``_file`` exists through four APIs and print each result.

    The APIs are ``QtCore.QFile.exists``, ``QtCore.QFileInfo.exists``,
    ``os.path.exists`` and ``os.path.isfile``, in that order.  Each result is
    printed on its own line prefixed with ``_name``; a blank line follows the
    last one.

    Args:
        _name: Label printed at the start of every result line.
        _file: File name or path to probe.
    """
    # Debug aid that was tried: print(_name, repr(_file))
    # Alternative that was tried: wrapping the name with
    # QtCore.QFile.decodeName(...) before constructing the QFile.

    def report(label, shown_name, found):
        # ``shown_name`` is a callable and is invoked inside the ``try`` so
        # the displayed name is produced at the same point as the print.
        try:
            print(_name, label, shown_name(), found)
        except UnicodeEncodeError as err:
            # The name could not be written to stdout; fall back to the
            # error text plus the escaped representation of the input.
            print(err, repr(_file), found)

    qfile = QtCore.QFile(_file)
    report("QFile.exists", qfile.fileName, qfile.exists())

    info = QtCore.QFileInfo(_file)
    report("QFileInfo.exists", info.fileName, info.exists())

    report("os.path.exists", lambda: _file, os.path.exists(_file))
    report("os.path.isfile", lambda: _file, os.path.isfile(_file))
    print()
def test1():
    """Probe the first argument as reported by ``QtGui.QApplication.arguments()``."""
    qt_args = QtGui.QApplication.arguments()
    print("test1...using QtGui.QApplication.arguments()")
    first_arg = qt_args[1]
    test("test1", first_arg)
def test2():
    """Probe the first command-line argument taken from ``sys.argv``."""
    print("test2...using sys.argv")
    first_arg = sys.argv[1]
    test("test2", first_arg)
def test3():
    """Probe the file picked interactively in a ``QFileDialog``."""
    print("test3...QtGui.QFileDialog.getOpenFileName()")
    chosen = QtGui.QFileDialog.getOpenFileName()
    test("test3", chosen)
def test4():
    """List this script's directory with ``QDir`` and probe the 123c entry.

    Every entry name is printed.  The entry whose name equals
    ``123c`` + U+FFFD + ``.wav`` is additionally passed to ``test`` twice:
    once by bare file name and once by absolute path.
    """
    print("test4...QtCore.QDir().entryInfoList()")
    script_dir = os.path.dirname(os.path.abspath(__file__))
    filters = (
        QtCore.QDir.AllEntries
        | QtCore.QDir.NoDotAndDotDot
        | QtCore.QDir.System
    )
    for entry in QtCore.QDir(script_dir).entryInfoList(filters):
        entry_name = entry.fileName()
        print("test4", entry_name)
        # Alternative check that was tried:
        #     str(entry.fileName()).startswith("123c")
        # Comparing against u"123c\udcb4.wav" fails here, even though that
        # is the name reported in the error messages for test2.
        if entry_name == u"123c\ufffd.wav":
            test("test4a", entry_name)
            test("test4b", entry.absoluteFilePath())
def test5():
    """List this script's directory with ``os.listdir`` and probe the 123c entry.

    Each entry is printed by name and then by the ``repr`` of its full path.
    The entry whose name equals ``123c`` + lone surrogate U+DCB4 + ``.wav``
    is additionally passed to ``test`` by bare name and by full path.
    """
    print("test5...os.listdir()")
    script_dir = os.path.dirname(os.path.abspath(__file__))
    for entry in os.listdir(script_dir):
        full_path = os.path.join(script_dir, entry)
        try:
            print("test5", entry)
        except UnicodeEncodeError as err:
            # The raw name cannot be written to stdout; show the error only.
            print(err)
        # repr() escapes the name, so this line prints for every entry.
        print("test5", repr(full_path))
        # Comparing against u"123c\ufffd.wav" fails here, even though that
        # comparison worked in test4.
        if entry == u"123c\udcb4.wav":
            test("test5a", entry)
            test("test5b", full_path)
    # NOTE(review): the original indentation was lost; this trailing blank
    # line is assumed to belong to the function, not the loop -- confirm.
    print()
def test6():
    """Play a file chosen in a ``QFileDialog`` through Phonon.

    A modal dialog owns the media object; every transition into the playing,
    stopped or error state is printed together with the current source's
    file name.
    """
    print("test6...Phonon and QtGui.QFileDialog.getOpenFileName()")
    from PyQt4.phonon import Phonon

    class Window(QtGui.QDialog):
        """Dialog that starts playback of the selected file on construction."""

        def __init__(self):
            super(Window, self).__init__(None)
            self.mediaObject = Phonon.MediaObject(self)
            self.audioOutput = Phonon.AudioOutput(Phonon.MusicCategory, self)
            Phonon.createPath(self.mediaObject, self.audioOutput)
            self.mediaObject.stateChanged.connect(self.handleStateChanged)
            source_path = QtGui.QFileDialog.getOpenFileName()  # works with python3..not for 123c
            # source_path = QtGui.QApplication.arguments()[1] # works with python2..but not python3...not for 123c
            # source_path = sys.argv[1] # works with python3..but not python2...not for 123c
            # p = os.path.abspath(__file__)
            # p, _ = os.path.split(p)
            # print(p)
            # source_path = os.path.join(p, str(source_path))
            self.mediaObject.setCurrentSource(Phonon.MediaSource(source_path))
            self.mediaObject.play()

        def handleStateChanged(self, newstate, oldstate):
            """Print a line when ``newstate`` is playing, stopped or error."""
            reported_states = (
                (Phonon.PlayingState, 'test6 playing: :'),
                (Phonon.StoppedState, 'test6 stopped: :'),
                (Phonon.ErrorState, 'test6 ERROR: could not play:'),
            )
            for state, prefix in reported_states:
                if newstate == state:
                    print(prefix, self.mediaObject.currentSource().fileName())
                    break

    win = Window()
    win.resize(200, 100)
    # win.show()
    win.exec_()
def timerTick():
    """Timer callback: ask the running ``QApplication`` to exit."""
    QtGui.QApplication.exit()
if __name__ == '__main__':
    # A QApplication must exist before any of the Qt-based probes run.
    app = QtGui.QApplication(sys.argv)
    app.setApplicationName('unicode_test')

    # Run the probes in their numbered order.
    for step in (test1, test2, test3, test4, test5, test6):
        step()

    # Start a 1 ms timer whose callback exits the application, so the event
    # loop entered below returns almost immediately.
    quit_timer = QtCore.QTimer()
    quit_timer.timeout.connect(timerTick)
    quit_timer.start(1)
    sys.exit(app.exec_())
123a的测试结果...
python3 unicode.py 123a�.wav
test1...using QtGui.QApplication.arguments()
test1 QFile.exists unknown False
test1 QFileInfo.exists unknown False
test1 os.path.exists unknown False
test1 os.path.isfile unknown False
test2...using sys.argv
test2 QFile.exists 123a�.wav True
test2 QFileInfo.exists 123a�.wav True
test2 os.path.exists 123a�.wav True
test2 os.path.isfile 123a�.wav True
test3...QtGui.QFileDialog.getOpenFileName()
test3 QFile.exists /home/mememe/Desktop/test/unicode/123a�.wav True
test3 QFileInfo.exists 123a�.wav True
test3 os.path.exists /home/mememe/Desktop/test/unicode/123a�.wav True
test3 os.path.isfile /home/mememe/Desktop/test/unicode/123a�.wav True
test4...QtCore.QDir().entryInfoList()
test4 123a�.wav
test4 123bÆ.wav
test4 123c�.wav
test4a QFile.exists 123c�.wav False
test4a QFileInfo.exists 123c�.wav False
test4a os.path.exists 123c�.wav False
test4a os.path.isfile 123c�.wav False
test4b QFile.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test4b QFileInfo.exists 123c�.wav False
test4b os.path.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test4b os.path.isfile /home/mememe/Desktop/test/unicode/123c�.wav False
test4 unicode.py
test5...os.listdir()
test5 unicode.py
test5 '/home/mememe/Desktop/test/unicode/unicode.py'
test5 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed
test5 '/home/mememe/Desktop/test/unicode/123c\udcb4.wav'
test5a QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test5a QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test5a os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test5a os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test5b QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' False
test5b QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' False
test5b os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' True
test5b os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' True
test5 123bÆ.wav
test5 '/home/mememe/Desktop/test/unicode/123bÆ.wav'
test5 123a�.wav
test5 '/home/mememe/Desktop/test/unicode/123a�.wav'
test6...Phonon and QtGui.QFileDialog.getOpenFileName()
test6 stopped: : /home/mememe/Desktop/test/unicode/123a�.wav
test6 playing: : /home/mememe/Desktop/test/unicode/123a�.wav
test6 stopped: : /home/mememe/Desktop/test/unicode/123a�.wav
用 123b 测试结果...
python3 unicode.py 123bÆ.wav
test1...using QtGui.QApplication.arguments()
test1 QFile.exists 123b.wav False
test1 QFileInfo.exists 123b.wav False
test1 os.path.exists 123b.wav False
test1 os.path.isfile 123b.wav False
test2...using sys.argv
test2 QFile.exists 123bÆ.wav True
test2 QFileInfo.exists 123bÆ.wav True
test2 os.path.exists 123bÆ.wav True
test2 os.path.isfile 123bÆ.wav True
test3...QtGui.QFileDialog.getOpenFileName()
test3 QFile.exists /home/mememe/Desktop/test/unicode/123bÆ.wav True
test3 QFileInfo.exists 123bÆ.wav True
test3 os.path.exists /home/mememe/Desktop/test/unicode/123bÆ.wav True
test3 os.path.isfile /home/mememe/Desktop/test/unicode/123bÆ.wav True
test4...QtCore.QDir().entryInfoList()
test4 123a�.wav
test4 123bÆ.wav
test4 123c�.wav
test4a QFile.exists 123c�.wav False
test4a QFileInfo.exists 123c�.wav False
test4a os.path.exists 123c�.wav False
test4a os.path.isfile 123c�.wav False
test4b QFile.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test4b QFileInfo.exists 123c�.wav False
test4b os.path.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test4b os.path.isfile /home/mememe/Desktop/test/unicode/123c�.wav False
test4 unicode.py
test5...os.listdir()
test5 unicode.py
test5 '/home/mememe/Desktop/test/unicode/unicode.py'
test5 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed
test5 '/home/mememe/Desktop/test/unicode/123c\udcb4.wav'
test5a QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test5a QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test5a os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test5a os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test5b QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' False
test5b QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' False
test5b os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' True
test5b os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' True
test5 123bÆ.wav
test5 '/home/mememe/Desktop/test/unicode/123bÆ.wav'
test5 123a�.wav
test5 '/home/mememe/Desktop/test/unicode/123a�.wav'
test6...Phonon and QtGui.QFileDialog.getOpenFileName()
test6 stopped: : /home/mememe/Desktop/test/unicode/123bÆ.wav
test6 playing: : /home/mememe/Desktop/test/unicode/123bÆ.wav
test6 stopped: : /home/mememe/Desktop/test/unicode/123bÆ.wav
123c的测试结果...
python3 unicode.py 123c�.wav
test1...using QtGui.QApplication.arguments()
test1 QFile.exists unknown False
test1 QFileInfo.exists unknown False
test1 os.path.exists unknown False
test1 os.path.isfile unknown False
test2...using sys.argv
test2 QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test2 QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test2 os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test2 os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test3...QtGui.QFileDialog.getOpenFileName()
test3 QFile.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test3 QFileInfo.exists 123c�.wav False
test3 os.path.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test3 os.path.isfile /home/mememe/Desktop/test/unicode/123c�.wav False
test4...QtCore.QDir().entryInfoList()
test4 123a�.wav
test4 123bÆ.wav
test4 123c�.wav
test4a QFile.exists 123c�.wav False
test4a QFileInfo.exists 123c�.wav False
test4a os.path.exists 123c�.wav False
test4a os.path.isfile 123c�.wav False
test4b QFile.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test4b QFileInfo.exists 123c�.wav False
test4b os.path.exists /home/mememe/Desktop/test/unicode/123c�.wav False
test4b os.path.isfile /home/mememe/Desktop/test/unicode/123c�.wav False
test4 unicode.py
test5...os.listdir()
test5 unicode.py
test5 '/home/mememe/Desktop/test/unicode/unicode.py'
test5 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed
test5 '/home/mememe/Desktop/test/unicode/123c\udcb4.wav'
test5a QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test5a QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' False
test5a os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test5a os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '123c\udcb4.wav' True
test5b QFile.exists 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' False
test5b QFileInfo.exists 'utf-8' codec can't encode character '\udcb4' in position 4: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' False
test5b os.path.exists 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' True
test5b os.path.isfile 'utf-8' codec can't encode character '\udcb4' in position 38: surrogates not allowed '/home/mememe/Desktop/test/unicode/123c\udcb4.wav' True
test5 123bÆ.wav
test5 '/home/mememe/Desktop/test/unicode/123bÆ.wav'
test5 123a�.wav
test5 '/home/mememe/Desktop/test/unicode/123a�.wav'
test6...Phonon and QtGui.QFileDialog.getOpenFileName()
test6 stopped: : /home/mememe/Desktop/test/unicode/123c�.wav
关于测试结果的有趣的事情......
- Test1 对所有 3 个文件都失败了。
- 所有 3 个文件都通过了测试 2...除了 123c 的 QFile 和 QFileInfo 测试
- Test3 123a 和 123b 通过,但 123c 失败
- Test4 ...QDir 在目录中找到所有 4 个文件
- 所有文件的 Test4a 和 Test4b 均失败
- Test5 ...os.listdir 找到目录下所有4个文件
- 注意:Test5a 和 test5b 检查必须使用不同的 unicode 检查?!
- Test5a 和 Test5b 未能通过 QFile 和 QfileInfo 测试,但通过了 os.path 检查。
- 对 123a 和 123b,Test6 通过,但 123c 失败……对于 123c,Phonon 播放器只收到了 stopped 消息,而 123a 和 123b 文件则依次收到了 stopped、playing、stopped 消息。
我知道这里的信息量很大......我只是想尽量说得详尽一些。
那么,最后的问题是:在 Python3 中处理 unicode 文件名的正确方法是什么?