python - 在 Python 中从 BaseHTTPServer 下载带有 unicode 字符的文件

Question

我正在使用 Python 2.7.8 来创建一个服务器，我可以从中下载文件。问题是许多文件都包含 utf-8 字符，例如 čćžšđ 等。我尝试解码路径，但每当我单击带有 unicode 字符的文件名时，它都会返回“错误 404：找不到文件”。如何正确解码路径，以便可以下载带有 utf-8 字符的文件，如果可能，在我的服务器索引上将它们显示为 utf-8 字符。这是我的服务器代码，其中包括我尝试过的内容和完整的服务器代码：

# -*- coding: utf-8 -*-

__version__ = "0.6"

__all__ = ["SimpleHTTPRequestHandler"]

import os
import posixpath
import BaseHTTPServer
import urllib
import cgi
import shutil
import mimetypes
from StringIO import StringIO
import SocketServer
import time
import sys
import unicodedata



class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
        self.copyfile(f, self.wfile)
        f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        path_now = self.translate_path(self.path)
        path_change = (os.path.dirname(os.path.abspath(__file__)) + "/files/")
        if path_now.startswith("/home/files/"):
           pass
        else:
           os.chdir(path_change)
        path = self.translate_path(self.path)
        """Those are few examples of what have I tried:
        path = path1.decode('ascii', 'ignore').makePath()
        path = unicodedata.normalize('NFKD', path1).encode('ascii','ignore')
        path2 = path1.decode("utf-8")
        path = path2.encode("utf-8")
        path = path2.encode("utf-8")"""
        f = None
        if os.path.isdir(path):
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        if ctype.startswith('text/'):
            mode = 'r'
        else:
            mode = 'rb'
        try:
            f = open(path.decode(sys.getfilesystemencoding()), mode) #this doesn't work, nothing changes
            size = os.path.getsize(path)
        except IOError:
            self.send_error(404, "File not found")
            return None
        self.send_response(200)
        self.send_header("Content-type", ctype + "; charset=utf-8") # + " charset=utf-8"
        self.send_header("Content-Length", size)
        self.end_headers()
        return f

    def list_directory(self, path):
        try:
            list = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        list.sort(lambda a, b: cmp(a.lower(), b.lower()))
        f = StringIO()
        f.write("<title>Directory listing for %s</title>\n" % self.path)
        f.write("<h2>Directory listing for %s</h2>\n" % self.path)
        f.write("<hr>\n<ul>\n")
        for name in list:
            fullname = os.path.join(path, name)
            displayname = linkname = name = cgi.escape(name)
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
            f.write('<li><a href="%s">%s</a>\n' % (linkname, displayname))
        f.write("</ul>\n<hr>\n")
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        return f

    def translate_path(self, path):
        try:
            path = posixpath.normpath(urllib.unquote(path))
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwd()
            for word in words:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir): continue
                path = os.path.join(path, word)
            return path
        except Exception, e:
           self.send_error(403, e)
           path = posixpath.normpath(urllib.unquote(path))
           words = path.split('/')
           words = filter(None, words)
           path = os.getcwd()
           for word in words:
               drive, word = os.path.splitdrive(word)
               head, word = os.path.split(word)
               if word in (os.curdir, os.pardir): continue
               path = os.path.join(path, word)
           return path.encode("utf-8")

    def copyfile(self, source, outputfile):
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        base, ext = posixpath.splitext(path)
        if self.extensions_map.has_key(ext):
            return self.extensions_map[ext]
        ext = ext.lower()
        if self.extensions_map.has_key(ext):
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })

class ForkingHTTPServer(SocketServer.ForkingMixIn, BaseHTTPServer.HTTPServer):
    def finish_request(self, request, client_address):
        request.settimeout(30)
        BaseHTTPServer.HTTPServer.finish_request(self, request, client_address)


def test(HandlerClass = SimpleHTTPRequestHandler, ServerClass = BaseHTTPServer.HTTPServer, server_address=("192.168.1.2", 8000)):
    try:
        print "Server started"
        srvr = ForkingHTTPServer(server_address, HandlerClass)
        srvr.serve_forever()  # serve_forever
    except KeyboardInterrupt:
        print "Closing sockets..."
        time.sleep(2)
        print "Server is shutting down in 3"
        time.sleep(1)
        print "Server is shutting down in 2"
        time.sleep(1)
        print "Server is shutting down in 1"
        time.sleep(1)
        srvr.socket.close()


if __name__ == '__main__':
        test()

我希望这是您需要的所有信息。如果您需要其他任何内容，请发表评论，我很乐意编辑我的问题；）

score 1 · Accepted Answer

我刚刚看到我在“send_head”函数中将字符集设置为 utf-8，但在“list_directory”函数中没有。另外，我编辑

"; charset=utf-8"

至

'; charset="utf-8"'

它现在就像一个魅力。

看起来我一直在编码/解码正确的东西，但没有正确设置我的标题。

python - 在 Python 中从 BaseHTTPServer 下载带有 unicode 字符的文件

1 回答 1

Related

Reference