换句话说,路径必须以 / 开头,只允许使用字母和数字,并且除 /、.、- 之外不允许出现任何其他特殊字符。
您缺少一些在 URL 中有效的字符(例如 ~)。下面的示例把它加入了允许的字符集:
import string
import urllib
import urlparse
# The URL helpers moved between Python 2 and Python 3; import them locally so
# this script works on either interpreter.
try:
    from urllib.parse import unquote, urlparse  # Python 3
except ImportError:
    from urllib import unquote  # Python 2
    from urlparse import urlparse

# Characters accepted in a path: ASCII letters, digits and '/', '.', '-', '~'.
# string.ascii_letters is used instead of string.letters because the latter is
# locale-dependent on Python 2 and does not exist on Python 3.
valid_chars = string.ascii_letters + string.digits + '/.-~'
_valid_char_set = frozenset(valid_chars)


def is_valid_path(path):
    """Return True if *path* starts with '/' and uses only ``valid_chars``.

    An empty path (for example the path of ``http://host`` with no trailing
    slash) is rejected instead of raising ``IndexError``.
    """
    return path.startswith('/') and _valid_char_set.issuperset(path)


urls = [
    'http://www.my.uni.edu/info/matriculation/enroling.html',
    'http://info.my.org/AboutUs/Phonebook',
    'http://www.library.my.town.va.us/Catalogue/76523471236%2Fwen44--4.98',
    'http://www.my.org/462F4F2D4241522A314159265358979323846',
    'http://www.myu.edu/org/admin/people#andy',
    'http://www.w3.org/RDB/EMP?*%20where%20name%%3Ddobbins',
]

# Decoded paths of every URL in ``urls`` that passes ``is_valid_path``.
valid_paths = []
for url in urls:
    # Only the path component is checked (query and fragment are split off by
    # urlparse). Percent-escapes are decoded first, so '%2F' counts as '/'.
    path = unquote(urlparse(url).path)
    if is_valid_path(path):
        valid_paths.append(path)