We already had a custom importer (disclaimer: I did not write that code, I'm just the current maintainer) whose load_module looked like this:
def load_module(self,fullname):
    if fullname in sys.modules:
        return sys.modules[fullname]
    else: # set to avoid reimporting recursively
        sys.modules[fullname] = imp.new_module(fullname)
    if isinstance(fullname,unicode):
        filename = fullname.replace(u'.',u'\\')
        ext = u'.py'
        initfile = u'__init__'
    else:
        filename = fullname.replace('.','\\')
        ext = '.py'
        initfile = '__init__'
    try:
        if os.path.exists(filename+ext):
            with open(filename+ext,'U') as fp:
                mod = imp.load_source(fullname,filename+ext,fp)
                sys.modules[fullname] = mod
                mod.__loader__ = self
        else:
            mod = sys.modules[fullname]
            mod.__loader__ = self
            mod.__file__ = os.path.join(os.getcwd(),filename)
            mod.__path__ = [filename]
            #init file
            initfile = os.path.join(filename,initfile+ext)
            if os.path.exists(initfile):
                with open(initfile,'U') as fp:
                    code = fp.read()
                exec compile(code, initfile, 'exec') in mod.__dict__
        return mod
    except Exception as e: # wrap in ImportError a la python2 - will keep
                           # the original traceback even if import errors nest
        print 'fail', filename+ext
        raise ImportError, u'caused by ' + repr(e), sys.exc_info()[2]
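For completeness: a PEP 302 meta path importer is only asked to load a module if its find_module first claims the name. The real find_module is not shown above; a minimal sketch, assuming the same dotted-name-to-backslash-path convention as load_module (so an illustration, not the actual Wrye Bash code), would look like:

import os

class UnicodeImporter(object):

    def find_module(self, fullname, path=None):
        # mirror load_module: dots in the module name become path separators
        filename = fullname.replace('.', '\\')
        if os.path.exists(filename + '.py') or os.path.isdir(filename):
            return self  # we will handle this import
        return None      # otherwise let the default machinery deal with it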
So I thought I could replace the parts that access the sys.modules cache with overridable methods which, in my override, would leave that cache alone:
@@ -48,2 +55,2 @@ class UnicodeImporter(object):
-        if fullname in sys.modules:
-            return sys.modules[fullname]
+        if self._check_imported(fullname):
+            return self._get_imported(fullname)
@@ -51 +58 @@ class UnicodeImporter(object):
-            sys.modules[fullname] = imp.new_module(fullname)
+            self._add_to_imported(fullname, imp.new_module(fullname))
@@ -64 +71 @@ class UnicodeImporter(object):
-                    sys.modules[fullname] = mod
+                    self._add_to_imported(fullname, mod)
@@ -67 +74 @@ class UnicodeImporter(object):
-            mod = sys.modules[fullname]
+            mod = self._get_imported(fullname)
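For that diff to work, the base UnicodeImporter needs default implementations of those hooks that preserve the original sys.modules behaviour - presumably along these lines (a sketch of the obvious defaults, not the actual code):

import sys

class UnicodeImporter(object):

    # default hooks: behave exactly like the old inline sys.modules access
    def _check_imported(self, fullname):
        return fullname in sys.modules

    def _get_imported(self, fullname):
        return sys.modules[fullname]

    def _add_to_imported(self, fullname, mod):
        sys.modules[fullname] = mod

The fake importer then only has to override these three methods.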
and define:
class FakeUnicodeImporter(UnicodeImporter):

    _modules_to_discard = {}

    def _check_imported(self, fullname):
        return fullname in sys.modules or fullname in self._modules_to_discard

    def _get_imported(self, fullname):
        try:
            return sys.modules[fullname]
        except KeyError:
            return self._modules_to_discard[fullname]

    def _add_to_imported(self, fullname, mod):
        self._modules_to_discard[fullname] = mod

    @classmethod
    def cleanup(cls):
        cls._modules_to_discard.clear()
Then I added the importer to sys.meta_path and was good to go:
importer = sys.meta_path[0]
try:
    if not hasattr(sys,'frozen'):
        sys.meta_path = [fake_importer()]
    perform_the_imports() # see question
finally:
    fake_importer.cleanup()
    sys.meta_path = [importer]
Right? Wrong!
Traceback (most recent call last):
  File "bash\bush.py", line 74, in __supportedGames
    module = __import__('game',globals(),locals(),[modname],-1)
  File "Wrye Bash Launcher.pyw", line 83, in load_module
    exec compile(code, initfile, 'exec') in mod.__dict__
  File "bash\game\game1\__init__.py", line 29, in <module>
    from .constants import *
ImportError: caused by SystemError("Parent module 'bash.game.game1' not loaded, cannot perform relative import",)
Huh? But I am currently importing that very same module. Well, the answer is probably in import's docs:
If the module is not found in the cache, then sys.meta_path is searched (the specification for sys.meta_path can be found in PEP 302).
That's not completely to the point, but my guess is that the statement from .constants import * looks up sys.modules to check whether the parent module is there, and I see no way of bypassing that (note that our custom loader uses the builtin import mechanism for modules; mod.__loader__ = self is set after the fact).
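The failure is easy to reproduce in isolation: in Python 2 an explicit relative import resolves its parent package through sys.modules, so if the parent is missing from that cache you get exactly the SystemError that ends up wrapped in the ImportError above. A minimal sketch (pkg is a made-up name, assumed not to be importable):

# Python 2.7: the relative import machinery looks the parent package
# ('pkg' here) up in sys.modules and fails when it is not cached there
namespace = {'__name__': 'pkg.child', '__package__': 'pkg'}
exec "from . import constants" in namespace
# SystemError: Parent module 'pkg' not loaded, cannot perform relative import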
So I updated my FakeImporter to use the sys.modules cache and then clean it up afterwards:
class FakeUnicodeImporter(UnicodeImporter):

    _modules_to_discard = set()

    def _check_imported(self, fullname):
        return fullname in sys.modules or fullname in self._modules_to_discard

    def _add_to_imported(self, fullname, mod):
        super(FakeUnicodeImporter, self)._add_to_imported(fullname, mod)
        self._modules_to_discard.add(fullname)

    @classmethod
    def cleanup(cls):
        for m in cls._modules_to_discard: del sys.modules[m]
This, however, blew up in a new way - or rather in two ways:

- a reference to the game/ package was held in the bash top package instance in sys.modules:

  bash\
      __init__.py
      the_code_in_question_is_here.py
      game\
          ...

  because game is imported as bash.game. That reference held references to all the game1, game2, ... subpackages, so those were never garbage collected.
- a reference to another module (brec) was held as bash.brec by the same bash module instance. That reference had been bound via from .. import brec in game\game1 without triggering an import, in order to update SomeClass. However, in yet another module an import of the form from ...brec import SomeClass did trigger an import, and a second instance of the brec module ended up in sys.modules. That instance had a non-updated SomeClass and blew up with an AttributeError (a quick simulation of the no-import-triggered behaviour follows this list).
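The no-import-triggered part can be simulated with throwaway module objects; bash and brec here are stand-ins, and the __name__/__package__ values are picked so the two dots resolve to the cached bash package:

import sys
import types

# simulate the state after cleanup: 'bash' is still cached and still holds
# a stale 'brec' attribute, but 'bash.brec' itself is gone from sys.modules
bash = types.ModuleType('bash')
bash.__path__ = []                       # make it look like a package
stale_brec = types.ModuleType('bash.brec')
bash.brec = stale_brec
sys.modules['bash'] = bash

ns = {'__name__': 'bash.game.fake_module', '__package__': 'bash.game'}
exec "from .. import brec" in ns         # satisfied by the existing attribute...
assert ns['brec'] is stale_brec
assert 'bash.brec' not in sys.modules    # ...so no import was triggered
del sys.modules['bash']                  # tidy up the simulation

A from ..brec import SomeClass in the same namespace, by contrast, does go through the import machinery for bash.brec itself, which is how a second, fresh instance of the module can end up in sys.modules.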
Both were fixed by manually deleting those references - so gc collected all the modules (5 MB of RAM out of 75) and the from .. import brec did trigger an import (this from ... import foo vs from ...foo import bar difference warrants a question of its own).
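In code, the manual cleanup amounted to something along these lines (a sketch - the attribute names follow the description above, not the actual commit):

import gc
import sys

bash = sys.modules['bash']              # the top level package
# drop the attributes that kept the throwaway module instances alive
for attr in ('game', 'brec'):
    if hasattr(bash, attr):
        delattr(bash, attr)
gc.collect()                            # with no referrers left they are freed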
The moral of the story is that it is possible, but:

- the package and its subpackages should only reference each other
- all references to external modules/packages should be deleted from the top-level package's attributes
- the reference to the package itself should be deleted from the top-level package's attributes

If this sounds complicated and error-prone, it is - at least now I have a much cleaner view of the interdependencies and their perils. Time to address that.
This post was sponsored by PyDev's debugger - I found the gc module very useful in grokking what was going on (tips from here). Of course there were a lot of variables that belonged to the debugger itself, and that complicated things.
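For reference, the kind of gc incantation that helps here is gc.get_referrers - a generic sketch, not the exact debugging session ('bash.brec' stands in for whichever module refuses to die):

import gc
import sys

mod = sys.modules.get('bash.brec')
if mod is not None:
    # list every object that still holds a reference to the module
    for referrer in gc.get_referrers(mod):
        print type(referrer), repr(referrer)[:100]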