Return a list of imported Python modules used in a script?

后端 未结 13 2221
深忆病人
深忆病人 2020-11-30 00:23

I am writing a program that categorizes a list of Python files by which modules they import. As such I need to scan the collection of .py files and return a list of which modules they import.

相关标签:
13条回答
  • 2020-11-30 01:15

    You might want to try dis (pun intended):

    import dis
    from collections import defaultdict
    from pprint import pprint
    
    # Sample source whose import statements are inspected below.
    statements = """
    from __future__ import (absolute_import,
                            division)
    import os
    import collections, itertools
    from math import *
    from gzip import open as gzip_open
    from subprocess import check_output, Popen
    """

    # Disassemble the sample and keep only the instructions whose opcode name
    # mentions IMPORT.
    instructions = dis.get_instructions(statements)
    imports = list(filter(lambda ins: 'IMPORT' in ins.opname, instructions))

    # Group the instruction arguments by opcode name.
    grouped = defaultdict(list)
    for instr in imports:
        grouped[instr.opname].append(instr.argval)

    pprint(grouped)
    

    outputs

    defaultdict(<class 'list'>,
                {'IMPORT_FROM': ['absolute_import',
                                 'division',
                                 'open',
                                 'check_output',
                                 'Popen'],
                 'IMPORT_NAME': ['__future__',
                                 'os',
                                 'collections',
                                 'itertools',
                                 'math',
                                 'gzip',
                                 'subprocess'],
                 'IMPORT_STAR': [None]})
    

    Your imported modules are grouped['IMPORT_NAME'].

    0 讨论(0)
  • 2020-11-30 01:15

    Thanks Tony Suffolk for inspect, importlib samples ... I built this wee module and you're all welcome to use it if it helps you. Giving back, yaaaay!

    import timeit
    import os
    import inspect, importlib as implib
    import textwrap as twrap
    
    def src_modules(filename):
        """Describe every module object bound in another module's namespace.

        The part of *filename* before the first "." is imported with
        ``importlib.import_module`` and its module-valued attributes are
        listed in the order returned by ``inspect.getmembers``.

        Args:
            filename: File or module name, e.g. ``"print_src_info.py"``.

        Returns:
            A list of strings shaped like ``"'textwrap':=twrap"``: the quoted
            real module name, then the attribute name it is bound to.

        Raises:
            AssertionError: if *filename* is shorter than two characters.
            ImportError: if the module cannot be imported.
        """
        assert (len(filename)>1)

        mod = implib.import_module(filename.split(".")[0])
        # Read the name from __name__ rather than slicing the module's repr:
        # the repr of a module without an origin is "<module 'x'>", which
        # would leave a stray ">" attached to the name.
        return [
            "{0!r}:={1}".format(member.__name__, alias)
            for alias, member in inspect.getmembers(mod, inspect.ismodule)
        ]
    
    def l_to_str(itr):
        """Sort *itr* in place and return its items, each followed by two spaces.

        The argument itself is sorted (the caller's list is modified) and
        must be non-empty; an empty sequence fails the assertion.
        """
        assert len(itr) > 0

        itr.sort()
        return "".join(entry + "  " for entry in itr)
    
    def src_info(filename, start_time=timeit.default_timer()):
        """Print a banner-framed report of the modules used by *filename*.

        Args:
            filename: Source file name; the part before the first "." is the
                module that gets inspected.
            start_time: Timer value the reported runtime is measured from.
                The default is evaluated once, when this function is defined,
                so by default the runtime counts from that moment.

        Returns:
            An empty string; the report itself goes to standard output.
        """
        assert len(filename) > 1

        module_name = filename.split(".")[0]
        banner = 80 * "#"

        print("\n" + banner)
        elapsed_ms = round((timeit.default_timer() - start_time) * 1000, 3)
        print(" runtime ~= {0} ms".format(elapsed_ms))
        print(" source file --> '{0}'".format(filename))
        # The reporting module is always this module's own __name__.
        print(" output via --> '{0}'".format(__name__))
        print(" modules used in '{0}':".format(module_name))
        module_text = l_to_str(src_modules(module_name))
        print("  " + "\n  ".join(twrap.wrap(module_text, 75)))
        print(banner)

        return ""
    
    
    # Self-demo: when this file is run directly, report on the file itself.
    if __name__ == "__main__":
        src_info(os.path.basename(__file__))
    
    
    ## how to use in X file:
    #
    # import print_src_info
    # import os
    #
    # < ... your code ... >
    #
    # if __name__ == "__main__":
    #     print_src_info.src_info(os.path.basename(__file__))
    
    
    ## example output:
    #
    # ################################################################################
    #  runtime ~= 0.049 ms
    #  source file --> 'print_src_info.py'
    #  output via --> '__main__'
    #  modules used in 'print_src_info':
    #   'importlib':=implib  'inspect':=inspect  'os':=os  'textwrap':=twrap
    #   'timeit':=timeit
    # ################################################################################
    
    0 讨论(0)
  • 2020-11-30 01:20

    I understand that this post is VERY old but I have found an ideal solution. I came up with this idea:

    def find_modules(code):
        """Return the module names imported by the source text *code*.

        This is a line-based heuristic, not a parser. It recognises
        ``import a``, ``import a as b``, ``import a, b as c`` and
        ``from a import b`` at any indentation, ignores trailing comments,
        and skips every other line.

        Args:
            code: Python source as a single string.

        Returns:
            Module names in order of appearance; duplicates are kept.
        """
        modules = []
        for raw_line in code.splitlines():
            # Drop trailing comments and indentation before classifying.
            line = raw_line.partition("#")[0].strip()
            if line.startswith("import "):
                for clause in line[len("import "):].split(","):
                    # "module as alias" -> "module"
                    name = clause.partition(" as ")[0].strip()
                    if name:
                        modules.append(name)
            elif line.startswith("from ") and " import " in line:
                # Require " import " so arbitrary lines that merely start
                # with "from " are not sliced into garbage.
                modules.append(line[len("from "):line.index(" import ")].strip())
        return modules
    
    # Sample input covering plain, from-, aliased and comma-separated imports.
    code = """
    import foo
    import bar
    from baz import eggs
    import mymodule as test
    import hello, there, stack
    """
    print(find_modules(code))
    

    It handles from-imports, aliases (as), comma-separated imports and plain import statements. It requires no dependencies and works on source that contains other lines of code.

    The above code prints:

    ['foo', 'bar', 'baz', 'mymodule', 'hello', 'there', 'stack']
    

    Just put your code in the find_modules function.

    0 讨论(0)
  • 2020-11-30 01:21

    Well, you could always write a simple script that searches the file for import statements. This one finds all imported modules and files, including those imported in functions or classes:

    def find_imports(toCheck):
        """
        Given a filename, returns a list of modules imported by the program.

        Only plain ``import ...`` lines are examined (``from x import y`` is
        not handled). A name is reported only when its top-level package can
        be located on the import path. This function does not run the code,
        so import statements in if/else or try/except blocks will always be
        included.

        Args:
            toCheck: Path of the Python source file to scan.

        Returns:
            Module names, as written in the file, in order of appearance.
        """
        from importlib.util import find_spec

        importedItems = []
        with open(toCheck, 'r') as pyFile:
            for line in pyFile:
                # ignore comments
                statement = line.partition("#")[0].split(None, 1)
                if len(statement) != 2 or statement[0] != "import":
                    continue
                for clause in statement[1].split(","):
                    # "module as alias" -> "module". Splitting on whitespace
                    # (rather than on the substring "as") keeps names such as
                    # "asyncio" or "base64" intact.
                    words = clause.split()
                    if not words:
                        continue
                    imported = words[0]
                    try:
                        # Locate the top-level package without importing it.
                        found = find_spec(imported.partition(".")[0]) is not None
                    except (ImportError, ValueError):
                        # ignore items that can't be located
                        found = False
                    if found:
                        importedItems.append(imported)

        return importedItems

    # Prompt only when run as a script, so importing this module has no side effects.
    if __name__ == "__main__":
        toCheck = input("Which file should be checked: ")
        print(find_imports(toCheck))
    

    This doesn't do anything for from module import something style imports, though that could easily be added, depending on how you want to deal with those. It also doesn't do any syntax checking, so if you have some funny business like import sys gtk, os it will think you've imported all three modules even though the line is an error. It also doesn't deal with try/except type statements with regards to import - if it could be imported, this function will list it. It also doesn't deal well with multiple imports per line if you use the as keyword. The real issue here is that I'd have to write a full parser to really do this correctly. The given code works in many cases, as long as you understand there are definite corner cases.

    One issue is that relative imports will fail if this script isn't in the same directory as the given file. You may want to add the directory of the given script to sys.path.

    0 讨论(0)
  • 2020-11-30 01:21

    I know this is old, but like the OP I was looking for such a solution, so I wrote this code to find the modules imported by the scripts in a folder. It works with both the import abc and the from abc import cde forms. I hope it helps someone else.

    import re
    import os
    
    
    def get_imported_modules(folder):
        """Return the top-level modules imported by the ``.py`` files in *folder*.

        Files are scanned in sorted name order with a line-anchored regular
        expression (no parsing), so ``import a``, ``import a.b``,
        ``import a as x, b`` and ``from a.b import c`` all report ``a`` (and
        ``b`` where listed). Relative imports (``from . import x``) are skipped.

        Args:
            folder: Directory whose direct ``.py`` children are scanned.

        Returns:
            Unique top-level module names in first-seen order.
        """
        # One pattern for both statement forms keeps matches in source order.
        statement = re.compile(
            r"^[ \t]*(?:import[ \t]+(?P<plain>[^#\n]+)"
            r"|from[ \t]+(?P<package>\w+)[\w.]*[ \t]+import\b)",
            re.MULTILINE,
        )
        leading_name = re.compile(r"\s*(\w+)")

        imports = []
        seen = set()  # O(1) duplicate check; `imports` keeps the order
        for file_name in sorted(os.listdir(folder)):
            if not file_name.endswith(".py"):
                continue
            with open(os.path.join(folder, file_name), mode="r") as f:
                source = f.read()
            for match in statement.finditer(source):
                package = match.group("package")
                if package:
                    names = [package]
                else:
                    # "a.b as x, c" -> ["a", "c"]: first word of each clause,
                    # cut at the first dot.
                    heads = (leading_name.match(clause)
                             for clause in match.group("plain").split(","))
                    names = [head.group(1) for head in heads if head]
                for name in names:
                    if name not in seen:
                        seen.add(name)
                        imports.append(name)

        return imports
    
    0 讨论(0)
  • 2020-11-30 01:25

    IMO the best way to do this is to use the http://furius.ca/snakefood/ package. The author has done all of the required work to get not only directly imported modules but it uses the AST to parse the code for runtime dependencies that a more static analysis would miss.

    Worked up a command example to demonstrate:

    sfood ./example.py | sfood-cluster > example.deps
    

    That will generate a basic dependency file of each unique module. For even more detail use:

    sfood -r -i ./example.py | sfood-cluster > example.deps
    

    To walk a tree and find all imports, you can also do this in code: Please NOTE - The AST chunks of this routine were lifted from the snakefood source which has this copyright: Copyright (C) 2001-2007 Martin Blais. All Rights Reserved.

     import os
     import compiler
     from compiler.ast import Discard, Const
     from compiler.visitor import ASTVisitor
    
     def pyfiles(startPath):
         """Return ``[directory, filename]`` pairs for every .py file under *startPath*.

         The tree is walked recursively with ``os.walk``. Each pair holds the
         absolute directory that actually contains the file, so
         ``os.path.join(directory, filename)`` is always a valid path. A
         missing or non-directory *startPath* yields an empty list.
         """
         r = []
         d = os.path.abspath(startPath)
         if os.path.isdir(d):
             for root, dirs, files in os.walk(d):
                 for f in files:
                     if os.path.splitext(f)[1] == '.py':
                         # Record the walked directory, not the start path,
                         # so files in subdirectories stay reachable.
                         r.append([root, f])
         return r
    
     class ImportVisitor(object):
         """Collect import records while an AST is being walked.

         Records are queued in ``recent`` as
         ``(module, name, local_name, lineno, level)`` tuples and moved to
         ``modules`` by accept_imports(), which appends a pragma value to each.
         """

         def __init__(self):
             self.modules = []
             self.recent = []

         def visitImport(self, node):
             """Queue one record per name of an ``import ...`` node (level 0)."""
             self.accept_imports()
             for name, alias in node.names:
                 self.recent.append((name, None, alias or name, node.lineno, 0))

         def visitFrom(self, node):
             """Queue one record per name of a ``from ... import ...`` node."""
             self.accept_imports()
             source_module = node.modname
             if source_module == '__future__':
                 return # Ignore these.
             for name, alias in node.names:
                 if name == '*':
                     # A star import does not say which names get bound.
                     record = (source_module, None, None, node.lineno, node.level)
                 else:
                     record = (source_module, name, alias or name,
                               node.lineno, node.level)
                 self.recent.append(record)

         def default(self, node):
             """Flush queued records for any other node.

             When records are queued and *node* is a Discard whose only child
             is a Const, that constant's value is attached as their pragma.
             """
             pragma = None
             if self.recent and isinstance(node, Discard):
                 kids = node.getChildren()
                 if len(kids) == 1:
                     only_child = kids[0]
                     if isinstance(only_child, Const):
                         pragma = only_child.value
             self.accept_imports(pragma)

         def accept_imports(self, pragma=None):
             """Move every queued record to ``modules`` with *pragma* appended."""
             for mod, name, local, lineno, level in self.recent:
                 self.modules.append((mod, name, local, lineno, level, pragma))
             self.recent = []

         def finalize(self):
             """Flush anything still queued and return the collected records."""
             self.accept_imports()
             return self.modules
    
     class ImportWalker(ASTVisitor):
         """ASTVisitor that also reports each default-handled node to a wrapped visitor."""
         def __init__(self, visitor):
             ASTVisitor.__init__(self)
             # Visitor whose default() is called from this walker's default().
             self._visitor = visitor
         def default(self, node, *args):
             # Notify the wrapped visitor first, then continue with the base
             # class's default handling.
             self._visitor.default(node)
             ASTVisitor.default(self, node, *args) 
    
     def parse_python_source(fn):
         """Parse the source file *fn* and return the import records found in it.

         The file is parsed with ``compiler.parse`` and walked with an
         ImportVisitor driven by an ImportWalker; the result is whatever
         ``ImportVisitor.finalize()`` returns.
         """
         # Close the file as soon as it is read instead of leaking the handle.
         with open(fn, 'rU') as source_file:
             contents = source_file.read()
         ast = compiler.parse(contents)
         vis = ImportVisitor()

         compiler.walk(ast, vis, ImportWalker(vis))
         return vis.finalize()
    
     # Demo driver: print each discovered file followed by its import records.
     for d, f in pyfiles('/Users/bear/temp/foobar'):
         print d, f
         print parse_python_source(os.path.join(d, f)) 
    

    0 讨论(0)
提交回复
热议问题