Find broken symlinks with Python

后端 未结 8 1450
广开言路
广开言路 2020-12-15 05:17

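If I call os.stat() on a broken symlink, Python throws an OSError exception. This makes it useful for finding them. However, there are a few other reasons that os.stat() might throw a similar exception. Is there a more precise way of detecting broken symlinks with Python under Linux?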

8条回答
  •  天涯浪人
    2020-12-15 05:29

    I had a similar problem: how to catch broken symlinks, even when they occur in some parent dir? I also wanted to log all of them (in an application dealing with a fairly large number of files), but without too many repeats.

    Here is what I came up with, including unit tests.

    fileutil.py:

    import os
    from functools import lru_cache
    import logging
    
    logger = logging.getLogger(__name__)
    
    @lru_cache(maxsize=2000)
    def check_broken_link(filename):
        """
        Check for broken symlinks, either at the file level, or in the
        hierarchy of parent dirs.
        If it finds a broken link, an ERROR message is logged.
        The function is cached, so that the same error messages are not repeated.

        Args:
            filename: file to check

        Returns:
            True if the file (or one of its parents) is a broken symlink.
            False otherwise (i.e. either it exists or not, but no element
            on its path is a broken link).

        """
        # os.path.exists() follows symlinks, so it is True for anything that
        # resolves to a real filesystem object (regular file, directory, FIFO,
        # socket, device, ...) and False for a dangling link. Testing only
        # isfile()/isdir() would misreport a valid link to a special file as
        # broken.
        if os.path.exists(filename):
            return False
        if os.path.islink(filename):
            # there is a symlink, but it is dead (pointing nowhere)
            link = os.readlink(filename)
            logger.error('broken symlink: %s -> %s', filename, link)
            return True
        # ok, we have either:
        #   1. a filename that simply doesn't exist (but the containing dir
        #      does exist), or
        #   2. a broken link in some parent dir
        parent = os.path.dirname(filename)
        if parent == filename:
            # reached root (dirname no longer shortens the path)
            return False
        # Recurse through the cached function itself, so that a broken parent
        # is logged only once no matter how many children are checked.
        return check_broken_link(parent)
    

    Unit tests:

    import logging
    import shutil
    import tempfile
    import os
    
    import unittest
    from ..util import fileutil
    
    
    class TestFile(unittest.TestCase):
        """Tests for fileutil.check_broken_link on a temporary tree of files and symlinks."""

        def _mkdir(self, path, create=True):
            """Return `path` joined onto the test dir; create the directory if `create`."""
            d = os.path.join(self.test_dir, path)
            if create:
                os.makedirs(d, exist_ok=True)
            return d

        def _mkfile(self, path, create=True):
            """Return `path` joined onto the test dir.

            If `create` is true, the parent directories are created and a small
            file is written there; otherwise only the joined path is computed.
            """
            f = os.path.join(self.test_dir, path)
            if create:
                d = os.path.dirname(f)
                os.makedirs(d, exist_ok=True)
                with open(f, mode='w') as fp:
                    fp.write('hello')
            return f

        def _mklink(self, target, path):
            """Create a symlink at `path` (under the test dir) pointing to `target`.

            `target` is passed to os.symlink unchanged, so a relative target is
            resolved relative to the directory containing the link.
            """
            f = os.path.join(self.test_dir, path)
            d = os.path.dirname(f)
            os.makedirs(d, exist_ok=True)
            os.symlink(target, f)
            return f

        def setUp(self):
            # reset the lru_cache of check_broken_link
            fileutil.check_broken_link.cache_clear()

            # create a temporary directory for our tests; register the cleanup
            # right away so the directory is removed even if the rest of setUp
            # fails (tearDown is not run when setUp raises)
            self.test_dir = tempfile.mkdtemp()
            self.addCleanup(shutil.rmtree, self.test_dir)

            # create a small tree of dirs, files, and symlinks
            self._mkfile('a/b/c/foo.txt')                        # real file
            self._mklink('b', 'a/x')                             # valid link to a dir
            self._mklink('b/c/foo.txt', 'a/f')                   # valid link to a file
            self._mklink('../..', 'a/b/c/y')                     # valid link back up to 'a'
            self._mklink('not_exist.txt', 'a/b/c/bad_link.txt')  # dangling link
            # link to an absolute path whose parent dir does not exist
            bad_path = self._mkfile('a/XXX/c/foo.txt', create=False)
            self._mklink(bad_path, 'a/b/c/bad_path.txt')
            self._mklink('not_a_dir', 'a/bad_dir')               # dangling dir link

        def catch_check_broken_link(self, expected_errors, expected_result, path):
            """Run check_broken_link on `path` and verify its result and log output.

            Args:
                expected_errors: number of ERROR records expected to be logged.
                expected_result: expected return value of check_broken_link.
                path: path relative to the test dir.
            """
            filename = self._mkfile(path, create=False)
            with self.assertLogs(level='ERROR') as cm:
                result = fileutil.check_broken_link(filename)
                # trick: emit one extra message, so the with assertLogs block
                # doesn't fail when check_broken_link logs nothing
                logging.critical('nothing')
            # compare the numeric level: `is 'ERROR'` would test object
            # identity, which is not guaranteed for equal strings
            error_logs = [r for r in cm.records if r.levelno == logging.ERROR]
            actual_errors = len(error_logs)
            self.assertEqual(expected_result, result, msg=path)
            self.assertEqual(expected_errors, actual_errors, msg=path)

        def test_check_broken_link_exists(self):
            self.catch_check_broken_link(0, False, 'a/b/c/foo.txt')
            self.catch_check_broken_link(0, False, 'a/x/c/foo.txt')
            self.catch_check_broken_link(0, False, 'a/f')
            self.catch_check_broken_link(0, False, 'a/b/c/y/b/c/y/b/c/foo.txt')

        def test_check_broken_link_notfound(self):
            self.catch_check_broken_link(0, False, 'a/b/c/not_found.txt')

        def test_check_broken_link_badlink(self):
            self.catch_check_broken_link(1, True, 'a/b/c/bad_link.txt')
            # second call is served from the cache: same result, no new error
            self.catch_check_broken_link(0, True, 'a/b/c/bad_link.txt')

        def test_check_broken_link_badpath(self):
            self.catch_check_broken_link(1, True, 'a/b/c/bad_path.txt')
            self.catch_check_broken_link(0, True, 'a/b/c/bad_path.txt')

        def test_check_broken_link_badparent(self):
            self.catch_check_broken_link(1, True, 'a/bad_dir/c/foo.txt')
            self.catch_check_broken_link(0, True, 'a/bad_dir/c/foo.txt')
            # bad link, but shouldn't log a new error:
            self.catch_check_broken_link(0, True, 'a/bad_dir/c')
            # bad link, but shouldn't log a new error:
            self.catch_check_broken_link(0, True, 'a/bad_dir')
    
    # Run the test suite when this file is executed directly as a script.
    if __name__ == '__main__':
        unittest.main()
    

提交回复
热议问题