Files in directory with wildcard on Windows

后端 未结 2 1140
南方客
南方客 2020-12-19 18:12

How can I easily get all file paths that match a path containing wildcards? For example: C:/Data*Set/Files*/*.txt. I wrote it on Linux using the glob function.

相关标签:
2条回答
  • 2020-12-19 18:49

    You should do away with OS-specific file access in favor of the OS-independent Filesystem Library.

    Let's say that you're given filesystem::path input which contains the path with wildcards. To use this to solve your problem you'd need to:

    1. Use parent_path to break apart input into directories
    2. Use filename to obtain the input filename
    3. Obtain a directory_iterator to the relative or absolute path where the input begins
    4. Create a recursive function which takes in begin and end iterators to the obtained parent path, the directory iterator, and the filename
    5. Any time a directory or filename uses a '*' use a regex with the iterator to determine the directory which should be progressed to next
    6. Either return the path to the matching file or an empty path

    Thanks to Ben Voigt's excellent comment, I've updated the algorithm to step over directories that contain no wildcard.

    For example:

    /// Converts one glob-style path component into a regular expression.
    ///
    /// Each `*` becomes `.*` (any run of characters). Every other character that
    /// is special in the default std::regex (ECMAScript) grammar is
    /// backslash-escaped so that it matches literally; callers must therefore
    /// pass the raw glob text and must NOT pre-escape dots themselves.
    ///
    /// @param arg  In/out. The glob component on entry; overwritten with the
    ///             generated regex source on return.
    /// @return The compiled regex, intended for use with std::regex_match
    ///         against a single file or directory name.
    std::regex GenerateRegex(std::string& arg) {
        // Characters that would otherwise be interpreted by the regex engine.
        static const std::string kRegexMeta = R"re(\^$.|?+()[]{})re";

        std::string pattern;
        pattern.reserve(arg.size() * 2);

        for (const char ch : arg) {
            if (ch == '*') {
                pattern += ".*";
                continue;
            }
            if (kRegexMeta.find(ch) != std::string::npos) {
                pattern += '\\';
            }
            pattern += ch;
        }

        arg.swap(pattern);
        return std::regex(arg);
    }

    /// Finds the first regular (non-directory) entry matching a wildcarded path.
    ///
    /// The range [start, finish) holds the directory components still to be
    /// resolved below `directory`; `filename` is the (possibly wildcarded) name
    /// looked for in the final directory. Components without a `*` are appended
    /// to `directory` directly; a component containing `*` is matched against
    /// the sub-directories of `directory`, and each match is searched recursively.
    ///
    /// @param directory Directory the remaining components are relative to.
    /// @param start     First unresolved directory component. Taken by value, so
    ///                  temporaries such as `begin(p)` or `next(it)` are accepted
    ///                  and the caller's iterator is never advanced.
    /// @param finish    End of the directory components. Both iterators must
    ///                  refer to a path object that outlives this call.
    /// @param filename  Glob for the file name; only `*` is a wildcard. It is
    ///                  never modified, so the same pattern is applied
    ///                  unchanged in every candidate directory.
    /// @return Path of the first matching non-directory entry, or an empty path
    ///         if nothing matches or a directory cannot be opened.
    ///
    /// NOTE(review): <windows.h> defines `FindFirstFile` as a macro; if that
    /// header is included in the same translation unit this name is rewritten
    /// by the preprocessor - confirm that this is acceptable for the project.
    std::filesystem::path FindFirstFile(std::filesystem::path directory,
                                        std::filesystem::path::const_iterator start,
                                        const std::filesystem::path::const_iterator& finish,
                                        const std::string& filename) {
        namespace fs = std::filesystem;

        // Step over components that contain no wildcard: they name exactly one
        // directory, so there is nothing to enumerate for them.
        while (start != finish && start->string().find('*') == std::string::npos) {
            directory /= *start;
            ++start;
        }

        const bool at_leaf = (start == finish);

        // Build the regex from a private copy so that neither the caller's
        // filename nor the path component is altered between visits.
        std::string pattern = at_leaf ? filename : start->string();
        const std::regex re = GenerateRegex(pattern);
        const fs::path::const_iterator rest = at_leaf ? finish : std::next(start);

        // Use the non-throwing overloads: a missing or unreadable directory
        // simply yields "no match" instead of an exception.
        std::error_code ec;
        fs::directory_iterator it(directory, ec);
        if (ec) {
            return {};
        }

        const fs::directory_iterator last;
        while (it != last) {
            // Entries whose type cannot be determined are treated as non-directories.
            std::error_code entry_ec;
            const bool is_dir = it->is_directory(entry_ec);

            // Match against the entry's own name, not its full path.
            if (std::regex_match(it->path().filename().string(), re)) {
                if (at_leaf) {
                    if (!is_dir) {
                        return it->path();
                    }
                } else if (is_dir) {
                    fs::path found = FindFirstFile(it->path(), rest, finish, filename);
                    if (!found.empty()) {
                        return found;
                    }
                }
            }

            it.increment(ec);
            if (ec) {
                break;
            }
        }
        return {};
    }
    

    Which can be called with:

    const filesystem::path input("C:/Test/Data*Set/Files*/*.txt");
    
    if (input.is_absolute()) {
        const auto relative_parent = input.parent_path().relative_path();
    
        cout << FindFirstFile(input.root_path(), begin(relative_parent), end(relative_parent), input.filename().string()) << endl;
    } else {
        const auto parent = input.parent_path();
    
        cout << FindFirstFile(filesystem::current_path(), begin(parent), end(parent), input.filename().string()) << endl;
    }
    

    Live Example

    0 讨论(0)
  • 2020-12-19 19:13

    You need to understand how FindFirstFile[Ex] works. It is a shell over NtQueryDirectoryFile. FindFirstFile[Ex] has to split the input name into a folder name (which is opened and used as FileHandle) and a search mask (used as FileName). The mask can appear only in the file-name part; the folder must have an exact name, without wildcards, so that it can be opened first.

    As a result, FindFirstFile[Ex] always opens one concrete folder and searches that folder by mask. To search for files recursively, we need to call FindFirstFile[Ex] recursively. Usually we simply use the same constant search mask on all levels. For example, when we want to find all files starting from X:\SomeFolder, we first call FindFirstFile[Ex] with X:\SomeFolder\* on level 0. If we find SomeSubfolder, we call FindFirstFile[Ex] with X:\SomeFolder\SomeSubfolder\* on level 1, and so on. But we can also use different search masks on different levels: Data*Set on level 0, Files* on level 1, *.txt on level 2.

    So we need to call FindFirstFileEx recursively and use different masks on different recursion levels. For example, say we want to find c:\Program*\*\*.txt. We need to start from c:\Program*, then for every result found append the \* mask, then append \*.txt on the next level. Or, as another example, we may want to search for files by the mask c:\Program Files*\Internet Explorer\* at any depth. We can use a constant set of per-level folder masks (optional) together with a final mask (also optional) that is used on all deeper levels. All this can be implemented without much difficulty, and efficiently:

    struct ENUM_CONTEXT : WIN32_FIND_DATA 
    {
        PCWSTR _szMask;
        PCWSTR *_pszMask;
        ULONG _MaskCount;
        ULONG _MaxLevel;
        ULONG _nFiles;
        ULONG _nFolders;
        WCHAR _FileName[MAXSHORT + 1];
    
        void StartEnum(PCWSTR pcszRoot, PCWSTR pszMask[], ULONG MaskCount, PCWSTR szMask, ULONG MaxLevel, PSTR prefix)
        {
            SIZE_T len = wcslen(pcszRoot);
    
            if (len < RTL_NUMBER_OF(_FileName))
            {
                memcpy(_FileName, pcszRoot, len * sizeof(WCHAR));
    
                _szMask = szMask, _pszMask = pszMask, _MaskCount = MaskCount;
                _MaxLevel = szMask ? MaxLevel : MaskCount;
                _nFolders = 0, _nFolders = 0;
    
                Enum(_FileName + len, 0, prefix);
            }
        }
    
        void Enum(PWSTR pszEnd, ULONG nLevel, PSTR prefix);
    };
    
    // Enumerates the folder whose path currently occupies [_FileName, pszEnd).
    //
    // Appends "\<mask>" for this level to the path, runs FindFirstFileEx /
    // FindNextFile over it, recurses into every matching sub-folder that is not
    // a reparse point, and prints matching files once the per-level masks are
    // exhausted (or on the last per-level mask when there is no deep mask).
    //
    // pszEnd  - one past the last character of the current folder path.
    // nLevel  - recursion depth, 0 for the root.
    // prefix  - indentation string for DbgPrint; one character is prepended
    //           (prefix - 1) for each deeper level.
    void ENUM_CONTEXT::Enum(PWSTR pszEnd, ULONG nLevel, PSTR prefix)
    {
        if (nLevel > _MaxLevel)
        {
            return ;
        }

        PCWSTR lpFileName = _FileName;

        // Characters still available in _FileName starting at pszEnd.
        SIZE_T cb = lpFileName + RTL_NUMBER_OF(_FileName) - pszEnd;

        PCWSTR szMask = nLevel < _MaskCount ? _pszMask[nLevel] : _szMask;

        // Without a deep mask there is nothing to search below the last
        // per-level mask; bail out instead of passing null to wcslen.
        if (!szMask)
        {
            return ;
        }

        // Mask length including its terminating null.
        SIZE_T cchMask = wcslen(szMask) + 1;

        // Need room for the separator plus the null-terminated mask.
        if (cb < cchMask + 1)
        {
            return ;
        }

        *pszEnd++ = L'\\', cb--;

        // The precision argument of %.*S must be an int.
        DbgPrint("%s[<%.*S>]\n", prefix, static_cast<int>(pszEnd - lpFileName), lpFileName);

        memcpy(pszEnd, szMask, cchMask * sizeof(WCHAR));

        ULONG dwError;

        // 'this' doubles as the WIN32_FIND_DATA buffer; deeper levels overwrite
        // it, which is safe because the entry is not read again after recursing.
        HANDLE hFindFile = FindFirstFileEx(lpFileName, FindExInfoBasic, this, FindExSearchNameMatch, 0, FIND_FIRST_EX_LARGE_FETCH);

        if (hFindFile != INVALID_HANDLE_VALUE)
        {
            PWSTR FileName = cFileName;

            do 
            {
                SIZE_T FileNameLength = wcslen(FileName);

                // Skip the "." and ".." pseudo-entries.
                switch (FileNameLength)
                {
                case 2:
                    if (FileName[1] != '.') break;
                    // fall through: second character is '.', now check the first
                case 1:
                    if (FileName[0] == '.') continue;
                }

                if (dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
                {
                    _nFolders++;

                    // Reparse points are counted but not descended into.
                    if (!(dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT))
                    {
                        if (cb < FileNameLength)
                        {
                            __debugbreak();
                        }
                        else
                        {
                            // Replace the mask with the folder name and descend.
                            memcpy(pszEnd, FileName, FileNameLength * sizeof(WCHAR));
                            Enum(pszEnd + FileNameLength, nLevel + 1, prefix - 1);
                        }
                    }
                }
                else if (nLevel >= _MaskCount || (!_szMask && nLevel == _MaskCount - 1))
                {
                    _nFiles++;

                    DbgPrint("%s%u%u <%.*S>\n", prefix, nFileSizeLow, nFileSizeHigh, static_cast<int>(FileNameLength), FileName);
                }

            } while (FindNextFile(hFindFile, this));

            // Running out of entries is the normal way for the loop to end.
            if ((dwError = GetLastError()) == ERROR_NO_MORE_FILES)
            {
                dwError = NOERROR;
            }

            FindClose(hFindFile);
        }
        else
        {
            dwError = GetLastError();
        }

        // "Nothing matched the mask" is not reported as an error.
        if (dwError && dwError != ERROR_FILE_NOT_FOUND)
        {
            DbgPrint("%s[<%.*S>] err = %u\n", prefix, static_cast<int>(pszEnd - lpFileName), lpFileName, dwError);
        }
    }
    
    // Runs three sample searches below pcszRoot, printing results via DbgPrint.
    void Test(PCWSTR pcszRoot)
    {
        // Indentation source: a run of tabs followed by a terminator. The walk
        // starts at the terminator (empty indent) and steps one character back
        // per level, so the run length is also used as the depth limit.
        char prefix[MAXUCHAR + 1];
        const ULONG maxDepth = RTL_NUMBER_OF(prefix) - 1;
        memset(prefix, '\t', maxDepth);
        prefix[maxDepth] = 0;
        PSTR const emptyIndent = prefix + maxDepth;

        static PCWSTR FixedDepthMasks[] = { L"Program*", L"*", L"*.txt" };
        static PCWSTR AnyDepthTxtMasks[] = { L"Program*", L"*" };
        static PCWSTR ExplorerMasks[] = { L"Program Files*", L"Internet Explorer" };

        ENUM_CONTEXT ectx;

        // Program*\*\*.txt at exactly that depth (no deep mask).
        ectx.StartEnum(pcszRoot, FixedDepthMasks, RTL_NUMBER_OF(FixedDepthMasks), nullptr, maxDepth, emptyIndent);

        // *.txt files at any depth below Program*\*\ .
        ectx.StartEnum(pcszRoot, AnyDepthTxtMasks, RTL_NUMBER_OF(AnyDepthTxtMasks), L"*.txt", maxDepth, emptyIndent);

        // Every file (*) below Program Files*\Internet Explorer\ .
        ectx.StartEnum(pcszRoot, ExplorerMasks, RTL_NUMBER_OF(ExplorerMasks), L"*", maxDepth, emptyIndent);
    }
    
    0 讨论(0)
提交回复
热议问题