How can I easily get all file paths from a path containing wildcards? For example: C:/Data*Set/Files*/*.txt. I wrote it on Linux using the glob function.
So you should do away with using OS specific file access, in favor of the OS independent: Filesystem Library
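Let's say that you're given `filesystem::path input`, which contains the path with wildcards. To use this to solve your problem you'd need to:
1. Use `parent_path` to break `input` apart into directories.
2. Use `filename` to obtain the `input` filename.
3. Obtain a `directory_iterator` to the relative or absolute path where `input` begins.
4. Create a recursive function which takes in `begin` and `end` iterators to the obtained parent path, the directory iterator, and the filename.
5. Any time a directory or filename uses a `'*'`, use a `regex` with the iterator to determine the directory which should be progressed to next.
6. Return either the path to the matching file or an empty `path`.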
Due to the excellent Ben Voigt's comment I've updated the algorithm to step over unwildcarded directories.
For example:
/// Builds a regular expression from a path component that uses `*` wildcards.
///
/// Every `*` in `arg` is rewritten to `.*`. All other characters are handed to
/// std::regex unchanged, so the caller is responsible for escaping any regex
/// metacharacter (such as `.`) that has to match literally.
///
/// @param arg  Wildcard mask. Taken by const reference: the caller's string is
///             left untouched and temporaries may be passed.
/// @return The compiled regular expression.
/// @throws std::regex_error if the rewritten pattern is not a valid regex.
std::regex GenerateRegex(const std::string& arg) {
    std::string pattern;
    pattern.reserve(arg.size() + 8);
    for (const char ch : arg) {
        if (ch == '*') {
            pattern += '.';  // "*" (wildcard) becomes ".*" (regex: any run of characters)
        }
        pattern += ch;
    }
    return std::regex(pattern);
}
/// Converts one path component containing `*` wildcards into a regex that
/// matches a whole file or directory name: `*` becomes `.*`, every other
/// regex metacharacter is escaped so it matches literally.
static std::regex WildcardToRegex(const std::string& mask) {
    std::string pattern;
    pattern.reserve(mask.size() * 2);
    for (const char ch : mask) {
        switch (ch) {
        case '*':
            pattern += ".*";
            break;
        case '\\': case '^': case '$': case '.': case '|': case '?': case '+':
        case '(': case ')': case '[': case ']': case '{': case '}':
            pattern += '\\';
            pattern += ch;
            break;
        default:
            pattern += ch;
            break;
        }
    }
    return std::regex(pattern);
}

/// Finds the first file matching a wildcard path, searching depth-first.
///
/// @param directory  Directory the search starts in.
/// @param start      Iterator to the first not-yet-consumed directory component
///                   of the wildcard path. Taken by value, so the caller's
///                   iterator is not advanced and temporaries may be passed.
/// @param finish     End iterator of the directory components.
/// @param filename   File-name mask (may contain `*`). Never modified, so the
///                   same mask is applied unchanged in every leaf directory.
/// @return Path of the first matching non-directory entry, or an empty path if
///         nothing matches. A directory that cannot be opened (for example a
///         literal component that is missing below one wildcard match) counts
///         as "no match" instead of aborting the whole search.
std::filesystem::path FindFirstFile(std::filesystem::path directory,
                                    std::filesystem::path::const_iterator start,
                                    const std::filesystem::path::const_iterator& finish,
                                    const std::string& filename) {
    // Components without a wildcard need no enumeration: just descend into them.
    while (start != finish && start->string().find('*') == std::string::npos) {
        directory /= *start;
        ++start;
    }

    std::error_code ec;
    const std::filesystem::directory_iterator entries(directory, ec);
    if (ec) {
        return {};
    }

    if (start == finish) {
        // Leaf level: match the mask against the entry's own name, not its full path.
        const std::regex re = WildcardToRegex(filename);
        for (const auto& entry : entries) {
            if (!entry.is_directory() && std::regex_match(entry.path().filename().string(), re)) {
                return entry.path();
            }
        }
        return {};
    }

    // Wildcard directory level: try every matching sub-directory in turn.
    const std::regex re = WildcardToRegex(start->string());
    auto rest = start;
    ++rest;
    for (const auto& entry : entries) {
        if (entry.is_directory() && std::regex_match(entry.path().filename().string(), re)) {
            std::filesystem::path found = FindFirstFile(entry.path(), rest, finish, filename);
            if (!found.empty()) {
                return found;
            }
        }
    }
    return {};
}
Which can be called with:
// Wildcard path to resolve: directory components and the file name may contain '*'.
const std::filesystem::path input("C:/Test/Data*Set/Files*/*.txt");

// The mask and the start iterator are bound to named variables so the calls
// below are valid even where FindFirstFile takes them by non-const reference
// (a temporary cannot bind to such a parameter in standard C++).
std::string filename = input.filename().string();

if (input.is_absolute()) {
    // Absolute path: start at the root and walk the remaining directory components.
    const auto relative_parent = input.parent_path().relative_path();
    auto start = relative_parent.begin();
    std::cout << FindFirstFile(input.root_path(), start, relative_parent.end(), filename) << std::endl;
} else {
    // Relative path: resolve the directory components against the current directory.
    const auto parent = input.parent_path();
    auto start = parent.begin();
    std::cout << FindFirstFile(std::filesystem::current_path(), start, parent.end(), filename) << std::endl;
}
Live Example
First you need to understand how FindFirstFile[Ex] works. It is a shell over NtQueryDirectoryFile. FindFirstFile[Ex] has to split the input name into a folder name (which is opened and used as FileHandle) and a search mask (used as FileName). The mask can appear only in the file-name part; the folder must have an exact name, without wildcards, so that it can be opened first.
As a result, FindFirstFile[Ex] always opens one concrete folder and searches in that folder by mask. To search for files recursively we need to call FindFirstFile[Ex] recursively. Usually we simply use the same constant search mask on all levels. For example, when we want to find all files starting from X:\SomeFolder, we first call FindFirstFile[Ex] with X:\SomeFolder\* on level 0. If we find SomeSubfolder, we call FindFirstFile[Ex] with X:\SomeFolder\SomeSubfolder\* on level 1, and so on. But we can also use different search masks on different levels: Data*Set on level 0, Files* on level 1, *.txt on level 2.
So we need to call FindFirstFileEx recursively and use a different mask on each recursion level. For example, say we want to find c:\Program*\*\*.txt. We need to start from c:\Program*, then for every result found append the \* mask, and then append \*.txt on the next level. Or, as another example, we may want to search for files by the mask c:\Program Files*\Internet Explorer\* at any depth. We can use a fixed list of per-level folder masks (optional) together with a final mask (also optional) that is used on all deeper levels.
All of this can be implemented efficiently and without much difficulty:
struct ENUM_CONTEXT : WIN32_FIND_DATA
{
PCWSTR _szMask;
PCWSTR *_pszMask;
ULONG _MaskCount;
ULONG _MaxLevel;
ULONG _nFiles;
ULONG _nFolders;
WCHAR _FileName[MAXSHORT + 1];
void StartEnum(PCWSTR pcszRoot, PCWSTR pszMask[], ULONG MaskCount, PCWSTR szMask, ULONG MaxLevel, PSTR prefix)
{
SIZE_T len = wcslen(pcszRoot);
if (len < RTL_NUMBER_OF(_FileName))
{
memcpy(_FileName, pcszRoot, len * sizeof(WCHAR));
_szMask = szMask, _pszMask = pszMask, _MaskCount = MaskCount;
_MaxLevel = szMask ? MaxLevel : MaskCount;
_nFolders = 0, _nFolders = 0;
Enum(_FileName + len, 0, prefix);
}
}
void Enum(PWSTR pszEnd, ULONG nLevel, PSTR prefix);
};
// Enumerates the folder whose path currently ends at pszEnd inside _FileName,
// using the mask that belongs to nLevel, and recurses into sub-directories.
//   pszEnd  - one past the last character of the current folder path in _FileName
//   nLevel  - recursion level; selects _pszMask[nLevel], or _szMask beyond the array
//   prefix  - indentation string printed in front of every DbgPrint line
void ENUM_CONTEXT::Enum(PWSTR pszEnd, ULONG nLevel, PSTR prefix)
{
    if (nLevel > _MaxLevel)
    {
        return ;
    }

    PCWSTR lpFileName = _FileName;

    // Number of WCHARs still free in _FileName starting at pszEnd.
    SIZE_T cb = lpFileName + RTL_NUMBER_OF(_FileName) - pszEnd;

    PCWSTR szMask = nLevel < _MaskCount ? _pszMask[nLevel] : _szMask;

    if (!szMask)
    {
        // Past the per-level masks and no deep mask was supplied: nothing to
        // search for here (and wcslen below must not see a null pointer).
        return ;
    }

    SIZE_T cchMask = wcslen(szMask) + 1; // mask length including the terminating 0

    // Need room for the separator plus the 0-terminated mask.
    if (cb < cchMask + 1)
    {
        return ;
    }

    *pszEnd++ = L'\\', cb--;

    // The "*" precision of %.*S must be an int, hence the casts on the lengths.
    DbgPrint("%s[<%.*S>]\n", prefix, static_cast<int>(pszEnd - lpFileName), lpFileName);

    memcpy(pszEnd, szMask, cchMask * sizeof(WCHAR));

    ULONG dwError;
    HANDLE hFindFile = FindFirstFileEx(lpFileName, FindExInfoBasic, this, FindExSearchNameMatch, 0, FIND_FIRST_EX_LARGE_FETCH);

    if (hFindFile != INVALID_HANDLE_VALUE)
    {
        PWSTR FileName = cFileName;

        do
        {
            SIZE_T FileNameLength = wcslen(FileName);

            // Skip the "." and ".." pseudo entries.
            if (FileName[0] == '.' &&
                (FileNameLength == 1 || (FileNameLength == 2 && FileName[1] == '.')))
            {
                continue;
            }

            if (dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
            {
                _nFolders++;

                // Reparse points are counted but not entered.
                if (!(dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT))
                {
                    if (cb < FileNameLength)
                    {
                        __debugbreak();
                    }
                    else
                    {
                        // Replace the mask with the sub-folder name and descend.
                        // The find data in *this is overwritten by the nested call,
                        // but nothing reads it again before FindNextFile refills it.
                        memcpy(pszEnd, FileName, FileNameLength * sizeof(WCHAR));
                        Enum(pszEnd + FileNameLength, nLevel + 1, prefix - 1);
                    }
                }
            }
            else if (nLevel >= _MaskCount || (!_szMask && nLevel == _MaskCount - 1))
            {
                // Files are reported only on levels using the deep mask, or on the
                // last per-level mask when there is no deep mask.
                _nFiles++;
                // NOTE(review): "%u%u" prints the low and high size parts back to back
                // with no separator; left unchanged to keep the output format.
                DbgPrint("%s%u%u <%.*S>\n", prefix, nFileSizeLow, nFileSizeHigh, static_cast<int>(FileNameLength), FileName);
            }

        } while (FindNextFile(hFindFile, this));

        // Running out of entries is the normal way for the loop to end.
        if ((dwError = GetLastError()) == ERROR_NO_MORE_FILES)
        {
            dwError = NOERROR;
        }

        FindClose(hFindFile);
    }
    else
    {
        dwError = GetLastError();
    }

    // "Nothing matched the mask" is not worth reporting.
    if (dwError && dwError != ERROR_FILE_NOT_FOUND)
    {
        DbgPrint("%s[<%.*S>] err = %u\n", prefix, static_cast<int>(pszEnd - lpFileName), lpFileName, dwError);
    }
}
// Runs three sample enumerations below pcszRoot, each with a different
// combination of per-level masks and deep mask.
void Test(PCWSTR pcszRoot)
{
    // Indentation buffer filled with tabs and 0-terminated. Every call starts with
    // a pointer to the terminator (an empty string) and the same value as MaxLevel.
    char prefix[MAXUCHAR + 1];
    const ULONG tabCount = RTL_NUMBER_OF(prefix) - 1;
    memset(prefix, '\t', tabCount);
    prefix[tabCount] = 0;
    PSTR const prefixEnd = prefix + tabCount;

    static PCWSTR fixedDepthMasks[] = { L"Program*", L"*", L"*.txt" };
    static PCWSTR anyDepthMasks[] = { L"Program*", L"*" };
    static PCWSTR explorerMasks[] = { L"Program Files*", L"Internet Explorer" };

    ENUM_CONTEXT ectx;

    // Program*\*\*.txt at a fixed depth (no deep mask).
    ectx.StartEnum(pcszRoot, fixedDepthMasks, RTL_NUMBER_OF(fixedDepthMasks), nullptr, tabCount, prefixEnd);

    // *.txt files at any depth below Program*\*\ .
    ectx.StartEnum(pcszRoot, anyDepthMasks, RTL_NUMBER_OF(anyDepthMasks), L"*.txt", tabCount, prefixEnd);

    // All files (*) at any depth below Program Files*\Internet Explorer\ .
    ectx.StartEnum(pcszRoot, explorerMasks, RTL_NUMBER_OF(explorerMasks), L"*", tabCount, prefixEnd);
}