The Problem I need to write a simple software that, giving certain constraints, appends to a list a series of files. The user could choose between
@Stephen C:
Below as per your request, my benchmark code that I talked about in the comments (C# - not Java).
Note that it should use stopwatch instead of datetime for better accuracy, but otherwise it's fine.
I didn't test if the iteration delivers the same number files as the recursion, but it should.
Actually, if you pay attention to the median, you'll notice that this already starts showing with only very few files. (my Desktop folder contains 2210 files, 415 folders, 3.2 GB total, most of it large files in the download folder, AppData, and a higher number of files due to one larger C# project [mail-server] on my desktop).
To get the numbers I talked about in the comment, install cygwin (with everything [that's about 100GB, I think] ), and index the cygwin folder.

As mentioned in the comments, it is not entirely correct to say it doesn't matter.
While for a small directory tree, recursion is negligibly more efficient than iteration (in the order of several 10s of milliseconds), for a very large tree, recursion is minutes (and therefore noticeably) slower than iteration. It isn't all to difficult to grasp why either. If you have to allocate and return a new set of stack variables, each time, call a function, and store all previous results until you return, you're of course slower than when you initiate a a stack-structure on the heap once, and use that for each iteration.
The tree doesn't need to be pathologically deep for this effect to be noticed (while slow speed isn't a stack overflow, its very negative consequences are not much different from a StackOverflow-Bug). Also I wouldn't call having a lot of files "pathological", because if you do an index on your main drive, you'll naturally have a lot of files. Have some HTML documentation, and the number of files explodes. You'll find that on a whole lot of files, an iteration completes in less than 30 seconds, while a recursion needs appx. 3 minutes.
using System;
using System.Data;
using System.Linq;
namespace IterativeDirectoryCSharp
{
public class SearchStrategy
{
//Iterative File and Folder Listing in VB.NET
public static bool IterativeSearch2(string strPath)
{
System.IO.DirectoryInfo dirInfo = new System.IO.DirectoryInfo(strPath);
System.IO.FileSystemInfo[] arrfsiEntities = null;
arrfsiEntities = dirInfo.GetFileSystemInfos();
// Creates and initializes a new Stack.
System.Collections.Stack strLastPathStack = new System.Collections.Stack();
System.Collections.Stack iIndexStack = new System.Collections.Stack();
int iIndex = 0;
int iMaxEntities = arrfsiEntities.Length;
do
{
while (iIndex < iMaxEntities)
{
if (arrfsiEntities[iIndex].Attributes == System.IO.FileAttributes.Directory)
{
//Console.WriteLine("Searching directory " + arrfsiEntities[iIndex].FullName);
strLastPathStack.Push(System.IO.Directory.GetParent(arrfsiEntities[iIndex].FullName).FullName);
strLastPathStack.Push(arrfsiEntities[iIndex].FullName);
iIndexStack.Push(iIndex);
dirInfo = null;
Array.Clear(arrfsiEntities, 0, arrfsiEntities.Length);
dirInfo = new System.IO.DirectoryInfo(strLastPathStack.Pop().ToString());
arrfsiEntities = dirInfo.GetFileSystemInfos();
iIndex = 0;
iMaxEntities = arrfsiEntities.Length;
continue;
}
else
{
//Console.WriteLine(arrfsiEntities[iIndex].FullName);
}
iIndex += 1;
} // Whend
dirInfo = null;
Array.Clear(arrfsiEntities, 0, arrfsiEntities.Length);
// Dont try to do move the next line in the loop while/until statement, null reference when pop on an empty stack...
if (strLastPathStack.Count == 0)
break;
dirInfo = new System.IO.DirectoryInfo(strLastPathStack.Pop().ToString());
arrfsiEntities = dirInfo.GetFileSystemInfos();
iIndex = (int)iIndexStack.Pop() + 1;
iMaxEntities = arrfsiEntities.Length;
} // End do
while (true);
return true;
} // End Function IterativeSearch2
public static bool IterativeSearch1(string path)
{
System.IO.DirectoryInfo dirInfo = new System.IO.DirectoryInfo(path);
System.IO.FileSystemInfo[] arrfsiEntities = null;
arrfsiEntities = dirInfo.GetFileSystemInfos();
// Creates and initializes a new Stack.
System.Collections.Stack myStack = new System.Collections.Stack();
//Console.WriteLine("Stack is empty when \t stack.count={0}", myStack.Count);
int iIndex = 0;
int iMaxEntities = arrfsiEntities.Length - 1;
do
{
for (iIndex = 0; iIndex <= iMaxEntities; iIndex += 1)
{
if (arrfsiEntities[iIndex].Attributes == System.IO.FileAttributes.Directory)
{
//Console.WriteLine("Searching directory " + arrfsiEntities[iIndex].FullName);
myStack.Push(arrfsiEntities[iIndex].FullName);
}
else
{
//Console.WriteLine("{0}", arrfsiEntities[iIndex].FullName);
}
} // Next iIndex
dirInfo = null;
Array.Clear(arrfsiEntities, 0, arrfsiEntities.Length);
// Dont try to do move the next line in the loop while/until statement, null reference when pop on an empty stack...
if (myStack.Count == 0)
break;
dirInfo = new System.IO.DirectoryInfo(myStack.Pop().ToString());
arrfsiEntities = dirInfo.GetFileSystemInfos();
iIndex = 0;
iMaxEntities = arrfsiEntities.Length - 1;
}
while (true);
return true;
} // End Function IterativeSearch1
//Recursive File and Folder Listing VB.NET
public static bool RecursiveSearch(string path)
{
System.IO.DirectoryInfo dirInfo = new System.IO.DirectoryInfo(path);
//System.IO.FileSystemInfo fileObject = default(System.IO.FileSystemInfo);
foreach (System.IO.FileSystemInfo fsiThisEntityInfo in dirInfo.GetFileSystemInfos())
{
if (fsiThisEntityInfo.Attributes == System.IO.FileAttributes.Directory)
{
//Console.WriteLine("Searching directory " + fsiThisEntityInfo.FullName);
RecursiveSearch(fsiThisEntityInfo.FullName);
}
else
{
//Console.WriteLine(fsiThisEntityInfo.FullName);
}
} // Next fiThisFileInfo
return true;
} // End Function RecursiveSearch
// http://forums.asp.net/p/1414929/3116196.aspx
public class TimeFrame
{
public DateTime dtStartTime;
public DateTime dtEndTime;
public TimeFrame(DateTime dtStart, DateTime dtEnd)
{
this.dtStartTime = dtStart;
this.dtEndTime = dtEnd;
} // End Constructor
} // End Class TimeFrame
// Small amount of files
// Iter1 Iter2 Recurs.
// Median 1206.231 3910.367 1232.4079
// Average 1216.431647 3940.147975 1239.092354
// Minimum 1172.5827 3832.0984 1201.2308
// Maximum 1393.4091 4400.4237 1440.3386
public static System.Data.DataTable TestStrategies(string strDirectoryToSearch)
{
System.Data.DataTable dt = new System.Data.DataTable();
System.Collections.Generic.Dictionary dictResults = new System.Collections.Generic.Dictionary();
dt.Columns.Add("TestRun", typeof(string));
dt.Columns.Add("IterativeSearch1", typeof(double));
dt.Columns.Add("IterativeSearch2", typeof(double));
dt.Columns.Add("RecursiveSearch", typeof(double));
System.Data.DataRow dr = null;
System.Collections.Generic.Dictionary dictPerformance = null;
for (int i = 0; i < 100; ++i)
{
dr = dt.NewRow();
dr["TestRun"] = i + 1;
dictPerformance = new System.Collections.Generic.Dictionary();
DateTime startTime;
DateTime endTime;
Console.WriteLine("*********************************************************");
startTime = DateTime.Now;
IterativeSearch1(strDirectoryToSearch);
endTime = DateTime.Now;
dictPerformance.Add("IterativeSearch1", new TimeFrame(startTime, endTime));
Console.WriteLine("*********************************************************");
startTime = DateTime.Now;
IterativeSearch2(strDirectoryToSearch);
endTime = DateTime.Now;
dictPerformance.Add("IterativeSearch2", new TimeFrame(startTime, endTime));
Console.WriteLine("*********************************************************");
startTime = DateTime.Now;
RecursiveSearch(strDirectoryToSearch);
endTime = DateTime.Now;
dictPerformance.Add("RecursiveSearch", new TimeFrame(startTime, endTime));
Console.WriteLine("*********************************************************");
string strResult = "";
foreach (string strKey in dictPerformance.Keys)
{
TimeSpan elapsedTime = dictPerformance[strKey].dtEndTime - dictPerformance[strKey].dtStartTime;
dr[strKey] = elapsedTime.TotalMilliseconds;
strResult += strKey + ": " + elapsedTime.TotalMilliseconds.ToString() + Environment.NewLine;
} // Next
//Console.WriteLine(strResult);
dictResults.Add(i, strResult);
dt.Rows.Add(dr);
} // Next i
foreach(int iMeasurement in dictResults.Keys)
{
Console.WriteLine("Measurement " + iMeasurement.ToString());
Console.WriteLine(dictResults[iMeasurement]);
Console.WriteLine(Environment.NewLine);
} // Next iMeasurement
double[] adblIterSearch1 = dt
.AsEnumerable()
.Select(row => row.Field("IterativeSearch1"))
.ToArray();
double[] adblIterSearch2 = dt
.AsEnumerable()
.Select(row => row.Field("IterativeSearch2"))
.ToArray();
double[] adblRecursiveSearch = dt
.AsEnumerable()
.Select(row => row.Field("RecursiveSearch"))
.ToArray();
dr = dt.NewRow();
dr["TestRun"] = "Median";
dr["IterativeSearch1"] = Median(adblIterSearch1);
dr["IterativeSearch2"] = Median(adblIterSearch2);
dr["RecursiveSearch"] = Median(adblRecursiveSearch);
dt.Rows.Add(dr);
dr = dt.NewRow();
dr["TestRun"] = "Average";
dr["IterativeSearch1"] = dt.Compute("Avg(IterativeSearch1)", string.Empty);
dr["IterativeSearch2"] = dt.Compute("Avg(IterativeSearch2)", string.Empty);
dr["RecursiveSearch"] = dt.Compute("Avg(RecursiveSearch)", string.Empty);
dt.Rows.Add(dr);
dr = dt.NewRow();
dr["TestRun"] = "Minimum ";
dr["IterativeSearch1"] = dt.Compute("Min(IterativeSearch1)", string.Empty);
dr["IterativeSearch2"] = dt.Compute("Min(IterativeSearch2)", string.Empty);
dr["RecursiveSearch"] = dt.Compute("Min(RecursiveSearch)", string.Empty);
dt.Rows.Add(dr);
dr = dt.NewRow();
dr["TestRun"] = "Maximum ";
dr["IterativeSearch1"] = dt.Compute("Max(IterativeSearch1)", string.Empty);
dr["IterativeSearch2"] = dt.Compute("Max(IterativeSearch2)", string.Empty);
dr["RecursiveSearch"] = dt.Compute("Max(RecursiveSearch)", string.Empty);
dt.Rows.Add(dr);
return dt;
} // End Sub TestMain
public static double Median(T[] numbers)
{
int numberCount = numbers.Count();
if (numberCount == 0)
return 0.0;
int halfIndex = numbers.Count() / 2;
var sortedNumbers = numbers.OrderBy(n => n);
double median;
if ((numberCount % 2) == 0)
{
median =
(
(
System.Convert.ToDouble(sortedNumbers.ElementAt(halfIndex)) +
System.Convert.ToDouble(sortedNumbers.ElementAt((halfIndex - 1))
)
) / 2);
}
else
{
median = System.Convert.ToDouble(sortedNumbers.ElementAt(halfIndex));
}
return median;
} // End Function GetMedian
// http://msmvps.com/blogs/deborahk/archive/2010/05/07/linq-mean-median-and-mode.aspx
public static double CalcMedian(int[] numbers)
{
int numberCount = numbers.Count();
int halfIndex = numbers.Count() / 2;
var sortedNumbers = numbers.OrderBy(n => n);
double median;
if ((numberCount % 2) == 0)
{
median = ((sortedNumbers.ElementAt(halfIndex) +
sortedNumbers.ElementAt((halfIndex - 1))) / 2);
}
else
{
median = sortedNumbers.ElementAt(halfIndex);
}
return median;
} // End Function CalcMedian
} // End Class SearchStrategy
} // End Namespace IterativeDirectoryCSharp