This is a commonly asked question.
The scenario is:-
folderA____ folderA1____folderA1a
\\____folderA2____folderA2a
\\___fo
Sharing a Python solution to the excellent Alternative 3 by @pinoyyid, above, in case it's useful to anyone. I'm not a developer so it's probably hopelessly un-pythonic... but it works, only makes 2 API calls, and is pretty quick.
'' in parents
segment per subfolder found.Interestingly, Google Drive seems to have a hard limit of 599 '
segments per query, so if your folder-to-search has more subfolders than this, you need to chunk the list.
FOLDER_TO_SEARCH = '123456789' # ID of folder to search
DRIVE_ID = '654321' # ID of shared drive in which it lives
MAX_PARENTS = 500 # Limit set safely below Google max of 599 parents per query.
def get_all_folders_in_drive():
"""
Return a dictionary of all the folder IDs in a drive mapped to their parent folder IDs (or to the
drive itself if a top-level folder). That is, flatten the entire folder structure.
"""
folders_in_drive_dict = {}
page_token = None
max_allowed_page_size = 1000
just_folders = "trashed = false and mimeType = 'application/vnd.google-apps.folder'"
while True:
results = drive_api_ref.files().list(
pageSize=max_allowed_page_size,
fields="nextPageToken, files(id, name, mimeType, parents)",
includeItemsFromAllDrives=True, supportsAllDrives=True,
corpora='drive',
driveId=DRIVE_ID,
pageToken=page_token,
q=just_folders).execute()
folders = results.get('files', [])
page_token = results.get('nextPageToken', None)
for folder in folders:
folders_in_drive_dict[folder['id']] = folder['parents'][0]
if page_token is None:
break
return folders_in_drive_dict
def get_subfolders_of_folder(folder_to_search, all_folders):
"""
Yield subfolders of the folder-to-search, and then subsubfolders etc. Must be called by an iterator.
:param all_folders: The dictionary returned by :meth:`get_all_folders_in-drive`.
"""
temp_list = [k for k, v in all_folders.items() if v == folder_to_search] # Get all subfolders
for sub_folder in temp_list: # For each subfolder...
yield sub_folder # Return it
yield from get_subfolders_of_folder(sub_folder, all_folders) # Get subsubfolders etc
def get_relevant_files(self, relevant_folders):
"""
Get files under the folder-to-search and all its subfolders.
"""
relevant_files = {}
chunked_relevant_folders_list = [relevant_folders[i:i + MAX_PARENTS] for i in
range(0, len(relevant_folders), MAX_PARENTS)]
for folder_list in chunked_relevant_folders_list:
query_term = ' in parents or '.join('"{0}"'.format(f) for f in folder_list) + ' in parents'
relevant_files.update(get_all_files_in_folders(query_term))
return relevant_files
def get_all_files_in_folders(self, parent_folders):
"""
Return a dictionary of file IDs mapped to file names for the specified parent folders.
"""
files_under_folder_dict = {}
page_token = None
max_allowed_page_size = 1000
just_files = f"mimeType != 'application/vnd.google-apps.folder' and trashed = false and ({parent_folders})"
while True:
results = drive_api_ref.files().list(
pageSize=max_allowed_page_size,
fields="nextPageToken, files(id, name, mimeType, parents)",
includeItemsFromAllDrives=True, supportsAllDrives=True,
corpora='drive',
driveId=DRIVE_ID,
pageToken=page_token,
q=just_files).execute()
files = results.get('files', [])
page_token = results.get('nextPageToken', None)
for file in files:
files_under_folder_dict[file['id']] = file['name']
if page_token is None:
break
return files_under_folder_dict
if __name__ == "__main__":
all_folders_dict = get_all_folders_in_drive() # Flatten folder structure
relevant_folders_list = [FOLDER_TO_SEARCH] # Start with the folder-to-archive
for folder in get_subfolders_of_folder(FOLDER_TO_SEARCH, all_folders_dict):
relevant_folders_list.append(folder) # Recursively search for subfolders
relevant_files_dict = get_relevant_files(relevant_folders_list) # Get the files