I want to get the folder size for all my team folders. I am executing the following code in Python using the Dropbox SDK:
import dropbox
import dropbox.team
import time
from concurrent.futures import ThreadPoolExecutor
from logging_config import logger
def team_folder(dbx_team, member_id_admin, save_client):
    """Collect the id and name of every team folder, then list their contents.

    Args:
        dbx_team: dropbox.DropboxTeam client.
        member_id_admin: team_member_id of an admin, forwarded to the lister.
        save_client: storage client used by the downstream save steps.
    """
    start_time = time.time()
    logger.info('Executing extraction of team folders')

    # team_team_folder_list is paginated: follow the cursor so teams with
    # many folders are not silently truncated to the first page.
    result = dbx_team.team_team_folder_list()
    folder_details = [
        {"ID": folder.team_folder_id, "Name": folder.name}
        for folder in result.team_folders
    ]
    while result.has_more:
        result = dbx_team.team_team_folder_list_continue(result.cursor)
        folder_details.extend(
            {"ID": folder.team_folder_id, "Name": folder.name}
            for folder in result.team_folders
        )

    # save as json block
    logger.info("Team folder details have been saved to team_folder.json")
    end_time = time.time()
    logger.info("Finished team_folder method in %.2f seconds", end_time - start_time)
    list_all_contents_teamFolders(folder_details, dbx_team, member_id_admin, save_client)
def handle_listing_result(result, namespace):
    """Flatten one page of a files_list_folder result into plain dicts.

    Args:
        result: a ListFolderResult-like object with an ``entries`` attribute.
        namespace: namespace id string tagged onto every entry.

    Returns:
        list[dict]: one dict per entry with ``type``, ``path``, ``size``,
        ``parent_shared_folder_id`` and ``namespace_id`` keys.
    """
    teamFolder_files = []
    for entry in result.entries:
        # FolderMetadata has no 'size' attribute; default to 0 so folders
        # serialize alongside files.
        entry_size = getattr(entry, 'size', 0)
        entry_parent_shared_folder_id = getattr(entry, 'parent_shared_folder_id', '')
        teamFolder_files.append({
            "type": str(type(entry)),
            "path": entry.path_lower,
            'size': entry_size,
            'parent_shared_folder_id': entry_parent_shared_folder_id,
            'namespace_id': namespace,
        })
    return teamFolder_files
def process_namespace(dbx_admin_with_ns, namespace_id, save_client):
    """List every file/folder in one team-folder namespace.

    Args:
        dbx_admin_with_ns: Dropbox client whose path root is already set to
            the target namespace.
        namespace_id: namespace id string (used for logging and output path).
        save_client: storage client used by the save step.

    Returns:
        list[dict]: flattened entries; whatever was collected so far (possibly
        empty) when listing failed — never None, so callers can aggregate.
    """
    logger.info(f"Processing namespace: {namespace_id}")
    teamFolder_files = []
    try:
        # recursive=True makes the server walk the whole tree: one API call
        # per page instead of one call per sub-folder.
        listing_result = dbx_admin_with_ns.files_list_folder(
            path="",
            recursive=True,
            include_media_info=False,
            include_mounted_folders=True
        )
        teamFolder_files = handle_listing_result(listing_result, namespace_id)
        while listing_result.has_more:
            listing_result = dbx_admin_with_ns.files_list_folder_continue(cursor=listing_result.cursor)
            teamFolder_files.extend(handle_listing_result(listing_result, namespace_id))

        path_to_save = 'teamFolderSpace/' + namespace_id + '_pathFilesTeamFolder.json'
        logger.info(f"Uploading files to blob: {path_to_save}")
        # TODO: persist teamFolder_files via save_client (save block)
        logger.info(f"Upload completed: {path_to_save}")
        logger.info(f"Finished processing namespace: {namespace_id}")
        return teamFolder_files
    except dropbox.exceptions.InternalServerError as e:
        # Original returned logger.error(...) which is None; return the
        # partial listing instead so aggregation code never sees None.
        logger.error(f"Dropbox internal server error for {namespace_id}: {e}")
        return teamFolder_files
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return teamFolder_files
def list_all_contents_teamFolders(folders_id, dbx_team, member_id_admin, blob_client):
    """Fan out the listing of every team-folder namespace over a thread pool.

    Args:
        folders_id: list of {"ID": ..., "Name": ...} dicts from team_folder().
        dbx_team: dropbox.DropboxTeam client.
        member_id_admin: team_member_id of an admin to act as.
        blob_client: storage client forwarded to process_namespace.

    Returns:
        str: completion message.
    """
    start_time = time.time()
    start_position = 0
    namespace_ids = [folder['ID'] for folder in folders_id]

    # The original referenced an undefined name `dbx_admin` (NameError at
    # runtime). Build the admin-scoped client once, outside the loop.
    # NOTE(review): as_admin is the usual way to pair member_id_admin with
    # namespace path roots — confirm against your auth setup.
    dbx_admin = dbx_team.as_admin(member_id_admin)

    # Listing is I/O-bound, so raising max_workers is the main speed lever.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = []
        for i, namespace_id in enumerate(namespace_ids[start_position:], start=0):
            logger.info(f'Submitting folder {i + 1}/{len(namespace_ids)} with id {namespace_id}')
            future = executor.submit(
                process_namespace,
                dbx_admin.with_path_root(dropbox.common.PathRoot.namespace_id(namespace_id)),
                namespace_id,
                blob_client,
            )
            futures.append(future)
        # Wait for and surface worker exceptions instead of dropping them.
        for future in futures:
            future.result()
    # TODO: save aggregated results via blob_client (save block)
    end_time = time.time()
    logger.info("Finished team_folder method in %.2f seconds", end_time - start_time)
    return "Finished team_folder method"
However, the execution time is extremely long.
How can I achieve the same result more efficiently?