# file_downloader.py
import os
import time

import requests

from tools.log import logger
from tools.utils import Utils
  6. class FileDownloader:
  7. def __init__(self, url, local_filename, max_retries=1):
  8. self.url = url
  9. self.local_filename = local_filename
  10. self.max_retries = max_retries
  11. self.chunk_size = 8192 # 每块数据大小 (8 KB)
  12. self.timer = Utils.Timer()
  13. self.last_logged_time = 0
  14. self.last_logged_size = 0
  15. self.total_size = 0
  16. def download(self, resume=True):
  17. retry_count = 0
  18. retry_delay = 2
  19. while retry_count <= self.max_retries:
  20. try:
  21. return self._download(resume)
  22. except requests.RequestException as e:
  23. retry_count += 1
  24. logger.error(f"Download failed: {e}. Retrying {retry_count}/{self.max_retries} after {retry_delay}s...")
  25. time.sleep(retry_delay)
  26. retry_delay *= 2 # 指数退避
  27. raise Exception(f"Failed to download {self.url} after {self.max_retries} retries")
  28. def _download(self, resume):
  29. resume_header = {}
  30. downloaded_size = 0
  31. # 检查是否需要断点续传
  32. if resume and os.path.exists(self.local_filename):
  33. downloaded_size = os.path.getsize(self.local_filename)
  34. resume_header = {'Range': f'bytes={downloaded_size}-'}
  35. logger.info(f"Resuming download from byte: {downloaded_size}")
  36. else:
  37. logger.info("Starting new download.")
  38. # 发起请求
  39. with requests.get(self.url, headers=resume_header, stream=True, timeout=30) as response:
  40. response.raise_for_status()
  41. self.total_size = int(response.headers.get('Content-Length', 0)) + downloaded_size
  42. with open(self.local_filename, 'ab') as f:
  43. start_time = time.time()
  44. previous_time = start_time
  45. for chunk in response.iter_content(chunk_size=self.chunk_size):
  46. if not chunk: # 忽略空数据块
  47. continue
  48. f.write(chunk)
  49. downloaded_size += len(chunk)
  50. current_time = time.time()
  51. time_elapsed = int(current_time - previous_time)
  52. # 每隔 1 秒更新日志
  53. if time_elapsed >= 1 or self.timer.timer("download", 5):
  54. self._log_progress(downloaded_size, self.total_size, start_time, previous_time)
  55. previous_time = current_time
  56. logger.info(f"Download completed: {self.local_filename}")
  57. def _log_progress(self, downloaded_size, total_size, start_time, previous_time):
  58. """
  59. 输出下载进度日志,包含当前进度和下载速度。
  60. :param downloaded_size: 已下载大小(字节)
  61. :param total_size: 文件总大小(字节)
  62. :param start_time: 下载开始时间
  63. :param previous_time: 上一次记录日志的时间
  64. """
  65. percent = (downloaded_size / total_size) * 100
  66. elapsed_time = time.time() - start_time
  67. interval_time = time.time() - previous_time
  68. # 计算平均速度和瞬时速度
  69. average_speed = downloaded_size / elapsed_time if elapsed_time > 0 else 0
  70. instant_speed = (downloaded_size - self.last_logged_size) / interval_time if interval_time > 0 else 0
  71. # 转换为 MB/s
  72. average_speed_mb = average_speed / 1024 / 1024
  73. instant_speed_mb = instant_speed / 1024 / 1024
  74. logger.info(f"{os.path.basename(self.local_filename)} - Downloaded: "
  75. f"{downloaded_size / 1024 / 1024:.1f}MB of {total_size / 1024 / 1024:.1f}MB "
  76. f"({percent:.2f}%) | {instant_speed_mb:.2f} MB/s")
  77. # 更新记录的时间和大小
  78. self.last_logged_time = time.time()
  79. self.last_logged_size = downloaded_size