From 94d26d0ddbaac65d0b2ac436986aa2aa3bff3eee Mon Sep 17 00:00:00 2001 From: LiaMath Date: Tue, 16 Jun 2026 12:47:11 +0900 Subject: [PATCH] Fix CUDA synchronization bottleneck in LCMScheduler by maintaining CPU timesteps --- src/diffusers/schedulers/scheduling_lcm.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/diffusers/schedulers/scheduling_lcm.py b/src/diffusers/schedulers/scheduling_lcm.py index e70bc4745a24..191e0d50d0ed 100644 --- a/src/diffusers/schedulers/scheduling_lcm.py +++ b/src/diffusers/schedulers/scheduling_lcm.py @@ -523,6 +523,7 @@ def set_timesteps( timesteps = lcm_origin_timesteps[inference_indices] self.timesteps = torch.from_numpy(timesteps).to(device=device, dtype=torch.long) + self.cpu_timesteps = self.timesteps.to("cpu") self._step_index = None self._begin_index = None @@ -573,20 +574,23 @@ def step( # 1. get previous step value prev_step_index = self.step_index + 1 - if prev_step_index < len(self.timesteps): - prev_timestep = self.timesteps[prev_step_index] + current_cpu_timestep = self.cpu_timesteps[self.step_index] + if prev_step_index < len(self.cpu_timesteps): + prev_cpu_timestep = self.cpu_timesteps[prev_step_index] else: - prev_timestep = timestep + prev_cpu_timestep = current_cpu_timestep # 2. compute alphas, betas - alpha_prod_t = self.alphas_cumprod[timestep] - alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + alpha_prod_t = self.alphas_cumprod[current_cpu_timestep] + alpha_prod_t_prev = ( + self.alphas_cumprod[prev_cpu_timestep] if prev_cpu_timestep >= 0 else self.final_alpha_cumprod + ) beta_prod_t = 1 - alpha_prod_t beta_prod_t_prev = 1 - alpha_prod_t_prev # 3. Get scalings for boundary conditions - c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep) + c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(current_cpu_timestep) # 4. Compute the predicted original sample x_0 based on the model parameterization if self.config.prediction_type == "epsilon": # noise-prediction