prepare_dcp_local_seq_lens(
dcp_local_seq_lens: Tensor,
seq_lens: Tensor,
num_reqs: int,
dcp_size: int,
dcp_rank: int,
cp_interleave: int,
) -> None
Populate the persistent DCP local seq_lens buffer (CUDA graph safe).
Source code in vllm/v1/worker/gpu/cp_utils.py
def prepare_dcp_local_seq_lens(
    dcp_local_seq_lens: torch.Tensor,
    seq_lens: torch.Tensor,
    num_reqs: int,
    dcp_size: int,
    dcp_rank: int,
    cp_interleave: int,
) -> None:
    """Populate the persistent DCP local seq_lens buffer (CUDA graph safe).

    Launches ``_dcp_local_seq_lens_kernel`` on a 1-D grid sized to cover the
    whole first dimension of ``dcp_local_seq_lens`` (not only ``num_reqs``
    entries), in blocks of 128. When ``dcp_size`` is 1 the function returns
    immediately and no kernel is launched.

    Args:
        dcp_local_seq_lens: Destination buffer handed to the kernel; its
            ``shape[0]`` determines how many slots the launch covers.
        seq_lens: Source sequence lengths handed to the kernel.
        num_reqs: Forwarded to the kernel unchanged (presumably the number of
            live requests -- confirm against the kernel).
        dcp_size: DCP group size; a value of 1 makes this call a no-op.
        dcp_rank: Forwarded to the kernel unchanged.
        cp_interleave: Forwarded to the kernel unchanged.
    """
    # With a single DCP rank there is nothing to compute.
    if dcp_size == 1:
        return

    block = 128
    capacity = dcp_local_seq_lens.shape[0]
    # One program per `block` slots, rounded up so every slot is covered.
    grid = (triton.cdiv(capacity, block),)

    # Arguments are positional and must stay in the kernel's parameter order.
    _dcp_local_seq_lens_kernel[grid](
        dcp_local_seq_lens,
        seq_lens,
        dcp_size,
        dcp_rank,
        cp_interleave,
        num_reqs,
        capacity,
        block,
    )