File tree Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Original file line number Diff line number Diff line change 66
66
67
67
if TYPE_CHECKING :
68
68
import xgrammar as xgr
69
+ import xgrammar .kernels .apply_token_bitmask_inplace_torch_compile as xgr_torch_compile # noqa: E501
69
70
70
71
from vllm .model_executor .model_loader .tensorizer import TensorizerConfig
71
72
from vllm .v1 .core .sched .output import SchedulerOutput
72
73
else :
73
74
xgr = LazyLoader ("xgr" , globals (), "xgrammar" )
75
+ xgr_torch_compile = LazyLoader (
76
+ "xgr_torch_compile" , globals (),
77
+ "xgrammar.kernels.apply_token_bitmask_inplace_torch_compile" )
74
78
75
79
logger = init_logger (__name__ )
76
80
@@ -1103,7 +1107,10 @@ def apply_grammar_bitmask(
1103
1107
# so we receive it in that format.
1104
1108
grammar_bitmask = torch .from_numpy (grammar_bitmask )
1105
1109
1106
- xgr .apply_token_bitmask_inplace (
1110
+ # Force use of the torch.compile implementation from xgrammar to work
1111
+ # around issues with the Triton kernel in concurrent structured output
1112
+ # scenarios. See PR #19565 and issues #19493, #18376 for details.
1113
+ xgr_torch_compile .apply_token_bitmask_inplace_torch_compile (
1107
1114
logits ,
1108
1115
grammar_bitmask .to (self .device , non_blocking = True ),
1109
1116
indices = out_indices ,
You can’t perform that action at this time.
0 commit comments