From abc36edd519474f5375b70030f070f871d032aa9 Mon Sep 17 00:00:00 2001
From: TankNee
Date: Sat, 19 Jul 2025 10:49:56 +0800
Subject: [PATCH 1/5] add chat doc in quick start

---
 docs/getting_started/quickstart.md | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 74235db16a15..6de52c0912ec 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -98,6 +98,41 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
+!!! note
+    The generate method does not automatically apply the corresponding model's chat template to the input prompt, as this method is designed to align with OpenAI's `completions` interface rather than the `chat/completions` interface. Therefore, if you are using an Instruct model or Chat model, you should manually apply the corresponding chat template to ensure the expected behavior. Alternatively, you can use the LLM.chat method and pass properly formatted data.
+
+    ```python
+    # Using tokenizer to apply chat template
+    from transformers import AutoTokenizer
+
+    tokenizer = AutoTokenizer.from_pretrained("/path/to/chat_model")
+    messages_list = [
+        [{"role": "user", "content": prompt}]
+        for prompt in prompts
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+
+    # Generate outputs
+    outputs = llm.generate([text], sampling_params)
+
+    # Print the outputs.
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+    # Using chat interface.
+    outputs = llm.chat(messages_list, sampling_params)
+    for idx, output in enumerate(outputs):
+        prompt = messages_list[idx][0]["content"]
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    ```
+
 [](){ #quickstart-online }
 
 ## OpenAI-Compatible Server

From f78e708d4811f88dda03a2c00f2f65d9e4e8bfcd Mon Sep 17 00:00:00 2001
From: TankNee
Date: Sat, 19 Jul 2025 10:56:31 +0800
Subject: [PATCH 2/5] Update quickstart.md

---
 docs/getting_started/quickstart.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 6de52c0912ec..1e1f68b20801 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -110,14 +110,14 @@ for output in outputs:
         [{"role": "user", "content": prompt}]
         for prompt in prompts
     ]
-    text = tokenizer.apply_chat_template(
-        messages,
+    texts = tokenizer.apply_chat_template(
+        messages_list,
         tokenize=False,
         add_generation_prompt=True,
     )
 
     # Generate outputs
-    outputs = llm.generate([text], sampling_params)
+    outputs = llm.generate(texts, sampling_params)
 
     # Print the outputs.
     for output in outputs:
@@ -128,7 +128,7 @@ for output in outputs:
     # Using chat interface.
     outputs = llm.chat(messages_list, sampling_params)
     for idx, output in enumerate(outputs):
-        prompt = messages_list[idx][0]["content"]
+        prompt = prompts[idx]
         generated_text = output.outputs[0].text
         print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
     ```

From 0f57d6f1b8c5915090fd2d8c7133be2390c8803c Mon Sep 17 00:00:00 2001
From: TankNee
Date: Sun, 3 Aug 2025 22:29:12 +0800
Subject: [PATCH 3/5] Update docs/getting_started/quickstart.md

Co-authored-by: Cyrus Leung
---
 docs/getting_started/quickstart.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 1e1f68b20801..dbea09068a45 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -99,7 +99,10 @@ for output in outputs:
 ```
 
 !!! note
-    The generate method does not automatically apply the corresponding model's chat template to the input prompt, as this method is designed to align with OpenAI's `completions` interface rather than the `chat/completions` interface. Therefore, if you are using an Instruct model or Chat model, you should manually apply the corresponding chat template to ensure the expected behavior. Alternatively, you can use the LLM.chat method and pass properly formatted data.
+    The `llm.generate` method does not automatically apply the model's chat template to the input prompt. Therefore, if you are using an Instruct model or Chat model, you should manually apply the corresponding chat template to ensure the expected behavior. Alternatively, you can use the `llm.chat` method and pass a list of messages which have the same format as those passed to OpenAI's `client.chat.completions`:
+``
+
+For quickstart, there is no need to provide much explanation.
 
     ```python
     # Using tokenizer to apply chat template

From c177d44196c888f70d4a0441b9e4438c16e50671 Mon Sep 17 00:00:00 2001
From: TankNee
Date: Sun, 3 Aug 2025 22:34:17 +0800
Subject: [PATCH 4/5] add ??? code

---
 docs/getting_started/quickstart.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index dbea09068a45..172b1e13fe8e 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -100,9 +100,8 @@ for output in outputs:
 
 !!! note
     The `llm.generate` method does not automatically apply the model's chat template to the input prompt. Therefore, if you are using an Instruct model or Chat model, you should manually apply the corresponding chat template to ensure the expected behavior. Alternatively, you can use the `llm.chat` method and pass a list of messages which have the same format as those passed to OpenAI's `client.chat.completions`:
-``
 
-For quickstart, there is no need to provide much explanation.
+??? code
 
     ```python
     # Using tokenizer to apply chat template

From 83b6929aae2e4eabff026655d91e81551eadc0f8 Mon Sep 17 00:00:00 2001
From: TankNee
Date: Sun, 3 Aug 2025 22:38:23 +0800
Subject: [PATCH 5/5] Update quickstart.md

---
 docs/getting_started/quickstart.md | 62 +++++++++++++++---------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md
index 172b1e13fe8e..d7ff8783398e 100644
--- a/docs/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -101,39 +101,39 @@ for output in outputs:
 !!! note
     The `llm.generate` method does not automatically apply the model's chat template to the input prompt. Therefore, if you are using an Instruct model or Chat model, you should manually apply the corresponding chat template to ensure the expected behavior. Alternatively, you can use the `llm.chat` method and pass a list of messages which have the same format as those passed to OpenAI's `client.chat.completions`:
 
-??? code
-
-    ```python
-    # Using tokenizer to apply chat template
-    from transformers import AutoTokenizer
-
-    tokenizer = AutoTokenizer.from_pretrained("/path/to/chat_model")
-    messages_list = [
-        [{"role": "user", "content": prompt}]
-        for prompt in prompts
-    ]
-    texts = tokenizer.apply_chat_template(
-        messages_list,
-        tokenize=False,
-        add_generation_prompt=True,
-    )
+    ??? code
 
-    # Generate outputs
-    outputs = llm.generate(texts, sampling_params)
+        ```python
+        # Using tokenizer to apply chat template
+        from transformers import AutoTokenizer
 
-    # Print the outputs.
-    for output in outputs:
-        prompt = output.prompt
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-
-    # Using chat interface.
-    outputs = llm.chat(messages_list, sampling_params)
-    for idx, output in enumerate(outputs):
-        prompt = prompts[idx]
-        generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-    ```
+        tokenizer = AutoTokenizer.from_pretrained("/path/to/chat_model")
+        messages_list = [
+            [{"role": "user", "content": prompt}]
+            for prompt in prompts
+        ]
+        texts = tokenizer.apply_chat_template(
+            messages_list,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+
+        # Generate outputs
+        outputs = llm.generate(texts, sampling_params)
+
+        # Print the outputs.
+        for output in outputs:
+            prompt = output.prompt
+            generated_text = output.outputs[0].text
+            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+        # Using chat interface.
+        outputs = llm.chat(messages_list, sampling_params)
+        for idx, output in enumerate(outputs):
+            prompt = prompts[idx]
+            generated_text = output.outputs[0].text
+            print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+        ```
 
 [](){ #quickstart-online }
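The documented snippet relies on `prompts`, `sampling_params`, and `llm` being defined earlier in the quickstart, so it cannot be run on its own as shown in the diff. Below is a minimal self-contained sketch of the same flow for trying it out in isolation; it is not part of the patch, and the model name `Qwen/Qwen2.5-0.5B-Instruct`, the example prompts, and the sampling settings are placeholder assumptions — substitute any Instruct or Chat model you actually use.

```python
# Self-contained sketch of the flow documented by this patch series.
# Assumptions: the model name, prompts, and sampling settings below are
# placeholders; any Instruct/Chat model can be substituted.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

prompts = ["Hello, who are you?", "What is the capital of France?"]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=64)
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")

# Option 1: apply the chat template manually, then call llm.generate.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
messages_list = [[{"role": "user", "content": prompt}] for prompt in prompts]
texts = tokenizer.apply_chat_template(
    messages_list,
    tokenize=False,
    add_generation_prompt=True,
)
outputs = llm.generate(texts, sampling_params)

# Option 2: let llm.chat apply the model's chat template internally.
outputs = llm.chat(messages_list, sampling_params)

# Print the outputs in input order.
for prompt, output in zip(prompts, outputs):
    print(f"Prompt: {prompt!r}, Generated text: {output.outputs[0].text!r}")
```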