From 7fa39b2eeffe9d16edbe138c0006773887cdd063 Mon Sep 17 00:00:00 2001 From: trigaten Date: Sun, 11 Dec 2022 10:24:47 -0500 Subject: [PATCH 01/16] feat: placeholder --- docs/ape/_category_.json | 8 ++++++++ docs/ape/ape.md | 5 +++++ docs/ape/overview.md | 7 +++++++ 3 files changed, 20 insertions(+) create mode 100644 docs/ape/_category_.json create mode 100644 docs/ape/ape.md create mode 100644 docs/ape/overview.md diff --git a/docs/ape/_category_.json b/docs/ape/_category_.json new file mode 100644 index 00000000000..5ac8575b961 --- /dev/null +++ b/docs/ape/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "⚙️ Automated Prompt Engineering", + "position": 30, + "link": { + "type": "generated-index", + "description": "Methods that automate prompt engineering" + } +} diff --git a/docs/ape/ape.md b/docs/ape/ape.md new file mode 100644 index 00000000000..0dab82bbe3e --- /dev/null +++ b/docs/ape/ape.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 1 +--- + +# APE diff --git a/docs/ape/overview.md b/docs/ape/overview.md new file mode 100644 index 00000000000..9b2d4ce9dc8 --- /dev/null +++ b/docs/ape/overview.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 1 +--- + +# Overview + +Can prompt engineering really be automated? Sometimes. \ No newline at end of file From e7233119f68cf9365032bd4ad501cf561befed1f Mon Sep 17 00:00:00 2001 From: trigaten Date: Sun, 11 Dec 2022 10:50:00 -0500 Subject: [PATCH 02/16] feat: more stubs --- docs/ape/autoprompt.md | 5 +++++ docs/ape/tempera.md | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 docs/ape/autoprompt.md create mode 100644 docs/ape/tempera.md diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md new file mode 100644 index 00000000000..517acb7ab47 --- /dev/null +++ b/docs/ape/autoprompt.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 20 +--- + +# Autoprompt diff --git a/docs/ape/tempera.md b/docs/ape/tempera.md new file mode 100644 index 00000000000..6739b9384b0 --- /dev/null +++ b/docs/ape/tempera.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 30 +--- + +# TEMPERA From d9997c550ef7593c0824eba7e5ae0a5afd1f5560 Mon Sep 17 00:00:00 2001 From: trigaten Date: Mon, 12 Dec 2022 16:53:36 -0500 Subject: [PATCH 03/16] feat: ape stuff --- bibliography.bib | 9 +++++++++ docs/ape/_category_.json | 4 ++-- docs/ape/ape.md | 42 +++++++++++++++++++++++++++++++++++++++- docs/ape/autoprompt.md | 2 +- docs/ape/overview.md | 2 +- docs/ape/tempera.md | 2 +- docs/bibliography.md | 4 +++- 7 files changed, 58 insertions(+), 7 deletions(-) diff --git a/bibliography.bib b/bibliography.bib index 192c6874fde..999c6264047 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -164,6 +164,15 @@ @misc{zhou2022large primaryClass={cs.LG} } +@misc{zhang2022tempera, + title={TEMPERA: Test-Time Prompting via Reinforcement Learning}, + author={Tianjun Zhang and Xuezhi Wang and Denny Zhou and Dale Schuurmans and Joseph E. 
Gonzalez}, + year={2022}, + eprint={2211.11890}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + % Models % Language Models diff --git a/docs/ape/_category_.json b/docs/ape/_category_.json index 5ac8575b961..edc24a74e4d 100644 --- a/docs/ape/_category_.json +++ b/docs/ape/_category_.json @@ -1,6 +1,6 @@ { - "label": "⚙️ Automated Prompt Engineering", - "position": 30, + "label": "⚙️ Automated Prompting", + "position": 35, "link": { "type": "generated-index", "description": "Methods that automate prompt engineering" diff --git a/docs/ape/ape.md b/docs/ape/ape.md index 0dab82bbe3e..27005dd9a1e 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -1,5 +1,45 @@ --- -sidebar_position: 1 +sidebar_position: 10 --- # APE + +Automatic Prompt Engineering (APE)(@zhou2022large) is an approach to automating the generation and +selection of prompts. The basic idea of APE is to give a LLM a prompt containing +a few shot exemplars, and ask it generate a prompt that would create these exemplars. + +## Example + +For example, if we give the LLM the following prompt: + +``` +Is a banana a fruit? +Yes +Is a tomato a fruit? +No +Is a fish a fruit? +No + +What would be a good prompt to generate an answer to the above questions? + +``` + +
+banana
+Yes
+
+tomato
+No
+
+fish
+No
+
+watermelon
+Yes
+
+What would be a good prompt to generate an answer to the above questions?
+
+    
+    Is the following item a fruit:
+    
+
\ No newline at end of file diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md index 517acb7ab47..84b78e20143 100644 --- a/docs/ape/autoprompt.md +++ b/docs/ape/autoprompt.md @@ -2,4 +2,4 @@ sidebar_position: 20 --- -# Autoprompt +# Autoprompt(@shin2020autoprompt) \ No newline at end of file diff --git a/docs/ape/overview.md b/docs/ape/overview.md index 9b2d4ce9dc8..fd1606056e3 100644 --- a/docs/ape/overview.md +++ b/docs/ape/overview.md @@ -1,5 +1,5 @@ --- -sidebar_position: 1 +sidebar_position: 0 --- # Overview diff --git a/docs/ape/tempera.md b/docs/ape/tempera.md index 6739b9384b0..fae035c07ed 100644 --- a/docs/ape/tempera.md +++ b/docs/ape/tempera.md @@ -2,4 +2,4 @@ sidebar_position: 30 --- -# TEMPERA +# TEMPERA(@zhang2022tempera) \ No newline at end of file diff --git a/docs/bibliography.md b/docs/bibliography.md index 162b5d59056..b43f47c10f7 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -46,7 +46,9 @@ cite them as such. #### AutoPrompt(@shin2020autoprompt) 🔵 -#### Automatic Prompt Engineer(@zhou2022large) +#### Automatic Prompt Engineer(@zhou2022large) 🔵 + +#### TEMPERA(@zhang2022tempera) 🔵 ## Models From 029401597450a1514dce5a45c3af6b9670e54f4a Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 17 Dec 2022 16:34:44 -0500 Subject: [PATCH 04/16] feat: more --- docs/ape/autoprompt.md | 4 +++- docs/ape/tempera.md | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md index 84b78e20143..718dc261b0f 100644 --- a/docs/ape/autoprompt.md +++ b/docs/ape/autoprompt.md @@ -2,4 +2,6 @@ sidebar_position: 20 --- -# Autoprompt(@shin2020autoprompt) \ No newline at end of file +# Autoprompt + +Autoprompt(@shin2020autoprompt) \ No newline at end of file diff --git a/docs/ape/tempera.md b/docs/ape/tempera.md index fae035c07ed..5ff15c6e154 100644 --- a/docs/ape/tempera.md +++ b/docs/ape/tempera.md @@ -2,4 +2,20 @@ sidebar_position: 30 --- -# TEMPERA(@zhang2022tempera) \ No newline at end of file +# TEMPERA + +**TE**st-ti**M**e **P**rompt **E**diting using **R**einforcement le**A**rning +(TEMPERA)(@zhang2022tempera) is a method for automatically generating +interpretable prompts. + + + + +For example, Lu et al. (2022) found that the prompt order can have a large effect on the final task performance; Zhao et al. (2021) show that the choice of prompt format, training examples, and prompt order can cause the performance to vary quite significantly. + +For example, Liu et al. 
(2022) propose to retrieve exemplars from a training pool that are semantically similar to a test example, and show it can significantly boost the performance + + + + +significant gains compared with recent SoTA approaches like prompt tun- ing, AutoPrompt, and RLPrompt \ No newline at end of file From ae510b4dfae8de5b72588c5ccaf4c00bab7d483c Mon Sep 17 00:00:00 2001 From: trigaten Date: Tue, 27 Dec 2022 22:05:11 -0500 Subject: [PATCH 05/16] feat: refactor a bit --- bibliography.bib | 9 +++++++++ docs/ape/ape.md | 14 ++++++-------- docs/ape/{tempera.md => rl.md} | 6 +++++- docs/bibliography.md | 2 ++ 4 files changed, 22 insertions(+), 9 deletions(-) rename docs/ape/{tempera.md => rl.md} (93%) diff --git a/bibliography.bib b/bibliography.bib index aa19008c8b4..43916b58635 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -193,6 +193,15 @@ @misc{zhang2022tempera primaryClass={cs.CL} } +@misc{deng2022rlprompt, + title={RLPrompt: Optimizing Discrete Text Prompts with Reinforcement Learning}, + author={Mingkai Deng and Jianyu Wang and Cheng-Ping Hsieh and Yihan Wang and Han Guo and Tianmin Shu and Meng Song and Eric P. Xing and Zhiting Hu}, + year={2022}, + eprint={2205.12548}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + % Models % Language Models diff --git a/docs/ape/ape.md b/docs/ape/ape.md index 27005dd9a1e..3cd1d13f908 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -12,7 +12,7 @@ a few shot exemplars, and ask it generate a prompt that would create these exemp For example, if we give the LLM the following prompt: -``` +```text Is a banana a fruit? Yes Is a tomato a fruit? @@ -21,10 +21,9 @@ Is a fish a fruit? No What would be a good prompt to generate an answer to the above questions? - ``` -
+```text
 banana
 Yes
 
@@ -38,8 +37,7 @@ watermelon
 Yes
 
 What would be a good prompt to generate an answer to the above questions?
-
-    
-    Is the following item a fruit:
-    
-
\ No newline at end of file +// highlight-start +Is the following item a fruit: +// highlight-end +``` \ No newline at end of file diff --git a/docs/ape/tempera.md b/docs/ape/rl.md similarity index 93% rename from docs/ape/tempera.md rename to docs/ape/rl.md index 5ff15c6e154..fc612fbde75 100644 --- a/docs/ape/tempera.md +++ b/docs/ape/rl.md @@ -2,7 +2,11 @@ sidebar_position: 30 --- -# TEMPERA +# Reinforcement Learning + +## RLPrompt + +## TEMPERA **TE**st-ti**M**e **P**rompt **E**diting using **R**einforcement le**A**rning (TEMPERA)(@zhang2022tempera) is a method for automatically generating diff --git a/docs/bibliography.md b/docs/bibliography.md index b054971967c..3a4aa725a09 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -54,6 +54,8 @@ cite them as such. #### TEMPERA(@zhang2022tempera) 🔵 +#### RLPrompt(@deng2022rlprompt) + ## Models ### Language Models From bc7e3ed40f5a57c9f1521026c1676212c55c667c Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 27 Jan 2023 22:46:46 -0500 Subject: [PATCH 06/16] fix: update citation --- CITATION.cff | 9 +++++++++ README.md | 11 +---------- docs/bibliography.md | 13 +------------ 3 files changed, 11 insertions(+), 22 deletions(-) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000000..591acee2f17 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,9 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Schulhoff" + given-names: "Sander" +- family-names: "Community Contributors" +title: "Learn Prompting" +date-released: 2022-12-01 +url: "https://github.com/trigaten/Learn_Prompting" \ No newline at end of file diff --git a/README.md b/README.md index 15b72a7ab38..f74f8483200 100644 --- a/README.md +++ b/README.md @@ -39,13 +39,4 @@ This command starts a local development server and opens up a browser window. Mo ## Cite -```text -@misc{schulhoff2022learnprompting, - title={Learn Prompting}, - author={Sander Schulhoff and Community Contributors}, - url={https://learnprompting.org} - year={2022}, - month={Dec}, - day={1} -} -``` +Use the provided Github citation in this repository. \ No newline at end of file diff --git a/docs/bibliography.md b/docs/bibliography.md index 676d0f1e911..3472e04f787 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -7,18 +7,7 @@ sidebar_position: 1000 The page contains an organized list of all papers used by this course. The papers are organized by topic. -Please cite this resource as: - -```text -@misc{schulhoff2022learnprompting, - title={Learn Prompting}, - author={Sander Schulhoff and Community Contributors}, - url={https://learnprompting.org} - year={2022}, - month={Dec}, - day={1} -} -``` +**To cite this course, use the provided citation in the Github repository.** 🔵 = Paper directly cited in this course. Other papers have informed my understanding of the topic. 
From e933caa8a4aa51bbcd351c4e0973d5b503ea904e Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 27 Jan 2023 23:05:47 -0500 Subject: [PATCH 07/16] refactor: smush trainable into autoprompting section --- docs/ape/_category_.json | 2 +- docs/ape/ape.md | 4 ++-- docs/ape/autoprompt.md | 7 ------- docs/{trainable => ape}/discretized.md | 0 docs/ape/more.md | 7 +++++++ docs/ape/rl.md | 4 ++-- docs/{trainable => ape}/soft_prompting.md | 0 docs/trainable/_category_.json | 8 -------- 8 files changed, 12 insertions(+), 20 deletions(-) delete mode 100644 docs/ape/autoprompt.md rename docs/{trainable => ape}/discretized.md (100%) create mode 100644 docs/ape/more.md rename docs/{trainable => ape}/soft_prompting.md (100%) delete mode 100644 docs/trainable/_category_.json diff --git a/docs/ape/_category_.json b/docs/ape/_category_.json index edc24a74e4d..1c5c44c4be1 100644 --- a/docs/ape/_category_.json +++ b/docs/ape/_category_.json @@ -1,6 +1,6 @@ { "label": "⚙️ Automated Prompting", - "position": 35, + "position": 70, "link": { "type": "generated-index", "description": "Methods that automate prompt engineering" diff --git a/docs/ape/ape.md b/docs/ape/ape.md index 3cd1d13f908..d54f0dfe53b 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -1,8 +1,8 @@ --- -sidebar_position: 10 +sidebar_position: 1 --- -# APE +# 🟢 APE Automatic Prompt Engineering (APE)(@zhou2022large) is an approach to automating the generation and selection of prompts. The basic idea of APE is to give a LLM a prompt containing diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md deleted file mode 100644 index 718dc261b0f..00000000000 --- a/docs/ape/autoprompt.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -sidebar_position: 20 ---- - -# Autoprompt - -Autoprompt(@shin2020autoprompt) \ No newline at end of file diff --git a/docs/trainable/discretized.md b/docs/ape/discretized.md similarity index 100% rename from docs/trainable/discretized.md rename to docs/ape/discretized.md diff --git a/docs/ape/more.md b/docs/ape/more.md new file mode 100644 index 00000000000..1d94baacb01 --- /dev/null +++ b/docs/ape/more.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 200 +--- + +# More + +Other methods exist, such as Autoprompt(@shin2020autoprompt), which uses gradient based search to build prompts for MLMs. 
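To make "gradient based search" a bit more concrete, here is a toy sketch of the candidate-ranking step that AutoPrompt-style methods use. The embedding table and linear scorer below are random stand-ins for a real masked language model, so this is only an illustration of the idea (ranking replacement tokens for one trigger slot with a first-order gradient approximation), not AutoPrompt's actual implementation.

```python
# Toy sketch of gradient-guided trigger-token search (AutoPrompt-style).
# Random embeddings and a linear scorer stand in for a real MLM.
import torch

torch.manual_seed(0)
vocab_size, dim = 100, 16
embeddings = torch.randn(vocab_size, dim)   # frozen token embeddings
scorer = torch.nn.Linear(dim, 1)            # stand-in for the label logit

trigger_ids = [5, 17, 42]                   # current trigger tokens
slot = 1                                    # position we try to improve

# Forward/backward pass to get the gradient at the chosen slot.
trigger_embeds = embeddings[trigger_ids].clone().requires_grad_(True)
loss = -scorer(trigger_embeds.mean(dim=0)).squeeze()   # maximize the label logit
loss.backward()
grad = trigger_embeds.grad[slot]

# First-order approximation: swapping in token w changes the loss by roughly
# grad . (e_w - e_old), so the most promising candidate maximizes e_w . (-grad).
scores = embeddings @ (-grad)
best_candidate = int(scores.argmax())
print(f"swap token {trigger_ids[slot]} -> {best_candidate}")
```

In the real method, the top-scoring candidates are then re-checked with actual forward passes before one is accepted.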
\ No newline at end of file diff --git a/docs/ape/rl.md b/docs/ape/rl.md index fc612fbde75..5961f35a2e9 100644 --- a/docs/ape/rl.md +++ b/docs/ape/rl.md @@ -1,8 +1,8 @@ --- -sidebar_position: 30 +sidebar_position: 130 --- -# Reinforcement Learning +# 🟣 Reinforcement Learning ## RLPrompt diff --git a/docs/trainable/soft_prompting.md b/docs/ape/soft_prompting.md similarity index 100% rename from docs/trainable/soft_prompting.md rename to docs/ape/soft_prompting.md diff --git a/docs/trainable/_category_.json b/docs/trainable/_category_.json deleted file mode 100644 index bba090d936b..00000000000 --- a/docs/trainable/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "💪 Prompt Tuning", - "position": 70, - "link": { - "type": "generated-index", - "description": "Prompt engineering that you can fine tune with gradients" - } -} From 9de29864f3237dcb2a2594d4aa91e805ee0c436c Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 28 Jan 2023 14:22:00 -0500 Subject: [PATCH 08/16] feat: more --- docs/ape/ape.md | 6 +++++- docs/ape/rl.md | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/docs/ape/ape.md b/docs/ape/ape.md index d54f0dfe53b..e713b51ea73 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -40,4 +40,8 @@ What would be a good prompt to generate an answer to the above questions? // highlight-start Is the following item a fruit: // highlight-end -``` \ No newline at end of file +``` + +## Notes + +Another simple automatic prompt engineering strategy is to simply give GPT-3 your improved and ask GPT-3 to improve it. \ No newline at end of file diff --git a/docs/ape/rl.md b/docs/ape/rl.md index 5961f35a2e9..34a464cfaba 100644 --- a/docs/ape/rl.md +++ b/docs/ape/rl.md @@ -4,6 +4,8 @@ sidebar_position: 130 # 🟣 Reinforcement Learning +This section covers reinforcement learning methods which optimize discrete prompts (not soft prompts).
This is extremely complicated. + ## RLPrompt ## TEMPERA @@ -12,8 +14,30 @@ sidebar_position: 130 (TEMPERA)(@zhang2022tempera) is a method for automatically generating interpretable prompts. +At a high level, TEMPERA takes a starting prompt and modifies different parts of it in order to see what changes help most. + +## Action Space + +TEMPERA is allowed to edit 3 parts of the prompt: + +1) The instruction +2) in-context examples +3) The verbalizers + +## Reward + +They use a reward which consists of the difference of score between a prompt before/after an edit. + +TEMPERA is densely reward, computing a reward for each edit step according to + +## Training + +TEMPERA uses a GPT architecture and is trained with proximal policy optimization. + +They use a reward which consists of the difference of score between a prompt before/after an edit. +can edit instructions, in context exemplars, or verbalizers For example, Lu et al. (2022) found that the prompt order can have a large effect on the final task performance; Zhao et al. (2021) show that the choice of prompt format, training examples, and prompt order can cause the performance to vary quite significantly. From f28aeec2869312aa40c295888c7e7f0625ff59fa Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 3 Feb 2023 20:06:51 -0500 Subject: [PATCH 09/16] refactor: rename folder --- docs/{ape => automated_pe}/_category_.json | 0 docs/{ape => automated_pe}/ape.md | 0 docs/{ape => automated_pe}/discretized.md | 0 docs/{ape => automated_pe}/more.md | 0 docs/{ape => automated_pe}/overview.md | 0 docs/{ape => automated_pe}/rl.md | 9 --------- docs/{ape => automated_pe}/soft_prompting.md | 0 7 files changed, 9 deletions(-) rename docs/{ape => automated_pe}/_category_.json (100%) rename docs/{ape => automated_pe}/ape.md (100%) rename docs/{ape => automated_pe}/discretized.md (100%) rename docs/{ape => automated_pe}/more.md (100%) rename docs/{ape => automated_pe}/overview.md (100%) rename docs/{ape => automated_pe}/rl.md (66%) rename docs/{ape => automated_pe}/soft_prompting.md (100%) diff --git a/docs/ape/_category_.json b/docs/automated_pe/_category_.json similarity index 100% rename from docs/ape/_category_.json rename to docs/automated_pe/_category_.json diff --git a/docs/ape/ape.md b/docs/automated_pe/ape.md similarity index 100% rename from docs/ape/ape.md rename to docs/automated_pe/ape.md diff --git a/docs/ape/discretized.md b/docs/automated_pe/discretized.md similarity index 100% rename from docs/ape/discretized.md rename to docs/automated_pe/discretized.md diff --git a/docs/ape/more.md b/docs/automated_pe/more.md similarity index 100% rename from docs/ape/more.md rename to docs/automated_pe/more.md diff --git a/docs/ape/overview.md b/docs/automated_pe/overview.md similarity index 100% rename from docs/ape/overview.md rename to docs/automated_pe/overview.md diff --git a/docs/ape/rl.md b/docs/automated_pe/rl.md similarity index 66% rename from docs/ape/rl.md rename to docs/automated_pe/rl.md index 34a464cfaba..a70f72f0af4 100644 --- a/docs/ape/rl.md +++ b/docs/automated_pe/rl.md @@ -38,12 +38,3 @@ They use a reward which consists of the difference of score between a prompt bef can edit instructions, in context exemplars, or verbalizers - -For example, Lu et al. (2022) found that the prompt order can have a large effect on the final task performance; Zhao et al. (2021) show that the choice of prompt format, training examples, and prompt order can cause the performance to vary quite significantly. - -For example, Liu et al. 
(2022) propose to retrieve exemplars from a training pool that are semantically similar to a test example, and show it can significantly boost the performance - - - - -significant gains compared with recent SoTA approaches like prompt tun- ing, AutoPrompt, and RLPrompt \ No newline at end of file diff --git a/docs/ape/soft_prompting.md b/docs/automated_pe/soft_prompting.md similarity index 100% rename from docs/ape/soft_prompting.md rename to docs/automated_pe/soft_prompting.md From 307e778f4e6adc3ee2c4a1fae3e24505265a966d Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 3 Feb 2023 20:19:45 -0500 Subject: [PATCH 10/16] feat: RLPrompt training note --- docs/automated_pe/rl.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index a70f72f0af4..8e79ca1ffd3 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -8,6 +8,10 @@ This section covers reinforcement learning methods which optimize discrete promp ## RLPrompt + +## Training +Optimize via soft q learning + ## TEMPERA **TE**st-ti**M**e **P**rompt **E**diting using **R**einforcement le**A**rning @@ -35,6 +39,3 @@ TEMPERA is densely reward, computing a reward for each edit step according to TEMPERA uses a GPT architecture and is trained with proximal policy optimization. They use a reward which consists of the difference of score between a prompt before/after an edit. - - -can edit instructions, in context exemplars, or verbalizers From 8d42ea4c635754c3d2db63c1df7b84eea2a0ab49 Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 3 Feb 2023 23:46:33 -0500 Subject: [PATCH 11/16] feat: more rlprompt info --- docs/automated_pe/rl.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 8e79ca1ffd3..9eab8875d33 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -8,6 +8,24 @@ This section covers reinforcement learning methods which optimize discrete promp ## RLPrompt +RLPrompt(@deng2022rlprompt) is a method that takes an input and trains a language model (the policy) +to generate a good prompt for that input. + +More formally, given an input sequence $x$, the policy designs a prompt $z$ by selecting $[z_1, z_2, ..., z_T]$ tokens from the vocabulary sequentially. + +After creating the prompt, it combines it with $x$, and uses another language model to +generate the completion. The LM output of x prompted by z can be described as $y_{LM}(\hat{z}, x)$. + +Then, the policy receives some reward according to this output: $R(y_{LM}(\hat{z}, x))$ + +### Example + +Assuming we have partially trained RLPrompt on classifying movie reviews, and our next +training point example is `x = "I hate this movie."`. RLPrompt will generate a prompt like +`z = "Movie review bad or good:`. Then, it will combine the prompt with the input to get +`x' = "Movie review bad or good: I hate this movie."`. Then, it will use a language model +to generate the completion. Say it generates `bad`. Then, the reward is computed as +`R(y_{LM}(\hat{z}, x))`... 
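Below is a minimal sketch of this generate-then-reward loop. The function names (`policy_generate_prompt`, `frozen_lm_complete`) are hypothetical stand-ins rather than anything from the RLPrompt codebase, and the exact-match reward is a placeholder for the shaped score the method actually uses.

```python
# Hedged sketch of a single RLPrompt-style step, with stand-in components.
def policy_generate_prompt(x: str) -> str:
    """Stand-in for the trained policy that writes a prompt z for input x."""
    return "Movie review bad or good:"

def frozen_lm_complete(prompt: str) -> str:
    """Stand-in for the frozen language model that completes the prompt."""
    return "bad"

def reward(output: str, label: str) -> float:
    """Toy reward: 1 if the completion matches the label, else 0."""
    return 1.0 if output.strip().lower() == label else 0.0

x, label = "I hate this movie.", "bad"
z = policy_generate_prompt(x)    # the policy designs the prompt z
x_prime = f"{z} {x}"             # combine prompt and input
y = frozen_lm_complete(x_prime)  # y_LM(z, x): the frozen LM's completion
r = reward(y, label)             # R(y_LM(z, x)), used to update the policy
print(f"{x_prime!r} -> {y!r}, reward {r}")
```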
## Training Optimize via soft q learning From 3abb35c22fc93d58f8f4855412e805f1fd05e7de Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 4 Feb 2023 00:41:08 -0500 Subject: [PATCH 12/16] feat: add rlprompt training info --- bibliography.bib | 9 +++++++++ docs/automated_pe/rl.md | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/bibliography.bib b/bibliography.bib index cd87d659ddf..6155d79f2be 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -228,6 +228,15 @@ @misc{deng2022rlprompt primaryClass={cs.CL} } +@misc{guo2021efficient, + title={Efficient (Soft) Q-Learning for Text Generation with Limited Good Data}, + author={Han Guo and Bowen Tan and Zhengzhong Liu and Eric P. Xing and Zhiting Hu}, + year={2021}, + eprint={2106.07704}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + % Models % Language Model Guides diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 9eab8875d33..485f3259c06 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -25,10 +25,11 @@ training point example is `x = "I hate this movie."`. RLPrompt will generate a p `z = "Movie review bad or good:`. Then, it will combine the prompt with the input to get `x' = "Movie review bad or good: I hate this movie."`. Then, it will use a language model to generate the completion. Say it generates `bad`. Then, the reward is computed as -`R(y_{LM}(\hat{z}, x))`... +`R(y_{LM}(\hat{z}, x))`. Deng et al. do not use a simple 0/1 reward. ## Training -Optimize via soft q learning + +RLPrompt embeds a task specific MLP inside a frozen LM. The MLP is trained with Soft Q Learning(@guo2021efficient). ## TEMPERA From 4307f20327e4aa81d86e63463e8fc9e906a49123 Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 4 Feb 2023 00:42:42 -0500 Subject: [PATCH 13/16] feat: tempera v rlprompt note --- docs/automated_pe/rl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 485f3259c06..5f06c625e0e 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -37,7 +37,7 @@ RLPrompt embeds a task specific MLP inside a frozen LM. The MLP is trained with (TEMPERA)(@zhang2022tempera) is a method for automatically generating interpretable prompts. -At a high level, TEMPERA takes a starting prompt and modifies different parts of it in order to see what changes help most. +At a high level, instead of building a prompt from scratch like RLPrompt, TEMPERA takes a starting prompt and modifies different parts of it in order to see what changes help most. ## Action Space From ef47bbaa9011480cbd3aed0d739cf86741e0bc2b Mon Sep 17 00:00:00 2001 From: trigaten Date: Sun, 5 Feb 2023 00:34:28 -0500 Subject: [PATCH 14/16] fix: typo --- docs/automated_pe/ape.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/automated_pe/ape.md b/docs/automated_pe/ape.md index e713b51ea73..b438314993a 100644 --- a/docs/automated_pe/ape.md +++ b/docs/automated_pe/ape.md @@ -44,4 +44,4 @@ Is the following item a fruit: ## Notes -Another simple automatic prompt engineering strategy is to simply give GPT-3 your improved and ask GPT-3 to improve it. \ No newline at end of file +Another simple automatic prompt engineering strategy is to simply give GPT-3 your prompt and ask GPT-3 to improve it. 
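A rough sketch of that strategy is below; `complete` is a placeholder for whatever LLM API you use (not a specific library function), so treat this as a pattern rather than working client code.

```python
# Sketch of the "ask the model to improve your prompt" strategy.
def complete(prompt: str) -> str:
    """Placeholder: wire this up to your own LLM provider."""
    raise NotImplementedError

def improve_prompt(current_prompt: str) -> str:
    meta_prompt = (
        "Improve the following prompt so that it is clearer and more likely "
        "to produce correct answers. Return only the improved prompt.\n\n"
        f"Prompt: {current_prompt}"
    )
    return complete(meta_prompt)
```

You could repeat this a few times and keep whichever version of the prompt scores best on a small set of held-out examples.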
\ No newline at end of file From 12f5e8700fd7f8b55570f5280e37eb66d74a0fc3 Mon Sep 17 00:00:00 2001 From: tianjunz Date: Mon, 13 Feb 2023 22:15:02 -0800 Subject: [PATCH 15/16] Update rl.md --- docs/automated_pe/rl.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 5f06c625e0e..055564657e8 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -43,15 +43,15 @@ At a high level, instead of building a prompt from scratch like RLPrompt, TEMPER TEMPERA is allowed to edit 3 parts of the prompt: -1) The instruction -2) in-context examples -3) The verbalizers +1) The instruction: given the instruction $i$, one could parse it through `nltk.tokenize.treebank` into a set of phrases. Then the actions allow swapping, addition and deletion between current set of phrases. For example, this will first parse the sentence `"Given text, classify whether it is good or bad."` to `["Given text", "classify", "whether", "it is", "good", "or", "bad"]`. Then we can perform different editing strategies (e.g., swapping two phrases, delete one phrase or repeat one phrase) on this set of phrases. +2) in-context examples: given a example pool of $K$ examples, we want to select $k$ from them to formulate the final prompt. The action space allows change position of examples $i, j$ with $0 < i < j < k$. It also supports replacing example $0 < i < k$ with any candidate from the pool $k < j < K+1$. +3) The verbalizers: the editing space simply allows changing the current verbalizer to any other verbalizer from the `promptsource` collections. For examples, changing from `["positive", "negative"]` to `["great", "terrible"]`. ## Reward They use a reward which consists of the difference of score between a prompt before/after an edit. -TEMPERA is densely reward, computing a reward for each edit step according to +TEMPERA is densely reward, computing a reward for each edit step according to the accuracy improvement comparing the current prompt (after editing) and the previous prompt (before editing). ## Training From c82ac2dc13ac33b431598409b79ea10ed9349f28 Mon Sep 17 00:00:00 2001 From: trigaten Date: Tue, 14 Feb 2023 17:58:03 -0500 Subject: [PATCH 16/16] feat: better formatting --- docs/automated_pe/rl.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 055564657e8..2144341d579 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -43,9 +43,17 @@ At a high level, instead of building a prompt from scratch like RLPrompt, TEMPER TEMPERA is allowed to edit 3 parts of the prompt: -1) The instruction: given the instruction $i$, one could parse it through `nltk.tokenize.treebank` into a set of phrases. Then the actions allow swapping, addition and deletion between current set of phrases. For example, this will first parse the sentence `"Given text, classify whether it is good or bad."` to `["Given text", "classify", "whether", "it is", "good", "or", "bad"]`. Then we can perform different editing strategies (e.g., swapping two phrases, delete one phrase or repeat one phrase) on this set of phrases. -2) in-context examples: given a example pool of $K$ examples, we want to select $k$ from them to formulate the final prompt. The action space allows change position of examples $i, j$ with $0 < i < j < k$. It also supports replacing example $0 < i < k$ with any candidate from the pool $k < j < K+1$. 
-3) The verbalizers: the editing space simply allows changing the current verbalizer to any other verbalizer from the `promptsource` collections. For examples, changing from `["positive", "negative"]` to `["great", "terrible"]`. +### 1) The instruction + +Given the instruction $i$, one could parse it through `nltk.tokenize.treebank` into a set of phrases. Then the actions allow swapping, addition and deletion between current set of phrases. For example, this will first parse the sentence `"Given text, classify whether it is good or bad."` to `["Given text", "classify", "whether", "it is", "good", "or", "bad"]`. Then we can perform different editing strategies (e.g., swapping two phrases, delete one phrase or repeat one phrase) on this set of phrases. + +### 2) In-context examples + +Given a example pool of $K$ examples (aka %%exemplars|exemplars%%), we want to select $k$ from them to formulate the final prompt. The action space allows change position of examples $i, j$ with $0 < i < j < k$. It also supports replacing example $0 < i < k$ with any candidate from the pool $k < j < K+1$. + +### 3) The verbalizers + +The editing space simply allows changing the current verbalizer to any other verbalizer from the `promptsource` collections. For examples, changing from `["positive", "negative"]` to `["great", "terrible"]`. ## Reward