From 7fa39b2eeffe9d16edbe138c0006773887cdd063 Mon Sep 17 00:00:00 2001 From: trigaten Date: Sun, 11 Dec 2022 10:24:47 -0500 Subject: [PATCH 01/16] feat: placeholder --- docs/ape/_category_.json | 8 ++++++++ docs/ape/ape.md | 5 +++++ docs/ape/overview.md | 7 +++++++ 3 files changed, 20 insertions(+) create mode 100644 docs/ape/_category_.json create mode 100644 docs/ape/ape.md create mode 100644 docs/ape/overview.md diff --git a/docs/ape/_category_.json b/docs/ape/_category_.json new file mode 100644 index 00000000000..5ac8575b961 --- /dev/null +++ b/docs/ape/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "⚙️ Automated Prompt Engineering", + "position": 30, + "link": { + "type": "generated-index", + "description": "Methods that automate prompt engineering" + } +} diff --git a/docs/ape/ape.md b/docs/ape/ape.md new file mode 100644 index 00000000000..0dab82bbe3e --- /dev/null +++ b/docs/ape/ape.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 1 +--- + +# APE diff --git a/docs/ape/overview.md b/docs/ape/overview.md new file mode 100644 index 00000000000..9b2d4ce9dc8 --- /dev/null +++ b/docs/ape/overview.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 1 +--- + +# Overview + +Can prompt engineering really be automated? Sometimes. \ No newline at end of file From e7233119f68cf9365032bd4ad501cf561befed1f Mon Sep 17 00:00:00 2001 From: trigaten Date: Sun, 11 Dec 2022 10:50:00 -0500 Subject: [PATCH 02/16] feat: more stubs --- docs/ape/autoprompt.md | 5 +++++ docs/ape/tempera.md | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 docs/ape/autoprompt.md create mode 100644 docs/ape/tempera.md diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md new file mode 100644 index 00000000000..517acb7ab47 --- /dev/null +++ b/docs/ape/autoprompt.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 20 +--- + +# Autoprompt diff --git a/docs/ape/tempera.md b/docs/ape/tempera.md new file mode 100644 index 00000000000..6739b9384b0 --- /dev/null +++ b/docs/ape/tempera.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 30 +--- + +# TEMPERA From d9997c550ef7593c0824eba7e5ae0a5afd1f5560 Mon Sep 17 00:00:00 2001 From: trigaten Date: Mon, 12 Dec 2022 16:53:36 -0500 Subject: [PATCH 03/16] feat: ape stuff --- bibliography.bib | 9 +++++++++ docs/ape/_category_.json | 4 ++-- docs/ape/ape.md | 42 +++++++++++++++++++++++++++++++++++++++- docs/ape/autoprompt.md | 2 +- docs/ape/overview.md | 2 +- docs/ape/tempera.md | 2 +- docs/bibliography.md | 4 +++- 7 files changed, 58 insertions(+), 7 deletions(-) diff --git a/bibliography.bib b/bibliography.bib index 192c6874fde..999c6264047 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -164,6 +164,15 @@ @misc{zhou2022large primaryClass={cs.LG} } +@misc{zhang2022tempera, + title={TEMPERA: Test-Time Prompting via Reinforcement Learning}, + author={Tianjun Zhang and Xuezhi Wang and Denny Zhou and Dale Schuurmans and Joseph E. 
Gonzalez}, + year={2022}, + eprint={2211.11890}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + % Models % Language Models diff --git a/docs/ape/_category_.json b/docs/ape/_category_.json index 5ac8575b961..edc24a74e4d 100644 --- a/docs/ape/_category_.json +++ b/docs/ape/_category_.json @@ -1,6 +1,6 @@ { - "label": "⚙️ Automated Prompt Engineering", - "position": 30, + "label": "⚙️ Automated Prompting", + "position": 35, "link": { "type": "generated-index", "description": "Methods that automate prompt engineering" diff --git a/docs/ape/ape.md b/docs/ape/ape.md index 0dab82bbe3e..27005dd9a1e 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -1,5 +1,45 @@ --- -sidebar_position: 1 +sidebar_position: 10 --- # APE + +Automatic Prompt Engineering (APE)(@zhou2022large) is an approach to automating the generation and +selection of prompts. The basic idea of APE is to give a LLM a prompt containing +a few shot exemplars, and ask it generate a prompt that would create these exemplars. + +## Example + +For example, if we give the LLM the following prompt: + +``` +Is a banana a fruit? +Yes +Is a tomato a fruit? +No +Is a fish a fruit? +No + +What would be a good prompt to generate an answer to the above questions? + +``` + +
+banana
+Yes
+
+tomato
+No
+
+fish
+No
+
+watermelon
+Yes
+
+What would be a good prompt to generate an answer to the above questions?
+
+    
+    Is the following item a fruit:
+    
+
\ No newline at end of file diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md index 517acb7ab47..84b78e20143 100644 --- a/docs/ape/autoprompt.md +++ b/docs/ape/autoprompt.md @@ -2,4 +2,4 @@ sidebar_position: 20 --- -# Autoprompt +# Autoprompt(@shin2020autoprompt) \ No newline at end of file diff --git a/docs/ape/overview.md b/docs/ape/overview.md index 9b2d4ce9dc8..fd1606056e3 100644 --- a/docs/ape/overview.md +++ b/docs/ape/overview.md @@ -1,5 +1,5 @@ --- -sidebar_position: 1 +sidebar_position: 0 --- # Overview diff --git a/docs/ape/tempera.md b/docs/ape/tempera.md index 6739b9384b0..fae035c07ed 100644 --- a/docs/ape/tempera.md +++ b/docs/ape/tempera.md @@ -2,4 +2,4 @@ sidebar_position: 30 --- -# TEMPERA +# TEMPERA(@zhang2022tempera) \ No newline at end of file diff --git a/docs/bibliography.md b/docs/bibliography.md index 162b5d59056..b43f47c10f7 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -46,7 +46,9 @@ cite them as such. #### AutoPrompt(@shin2020autoprompt) 🔵 -#### Automatic Prompt Engineer(@zhou2022large) +#### Automatic Prompt Engineer(@zhou2022large) 🔵 + +#### TEMPERA(@zhang2022tempera) 🔵 ## Models From 029401597450a1514dce5a45c3af6b9670e54f4a Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 17 Dec 2022 16:34:44 -0500 Subject: [PATCH 04/16] feat: more --- docs/ape/autoprompt.md | 4 +++- docs/ape/tempera.md | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md index 84b78e20143..718dc261b0f 100644 --- a/docs/ape/autoprompt.md +++ b/docs/ape/autoprompt.md @@ -2,4 +2,6 @@ sidebar_position: 20 --- -# Autoprompt(@shin2020autoprompt) \ No newline at end of file +# Autoprompt + +Autoprompt(@shin2020autoprompt) \ No newline at end of file diff --git a/docs/ape/tempera.md b/docs/ape/tempera.md index fae035c07ed..5ff15c6e154 100644 --- a/docs/ape/tempera.md +++ b/docs/ape/tempera.md @@ -2,4 +2,20 @@ sidebar_position: 30 --- -# TEMPERA(@zhang2022tempera) \ No newline at end of file +# TEMPERA + +**TE**st-ti**M**e **P**rompt **E**diting using **R**einforcement le**A**rning +(TEMPERA)(@zhang2022tempera) is a method for automatically generating +interpretable prompts. + + + + +For example, Lu et al. (2022) found that the prompt order can have a large effect on the final task performance; Zhao et al. (2021) show that the choice of prompt format, training examples, and prompt order can cause the performance to vary quite significantly. + +For example, Liu et al. 
(2022) propose to retrieve exemplars from a training pool that are semantically similar to a test example, and show it can significantly boost the performance + + + + +significant gains compared with recent SoTA approaches like prompt tun- ing, AutoPrompt, and RLPrompt \ No newline at end of file From ae510b4dfae8de5b72588c5ccaf4c00bab7d483c Mon Sep 17 00:00:00 2001 From: trigaten Date: Tue, 27 Dec 2022 22:05:11 -0500 Subject: [PATCH 05/16] feat: refactor a bit --- bibliography.bib | 9 +++++++++ docs/ape/ape.md | 14 ++++++-------- docs/ape/{tempera.md => rl.md} | 6 +++++- docs/bibliography.md | 2 ++ 4 files changed, 22 insertions(+), 9 deletions(-) rename docs/ape/{tempera.md => rl.md} (93%) diff --git a/bibliography.bib b/bibliography.bib index aa19008c8b4..43916b58635 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -193,6 +193,15 @@ @misc{zhang2022tempera primaryClass={cs.CL} } +@misc{deng2022rlprompt, + title={RLPrompt: Optimizing Discrete Text Prompts with Reinforcement Learning}, + author={Mingkai Deng and Jianyu Wang and Cheng-Ping Hsieh and Yihan Wang and Han Guo and Tianmin Shu and Meng Song and Eric P. Xing and Zhiting Hu}, + year={2022}, + eprint={2205.12548}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + % Models % Language Models diff --git a/docs/ape/ape.md b/docs/ape/ape.md index 27005dd9a1e..3cd1d13f908 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -12,7 +12,7 @@ a few shot exemplars, and ask it generate a prompt that would create these exemp For example, if we give the LLM the following prompt: -``` +```text Is a banana a fruit? Yes Is a tomato a fruit? @@ -21,10 +21,9 @@ Is a fish a fruit? No What would be a good prompt to generate an answer to the above questions? - ``` -
+```text
 banana
 Yes
 
@@ -38,8 +37,7 @@ watermelon
 Yes
 
 What would be a good prompt to generate an answer to the above questions?
-
-    
-    Is the following item a fruit:
-    
-
\ No newline at end of file +// highlight-start +Is the following item a fruit: +// highlight-end +``` \ No newline at end of file diff --git a/docs/ape/tempera.md b/docs/ape/rl.md similarity index 93% rename from docs/ape/tempera.md rename to docs/ape/rl.md index 5ff15c6e154..fc612fbde75 100644 --- a/docs/ape/tempera.md +++ b/docs/ape/rl.md @@ -2,7 +2,11 @@ sidebar_position: 30 --- -# TEMPERA +# Reinforcement Learning + +## RLPrompt + +## TEMPERA **TE**st-ti**M**e **P**rompt **E**diting using **R**einforcement le**A**rning (TEMPERA)(@zhang2022tempera) is a method for automatically generating diff --git a/docs/bibliography.md b/docs/bibliography.md index b054971967c..3a4aa725a09 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -54,6 +54,8 @@ cite them as such. #### TEMPERA(@zhang2022tempera) 🔵 +#### RLPrompt(@deng2022rlprompt) + ## Models ### Language Models From bc7e3ed40f5a57c9f1521026c1676212c55c667c Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 27 Jan 2023 22:46:46 -0500 Subject: [PATCH 06/16] fix: update citation --- CITATION.cff | 9 +++++++++ README.md | 11 +---------- docs/bibliography.md | 13 +------------ 3 files changed, 11 insertions(+), 22 deletions(-) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000000..591acee2f17 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,9 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Schulhoff" + given-names: "Sander" +- family-names: "Community Contributors" +title: "Learn Prompting" +date-released: 2022-12-01 +url: "https://github.com/trigaten/Learn_Prompting" \ No newline at end of file diff --git a/README.md b/README.md index 15b72a7ab38..f74f8483200 100644 --- a/README.md +++ b/README.md @@ -39,13 +39,4 @@ This command starts a local development server and opens up a browser window. Mo ## Cite -```text -@misc{schulhoff2022learnprompting, - title={Learn Prompting}, - author={Sander Schulhoff and Community Contributors}, - url={https://learnprompting.org} - year={2022}, - month={Dec}, - day={1} -} -``` +Use the provided Github citation in this repository. \ No newline at end of file diff --git a/docs/bibliography.md b/docs/bibliography.md index 676d0f1e911..3472e04f787 100644 --- a/docs/bibliography.md +++ b/docs/bibliography.md @@ -7,18 +7,7 @@ sidebar_position: 1000 The page contains an organized list of all papers used by this course. The papers are organized by topic. -Please cite this resource as: - -```text -@misc{schulhoff2022learnprompting, - title={Learn Prompting}, - author={Sander Schulhoff and Community Contributors}, - url={https://learnprompting.org} - year={2022}, - month={Dec}, - day={1} -} -``` +**To cite this course, use the provided citation in the Github repository.** 🔵 = Paper directly cited in this course. Other papers have informed my understanding of the topic. 
From e933caa8a4aa51bbcd351c4e0973d5b503ea904e Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 27 Jan 2023 23:05:47 -0500 Subject: [PATCH 07/16] refactor: smush trainable into autoprompting section --- docs/ape/_category_.json | 2 +- docs/ape/ape.md | 4 ++-- docs/ape/autoprompt.md | 7 ------- docs/{trainable => ape}/discretized.md | 0 docs/ape/more.md | 7 +++++++ docs/ape/rl.md | 4 ++-- docs/{trainable => ape}/soft_prompting.md | 0 docs/trainable/_category_.json | 8 -------- 8 files changed, 12 insertions(+), 20 deletions(-) delete mode 100644 docs/ape/autoprompt.md rename docs/{trainable => ape}/discretized.md (100%) create mode 100644 docs/ape/more.md rename docs/{trainable => ape}/soft_prompting.md (100%) delete mode 100644 docs/trainable/_category_.json diff --git a/docs/ape/_category_.json b/docs/ape/_category_.json index edc24a74e4d..1c5c44c4be1 100644 --- a/docs/ape/_category_.json +++ b/docs/ape/_category_.json @@ -1,6 +1,6 @@ { "label": "⚙️ Automated Prompting", - "position": 35, + "position": 70, "link": { "type": "generated-index", "description": "Methods that automate prompt engineering" diff --git a/docs/ape/ape.md b/docs/ape/ape.md index 3cd1d13f908..d54f0dfe53b 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -1,8 +1,8 @@ --- -sidebar_position: 10 +sidebar_position: 1 --- -# APE +# 🟢 APE Automatic Prompt Engineering (APE)(@zhou2022large) is an approach to automating the generation and selection of prompts. The basic idea of APE is to give a LLM a prompt containing diff --git a/docs/ape/autoprompt.md b/docs/ape/autoprompt.md deleted file mode 100644 index 718dc261b0f..00000000000 --- a/docs/ape/autoprompt.md +++ /dev/null @@ -1,7 +0,0 @@ ---- -sidebar_position: 20 ---- - -# Autoprompt - -Autoprompt(@shin2020autoprompt) \ No newline at end of file diff --git a/docs/trainable/discretized.md b/docs/ape/discretized.md similarity index 100% rename from docs/trainable/discretized.md rename to docs/ape/discretized.md diff --git a/docs/ape/more.md b/docs/ape/more.md new file mode 100644 index 00000000000..1d94baacb01 --- /dev/null +++ b/docs/ape/more.md @@ -0,0 +1,7 @@ +--- +sidebar_position: 200 +--- + +# More + +Other methods exist, such as Autoprompt(@shin2020autoprompt), which uses gradient based search to build prompts for MLMs. 
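To make "gradient based search" a bit more concrete, here is a toy sketch of the candidate-ranking step that AutoPrompt-style methods use. The embedding table and linear scorer below are random stand-ins for a real masked language model, so this is only an illustration of the idea (ranking replacement tokens for one trigger slot with a first-order gradient approximation), not AutoPrompt's actual implementation.

```python
# Toy sketch of gradient-guided trigger-token search (AutoPrompt-style).
# Random embeddings and a linear scorer stand in for a real MLM.
import torch

torch.manual_seed(0)
vocab_size, dim = 100, 16
embeddings = torch.randn(vocab_size, dim)   # frozen token embeddings
scorer = torch.nn.Linear(dim, 1)            # stand-in for the label logit

trigger_ids = [5, 17, 42]                   # current trigger tokens
slot = 1                                    # position we try to improve

# Forward/backward pass to get the gradient at the chosen slot.
trigger_embeds = embeddings[trigger_ids].clone().requires_grad_(True)
loss = -scorer(trigger_embeds.mean(dim=0)).squeeze()   # maximize the label logit
loss.backward()
grad = trigger_embeds.grad[slot]

# First-order approximation: swapping in token w changes the loss by roughly
# grad . (e_w - e_old), so the most promising candidate maximizes e_w . (-grad).
scores = embeddings @ (-grad)
best_candidate = int(scores.argmax())
print(f"swap token {trigger_ids[slot]} -> {best_candidate}")
```

In the real method, the top-scoring candidates are then re-checked with actual forward passes before one is accepted.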
\ No newline at end of file diff --git a/docs/ape/rl.md b/docs/ape/rl.md index fc612fbde75..5961f35a2e9 100644 --- a/docs/ape/rl.md +++ b/docs/ape/rl.md @@ -1,8 +1,8 @@ --- -sidebar_position: 30 +sidebar_position: 130 --- -# Reinforcement Learning +# 🟣 Reinforcement Learning ## RLPrompt diff --git a/docs/trainable/soft_prompting.md b/docs/ape/soft_prompting.md similarity index 100% rename from docs/trainable/soft_prompting.md rename to docs/ape/soft_prompting.md diff --git a/docs/trainable/_category_.json b/docs/trainable/_category_.json deleted file mode 100644 index bba090d936b..00000000000 --- a/docs/trainable/_category_.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "label": "💪 Prompt Tuning", - "position": 70, - "link": { - "type": "generated-index", - "description": "Prompt engineering that you can fine tune with gradients" - } -} From 9de29864f3237dcb2a2594d4aa91e805ee0c436c Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 28 Jan 2023 14:22:00 -0500 Subject: [PATCH 08/16] feat: more --- docs/ape/ape.md | 6 +++++- docs/ape/rl.md | 24 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/docs/ape/ape.md b/docs/ape/ape.md index d54f0dfe53b..e713b51ea73 100644 --- a/docs/ape/ape.md +++ b/docs/ape/ape.md @@ -40,4 +40,8 @@ What would be a good prompt to generate an answer to the above questions? // highlight-start Is the following item a fruit: // highlight-end -``` \ No newline at end of file +``` + +## Notes + +Another simple automatic prompt engineering strategy is to simply give GPT-3 your improved and ask GPT-3 to improve it. \ No newline at end of file diff --git a/docs/ape/rl.md b/docs/ape/rl.md index 5961f35a2e9..34a464cfaba 100644 --- a/docs/ape/rl.md +++ b/docs/ape/rl.md @@ -4,6 +4,8 @@ sidebar_position: 130 # 🟣 Reinforcement Learning +This section covers reinforcement learning methods which optimize discrete prompts (not soft prompts).
This is extremely complicated. + ## RLPrompt ## TEMPERA @@ -12,8 +14,30 @@ sidebar_position: 130 (TEMPERA)(@zhang2022tempera) is a method for automatically generating interpretable prompts. +At a high level, TEMPERA takes a starting prompt and modifies different parts of it in order to see what changes help most. + +## Action Space + +TEMPERA is allowed to edit 3 parts of the prompt: + +1) The instruction +2) in-context examples +3) The verbalizers + +## Reward + +They use a reward which consists of the difference of score between a prompt before/after an edit. + +TEMPERA is densely reward, computing a reward for each edit step according to + +## Training + +TEMPERA uses a GPT architecture and is trained with proximal policy optimization. + +They use a reward which consists of the difference of score between a prompt before/after an edit. +can edit instructions, in context exemplars, or verbalizers For example, Lu et al. (2022) found that the prompt order can have a large effect on the final task performance; Zhao et al. (2021) show that the choice of prompt format, training examples, and prompt order can cause the performance to vary quite significantly. From f28aeec2869312aa40c295888c7e7f0625ff59fa Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 3 Feb 2023 20:06:51 -0500 Subject: [PATCH 09/16] refactor: rename folder --- docs/{ape => automated_pe}/_category_.json | 0 docs/{ape => automated_pe}/ape.md | 0 docs/{ape => automated_pe}/discretized.md | 0 docs/{ape => automated_pe}/more.md | 0 docs/{ape => automated_pe}/overview.md | 0 docs/{ape => automated_pe}/rl.md | 9 --------- docs/{ape => automated_pe}/soft_prompting.md | 0 7 files changed, 9 deletions(-) rename docs/{ape => automated_pe}/_category_.json (100%) rename docs/{ape => automated_pe}/ape.md (100%) rename docs/{ape => automated_pe}/discretized.md (100%) rename docs/{ape => automated_pe}/more.md (100%) rename docs/{ape => automated_pe}/overview.md (100%) rename docs/{ape => automated_pe}/rl.md (66%) rename docs/{ape => automated_pe}/soft_prompting.md (100%) diff --git a/docs/ape/_category_.json b/docs/automated_pe/_category_.json similarity index 100% rename from docs/ape/_category_.json rename to docs/automated_pe/_category_.json diff --git a/docs/ape/ape.md b/docs/automated_pe/ape.md similarity index 100% rename from docs/ape/ape.md rename to docs/automated_pe/ape.md diff --git a/docs/ape/discretized.md b/docs/automated_pe/discretized.md similarity index 100% rename from docs/ape/discretized.md rename to docs/automated_pe/discretized.md diff --git a/docs/ape/more.md b/docs/automated_pe/more.md similarity index 100% rename from docs/ape/more.md rename to docs/automated_pe/more.md diff --git a/docs/ape/overview.md b/docs/automated_pe/overview.md similarity index 100% rename from docs/ape/overview.md rename to docs/automated_pe/overview.md diff --git a/docs/ape/rl.md b/docs/automated_pe/rl.md similarity index 66% rename from docs/ape/rl.md rename to docs/automated_pe/rl.md index 34a464cfaba..a70f72f0af4 100644 --- a/docs/ape/rl.md +++ b/docs/automated_pe/rl.md @@ -38,12 +38,3 @@ They use a reward which consists of the difference of score between a prompt bef can edit instructions, in context exemplars, or verbalizers - -For example, Lu et al. (2022) found that the prompt order can have a large effect on the final task performance; Zhao et al. (2021) show that the choice of prompt format, training examples, and prompt order can cause the performance to vary quite significantly. - -For example, Liu et al. 
(2022) propose to retrieve exemplars from a training pool that are semantically similar to a test example, and show it can significantly boost the performance - - - - -significant gains compared with recent SoTA approaches like prompt tun- ing, AutoPrompt, and RLPrompt \ No newline at end of file diff --git a/docs/ape/soft_prompting.md b/docs/automated_pe/soft_prompting.md similarity index 100% rename from docs/ape/soft_prompting.md rename to docs/automated_pe/soft_prompting.md From 307e778f4e6adc3ee2c4a1fae3e24505265a966d Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 3 Feb 2023 20:19:45 -0500 Subject: [PATCH 10/16] feat: RLPrompt training note --- docs/automated_pe/rl.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index a70f72f0af4..8e79ca1ffd3 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -8,6 +8,10 @@ This section covers reinforcement learning methods which optimize discrete promp ## RLPrompt + +## Training +Optimize via soft q learning + ## TEMPERA **TE**st-ti**M**e **P**rompt **E**diting using **R**einforcement le**A**rning @@ -35,6 +39,3 @@ TEMPERA is densely reward, computing a reward for each edit step according to TEMPERA uses a GPT architecture and is trained with proximal policy optimization. They use a reward which consists of the difference of score between a prompt before/after an edit. - - -can edit instructions, in context exemplars, or verbalizers From 8d42ea4c635754c3d2db63c1df7b84eea2a0ab49 Mon Sep 17 00:00:00 2001 From: trigaten Date: Fri, 3 Feb 2023 23:46:33 -0500 Subject: [PATCH 11/16] feat: more rlprompt info --- docs/automated_pe/rl.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 8e79ca1ffd3..9eab8875d33 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -8,6 +8,24 @@ This section covers reinforcement learning methods which optimize discrete promp ## RLPrompt +RLPrompt(@deng2022rlprompt) is a method that takes an input and trains a language model (the policy) +to generate a good prompt for that input. + +More formally, given an input sequence $x$, the policy designs a prompt $z$ by selecting $[z_1, z_2, ..., z_T]$ tokens from the vocabulary sequentially. + +After creating the prompt, it combines it with $x$, and uses another language model to +generate the completion. The LM output of x prompted by z can be described as $y_{LM}(\hat{z}, x)$. + +Then, the policy receives some reward according to this output: $R(y_{LM}(\hat{z}, x))$ + +### Example + +Assuming we have partially trained RLPrompt on classifying movie reviews, and our next +training point example is `x = "I hate this movie."`. RLPrompt will generate a prompt like +`z = "Movie review bad or good:`. Then, it will combine the prompt with the input to get +`x' = "Movie review bad or good: I hate this movie."`. Then, it will use a language model +to generate the completion. Say it generates `bad`. Then, the reward is computed as +`R(y_{LM}(\hat{z}, x))`... 
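Below is a minimal sketch of this generate-then-reward loop. The function names (`policy_generate_prompt`, `frozen_lm_complete`) are hypothetical stand-ins rather than anything from the RLPrompt codebase, and the exact-match reward is a placeholder for the shaped score the method actually uses.

```python
# Hedged sketch of a single RLPrompt-style step, with stand-in components.
def policy_generate_prompt(x: str) -> str:
    """Stand-in for the trained policy that writes a prompt z for input x."""
    return "Movie review bad or good:"

def frozen_lm_complete(prompt: str) -> str:
    """Stand-in for the frozen language model that completes the prompt."""
    return "bad"

def reward(output: str, label: str) -> float:
    """Toy reward: 1 if the completion matches the label, else 0."""
    return 1.0 if output.strip().lower() == label else 0.0

x, label = "I hate this movie.", "bad"
z = policy_generate_prompt(x)    # the policy designs the prompt z
x_prime = f"{z} {x}"             # combine prompt and input
y = frozen_lm_complete(x_prime)  # y_LM(z, x): the frozen LM's completion
r = reward(y, label)             # R(y_LM(z, x)), used to update the policy
print(f"{x_prime!r} -> {y!r}, reward {r}")
```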
## Training Optimize via soft q learning From 3abb35c22fc93d58f8f4855412e805f1fd05e7de Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 4 Feb 2023 00:41:08 -0500 Subject: [PATCH 12/16] feat: add rlprompt training info --- bibliography.bib | 9 +++++++++ docs/automated_pe/rl.md | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/bibliography.bib b/bibliography.bib index cd87d659ddf..6155d79f2be 100644 --- a/bibliography.bib +++ b/bibliography.bib @@ -228,6 +228,15 @@ @misc{deng2022rlprompt primaryClass={cs.CL} } +@misc{guo2021efficient, + title={Efficient (Soft) Q-Learning for Text Generation with Limited Good Data}, + author={Han Guo and Bowen Tan and Zhengzhong Liu and Eric P. Xing and Zhiting Hu}, + year={2021}, + eprint={2106.07704}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + % Models % Language Model Guides diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 9eab8875d33..485f3259c06 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -25,10 +25,11 @@ training point example is `x = "I hate this movie."`. RLPrompt will generate a p `z = "Movie review bad or good:`. Then, it will combine the prompt with the input to get `x' = "Movie review bad or good: I hate this movie."`. Then, it will use a language model to generate the completion. Say it generates `bad`. Then, the reward is computed as -`R(y_{LM}(\hat{z}, x))`... +`R(y_{LM}(\hat{z}, x))`. Deng et al. do not use a simple 0/1 reward. ## Training -Optimize via soft q learning + +RLPrompt embeds a task specific MLP inside a frozen LM. The MLP is trained with Soft Q Learning(@guo2021efficient). ## TEMPERA From 4307f20327e4aa81d86e63463e8fc9e906a49123 Mon Sep 17 00:00:00 2001 From: trigaten Date: Sat, 4 Feb 2023 00:42:42 -0500 Subject: [PATCH 13/16] feat: tempera v rlprompt note --- docs/automated_pe/rl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 485f3259c06..5f06c625e0e 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -37,7 +37,7 @@ RLPrompt embeds a task specific MLP inside a frozen LM. The MLP is trained with (TEMPERA)(@zhang2022tempera) is a method for automatically generating interpretable prompts. -At a high level, TEMPERA takes a starting prompt and modifies different parts of it in order to see what changes help most. +At a high level, instead of building a prompt from scratch like RLPrompt, TEMPERA takes a starting prompt and modifies different parts of it in order to see what changes help most. ## Action Space From ef47bbaa9011480cbd3aed0d739cf86741e0bc2b Mon Sep 17 00:00:00 2001 From: trigaten Date: Sun, 5 Feb 2023 00:34:28 -0500 Subject: [PATCH 14/16] fix: typo --- docs/automated_pe/ape.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/automated_pe/ape.md b/docs/automated_pe/ape.md index e713b51ea73..b438314993a 100644 --- a/docs/automated_pe/ape.md +++ b/docs/automated_pe/ape.md @@ -44,4 +44,4 @@ Is the following item a fruit: ## Notes -Another simple automatic prompt engineering strategy is to simply give GPT-3 your improved and ask GPT-3 to improve it. \ No newline at end of file +Another simple automatic prompt engineering strategy is to simply give GPT-3 your prompt and ask GPT-3 to improve it. 
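A rough sketch of that strategy is below; `complete` is a placeholder for whatever LLM API you use (not a specific library function), so treat this as a pattern rather than working client code.

```python
# Sketch of the "ask the model to improve your prompt" strategy.
def complete(prompt: str) -> str:
    """Placeholder: wire this up to your own LLM provider."""
    raise NotImplementedError

def improve_prompt(current_prompt: str) -> str:
    meta_prompt = (
        "Improve the following prompt so that it is clearer and more likely "
        "to produce correct answers. Return only the improved prompt.\n\n"
        f"Prompt: {current_prompt}"
    )
    return complete(meta_prompt)
```

You could repeat this a few times and keep whichever version of the prompt scores best on a small set of held-out examples.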
\ No newline at end of file From 12f5e8700fd7f8b55570f5280e37eb66d74a0fc3 Mon Sep 17 00:00:00 2001 From: tianjunz Date: Mon, 13 Feb 2023 22:15:02 -0800 Subject: [PATCH 15/16] Update rl.md --- docs/automated_pe/rl.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 5f06c625e0e..055564657e8 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -43,15 +43,15 @@ At a high level, instead of building a prompt from scratch like RLPrompt, TEMPER TEMPERA is allowed to edit 3 parts of the prompt: -1) The instruction -2) in-context examples -3) The verbalizers +1) The instruction: given the instruction $i$, one could parse it through `nltk.tokenize.treebank` into a set of phrases. Then the actions allow swapping, addition and deletion between current set of phrases. For example, this will first parse the sentence `"Given text, classify whether it is good or bad."` to `["Given text", "classify", "whether", "it is", "good", "or", "bad"]`. Then we can perform different editing strategies (e.g., swapping two phrases, delete one phrase or repeat one phrase) on this set of phrases. +2) in-context examples: given a example pool of $K$ examples, we want to select $k$ from them to formulate the final prompt. The action space allows change position of examples $i, j$ with $0 < i < j < k$. It also supports replacing example $0 < i < k$ with any candidate from the pool $k < j < K+1$. +3) The verbalizers: the editing space simply allows changing the current verbalizer to any other verbalizer from the `promptsource` collections. For examples, changing from `["positive", "negative"]` to `["great", "terrible"]`. ## Reward They use a reward which consists of the difference of score between a prompt before/after an edit. -TEMPERA is densely reward, computing a reward for each edit step according to +TEMPERA is densely reward, computing a reward for each edit step according to the accuracy improvement comparing the current prompt (after editing) and the previous prompt (before editing). ## Training From c82ac2dc13ac33b431598409b79ea10ed9349f28 Mon Sep 17 00:00:00 2001 From: trigaten Date: Tue, 14 Feb 2023 17:58:03 -0500 Subject: [PATCH 16/16] feat: better formatting --- docs/automated_pe/rl.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/automated_pe/rl.md b/docs/automated_pe/rl.md index 055564657e8..2144341d579 100644 --- a/docs/automated_pe/rl.md +++ b/docs/automated_pe/rl.md @@ -43,9 +43,17 @@ At a high level, instead of building a prompt from scratch like RLPrompt, TEMPER TEMPERA is allowed to edit 3 parts of the prompt: -1) The instruction: given the instruction $i$, one could parse it through `nltk.tokenize.treebank` into a set of phrases. Then the actions allow swapping, addition and deletion between current set of phrases. For example, this will first parse the sentence `"Given text, classify whether it is good or bad."` to `["Given text", "classify", "whether", "it is", "good", "or", "bad"]`. Then we can perform different editing strategies (e.g., swapping two phrases, delete one phrase or repeat one phrase) on this set of phrases. -2) in-context examples: given a example pool of $K$ examples, we want to select $k$ from them to formulate the final prompt. The action space allows change position of examples $i, j$ with $0 < i < j < k$. It also supports replacing example $0 < i < k$ with any candidate from the pool $k < j < K+1$. 
-3) The verbalizers: the editing space simply allows changing the current verbalizer to any other verbalizer from the `promptsource` collections. For examples, changing from `["positive", "negative"]` to `["great", "terrible"]`. +### 1) The instruction + +Given the instruction $i$, one could parse it through `nltk.tokenize.treebank` into a set of phrases. Then the actions allow swapping, addition and deletion between current set of phrases. For example, this will first parse the sentence `"Given text, classify whether it is good or bad."` to `["Given text", "classify", "whether", "it is", "good", "or", "bad"]`. Then we can perform different editing strategies (e.g., swapping two phrases, delete one phrase or repeat one phrase) on this set of phrases. + +### 2) In-context examples + +Given a example pool of $K$ examples (aka %%exemplars|exemplars%%), we want to select $k$ from them to formulate the final prompt. The action space allows change position of examples $i, j$ with $0 < i < j < k$. It also supports replacing example $0 < i < k$ with any candidate from the pool $k < j < K+1$. + +### 3) The verbalizers + +The editing space simply allows changing the current verbalizer to any other verbalizer from the `promptsource` collections. For examples, changing from `["positive", "negative"]` to `["great", "terrible"]`. ## Reward