Skip to content

Commit 0709665

Browse files
committed
OpenGame.ipynb
1 parent b8ca0b5 commit 0709665

File tree

1 file changed

+291
-0
lines changed

1 file changed

+291
-0
lines changed

Examples/OpenGame.ipynb

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "4a2a11de",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"Outcome: ('Cooperate', 'Cooperate')\n",
14+
"Payoff: 3\n",
15+
"Feedback: Strategy always_cooperate resulted in a reward of 3\n",
16+
"Best Response Strategy against always cooperate: my_strategy\n",
17+
"Best response outcome: ('Defect', 'Defect')\n",
18+
"Best response payoff: 1\n"
19+
]
20+
}
21+
],
22+
"source": [
23+
"from typing import TypeVar, Generic, Callable, Tuple\n",
24+
"\n",
25+
"# Define generic type variables for the OpenGame class\n",
26+
"Input = TypeVar('Input')\n",
27+
"Output = TypeVar('Output')\n",
28+
"Payoff = TypeVar('Payoff')\n",
29+
"Feedback = TypeVar('Feedback')\n",
30+
"\n",
31+
"class BaseStrategy(Generic[Input, Output]):\n",
32+
" \"\"\"\n",
33+
" Base class for all strategies.\n",
34+
" A strategy is a class that can decide an output given an input.\n",
35+
" It can store its own internal state (e.g., weights, memory).\n",
36+
" \"\"\"\n",
37+
" def select_action(self, input_val: Input) -> Output:\n",
38+
" \"\"\"\n",
39+
" Selects an action (output) based on the given input.\n",
40+
" This method must be implemented by concrete strategy classes.\n",
41+
" \"\"\"\n",
42+
" raise NotImplementedError\n",
43+
"\n",
44+
"Strategy = TypeVar('Strategy', bound=BaseStrategy[Input, Output])\n",
45+
"\n",
46+
"\n",
47+
"class OpenGame(Generic[Input, Output, Payoff, Feedback, Strategy]):\n",
48+
" \"\"\"\n",
49+
" A generic class representing an open game.\n",
50+
" \"\"\"\n",
51+
"\n",
52+
" def play(\n",
53+
" self,\n",
54+
" strategy: Strategy,\n",
55+
" input_val: Input,\n",
56+
" ) -> Output:\n",
57+
" \"\"\"\n",
58+
" Plays the game given a strategy and an input, and returns the output.\n",
59+
"\n",
60+
" Args:\n",
61+
" strategy: The strategy to use.\n",
62+
" input_val: The input to the game.\n",
63+
"\n",
64+
" Returns:\n",
65+
" The output of the game.\n",
66+
" \"\"\"\n",
67+
" raise NotImplementedError\n",
68+
"\n",
69+
" def coplay(\n",
70+
" self,\n",
71+
" strategy: Strategy,\n",
72+
" input_val: Input,\n",
73+
" reward: Payoff,\n",
74+
" ) -> Feedback:\n",
75+
" \"\"\"\n",
76+
" Co-plays the game, providing feedback based on the strategy, input, and reward.\n",
77+
"\n",
78+
" Args:\n",
79+
" strategy: The strategy to use.\n",
80+
" input_val: The input to the game.\n",
81+
" reward: The reward received.\n",
82+
"\n",
83+
" Returns:\n",
84+
" Feedback on the game play.\n",
85+
" \"\"\"\n",
86+
" raise NotImplementedError\n",
87+
"\n",
88+
" def best_response(\n",
89+
" self,\n",
90+
" input_val: Input,\n",
91+
" payoff_function: Callable[[Output], Payoff],\n",
92+
" ) -> Callable[[Strategy, Strategy], Strategy]:\n",
93+
" \"\"\"\n",
94+
" Finds the best response strategy given an input and a payoff function that\n",
95+
" maps outputs to payoffs. Returns a function that takes two strategies\n",
96+
" and returns the best response strategy. The intent is that the two input\n",
97+
" strategies are the other players' strategies, and the function determines\n",
98+
" how to best play in response to those.\n",
99+
"\n",
100+
" Args:\n",
101+
" input_val: The input to the game.\n",
102+
" payoff_function: A function that maps outputs to payoffs.\n",
103+
"\n",
104+
" Returns:\n",
105+
" A function that takes two strategies and returns the best response strategy.\n",
106+
" \"\"\"\n",
107+
" raise NotImplementedError\n"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": null,
113+
"id": "248784ae",
114+
"metadata": {},
115+
"outputs": [],
116+
"source": [
117+
"from typing import TypeAlias\n",
118+
"\n",
119+
"# Example Implementation: Prisoner's Dilemma\n",
120+
"\n",
121+
"# Define type aliases for the Prisoner's Dilemma\n",
122+
"PDAction: TypeAlias = str # \"Cooperate\" or \"Defect\"\n",
123+
"PDPayoff: TypeAlias = int # Numerical representation of reward/penalty\n",
124+
"PDFeedback: TypeAlias = str # Can be some string indicating what happened\n",
125+
"PDStrategy: TypeAlias = Callable[[PDAction], PDAction] # Function to choose action\n",
126+
"PDInput: TypeAlias = None # No real input, can make it a turn number if needed\n",
127+
"\n",
128+
"\n",
129+
"class PrisonerDilemma(OpenGame[\n",
130+
" PDInput,\n",
131+
" Tuple[PDAction, PDAction],\n",
132+
" PDPayoff,\n",
133+
" PDFeedback, PDStrategy]\n",
134+
"):\n",
135+
" \"\"\"\n",
136+
" An implementation of the Prisoner's Dilemma as an OpenGame.\n",
137+
" \"\"\"\n",
138+
"\n",
139+
" def play(self, strategy: PDStrategy, input_val: PDInput) -> Tuple[PDAction, PDAction]:\n",
140+
" \"\"\"\n",
141+
" Simulates a single round of the Prisoner's Dilemma.\n",
142+
"\n",
143+
" Args:\n",
144+
" strategy: A strategy to use (a callable that takes a previous action, or None, and returns an action).\n",
145+
" input_val: Unused in this simple implementation.\n",
146+
"\n",
147+
" Returns:\n",
148+
" A tuple containing the actions of both players (player1, player2) assuming they use the same strategy. In a more complex setup,\n",
149+
" we'd take two strategies as arguments to play against each other.\n",
150+
" \"\"\"\n",
151+
" action1 = strategy(None) # Choose action based on empty input (no history)\n",
152+
" action2 = strategy(None) # Same for the other player.\n",
153+
" return (action1, action2)\n",
154+
"\n",
155+
" def coplay(self, strategy: PDStrategy, input_val: PDInput, reward: PDPayoff) -> PDFeedback:\n",
156+
" \"\"\"\n",
157+
" Provides feedback based on the strategy, input, and reward.\n",
158+
"\n",
159+
" Args:\n",
160+
" strategy: The strategy used.\n",
161+
" input_val: The input to the game.\n",
162+
" reward: The reward received.\n",
163+
"\n",
164+
" Returns:\n",
165+
" Feedback on the game play.\n",
166+
" \"\"\"\n",
167+
" return f\"Strategy {strategy.__name__} resulted in a reward of {reward}\"\n",
168+
"\n",
169+
" def best_response(self, input_val: PDInput, reward_function: Callable[[Tuple[PDAction, PDAction]], Payoff]) -> Callable[[PDStrategy, PDStrategy], PDStrategy]:\n",
170+
" \"\"\"\n",
171+
" Finds the best response strategy, given the other player's strategy. This is a simplified example and doesn't account for repeated games.\n",
172+
"\n",
173+
" Args:\n",
174+
" input_val: The input to the game.\n",
175+
" reward_function: A function that maps outputs to rewards.\n",
176+
"\n",
177+
" Returns:\n",
178+
" A function that takes the opponent's strategy and the player's own strategy and returns the best response strategy.\n",
179+
" \"\"\"\n",
180+
"\n",
181+
" def best_response_strategy(\n",
182+
" opponent_strategy: PDStrategy,\n",
183+
" self_strategy: PDStrategy\n",
184+
" ) -> PDStrategy: # self_strategy is unused; it is accepted only to match the two-strategy signature\n",
185+
"\n",
186+
" def my_strategy(previous_action: PDAction = None) -> PDAction: # \"previous_action\" is accepted to match PDStrategy but is not used\n",
187+
" \"\"\"\n",
188+
" A simple strategy that always defects, whatever the opponent plays: both branches below return \"Defect\".\n",
189+
" Defecting is the best response in a single round, so this is not a \"tit-for-tat\" strategy.\n",
190+
" \"\"\"\n",
191+
" opponent_action = opponent_strategy(None)\n",
192+
"\n",
193+
" if opponent_action == \"Defect\":\n",
194+
" return \"Defect\"\n",
195+
" else:\n",
196+
" return \"Defect\" # Always defect for the best response in a single round\n",
197+
" return my_strategy\n",
198+
"\n",
199+
" return best_response_strategy\n",
200+
"\n",
201+
"\n",
202+
"# Example Usage\n",
203+
"if __name__ == '__main__':\n",
204+
" pd = PrisonerDilemma()\n",
205+
"\n",
206+
" # Example Strategy: Always Cooperate\n",
207+
" def always_cooperate(previous_action: PDAction = None) -> PDAction:\n",
208+
" return \"Cooperate\"\n",
209+
"\n",
210+
" # Example Strategy: Always Defect\n",
211+
" def always_defect(previous_action: PDAction = None) -> PDAction:\n",
212+
" return \"Defect\"\n",
213+
"\n",
214+
" # Example Payoff function\n",
215+
" def prisoner_dilemma_payoff(actions: Tuple[PDAction, PDAction]) -> Payoff:\n",
216+
" \"\"\"\n",
217+
" Defines the payoff matrix for the Prisoner's Dilemma.\n",
218+
" \"\"\"\n",
219+
" action1, action2 = actions\n",
220+
" if action1 == \"Cooperate\" and action2 == \"Cooperate\":\n",
221+
" return 3 # Both cooperate\n",
222+
" elif action1 == \"Cooperate\" and action2 == \"Defect\":\n",
223+
" return 0 # Player 1 gets suckered\n",
224+
" elif action1 == \"Defect\" and action2 == \"Cooperate\":\n",
225+
" return 5 # Player 1 defects\n",
226+
" else: # Both defect\n",
227+
" return 1\n",
228+
"\n",
229+
" # Play the game\n",
230+
" outcome = pd.play(always_cooperate, None) # Input is None in this case\n",
231+
" print(f\"Outcome: {outcome}\")\n",
232+
"\n",
233+
" # Calculate payoff\n",
234+
" payoff = prisoner_dilemma_payoff(outcome)\n",
235+
" print(f\"Payoff: {payoff}\")\n",
236+
"\n",
237+
" # Get feedback\n",
238+
" feedback = pd.coplay(always_cooperate, None, payoff)\n",
239+
" print(f\"Feedback: {feedback}\")\n",
240+
"\n",
241+
" # Find the best response strategy\n",
242+
" best_response_func = pd.best_response(None, prisoner_dilemma_payoff)\n",
243+
" best_response_strategy = best_response_func(always_cooperate, always_cooperate)\n",
244+
"\n",
245+
" print(f\"Best Response Strategy against always cooperate: {best_response_strategy.__name__}\") # the best response strategy *IS* a strategy to play against the always cooperate strategy\n",
246+
"\n",
247+
" best_response_outcome = pd.play(best_response_strategy, None)\n",
248+
"\n",
249+
" print(f\"Best response outcome: {best_response_outcome}\")\n",
250+
" print(f\"Best response payoff: {prisoner_dilemma_payoff(best_response_outcome)}\")"
251+
]
252+
},
253+
{
254+
"cell_type": "code",
255+
"execution_count": null,
256+
"id": "4a9f8c13",
257+
"metadata": {},
258+
"outputs": [],
259+
"source": []
260+
},
261+
{
262+
"cell_type": "code",
263+
"execution_count": null,
264+
"id": "a09715d4",
265+
"metadata": {},
266+
"outputs": [],
267+
"source": []
268+
}
269+
],
270+
"metadata": {
271+
"kernelspec": {
272+
"display_name": "3.12.3",
273+
"language": "python",
274+
"name": "python3"
275+
},
276+
"language_info": {
277+
"codemirror_mode": {
278+
"name": "ipython",
279+
"version": 3
280+
},
281+
"file_extension": ".py",
282+
"mimetype": "text/x-python",
283+
"name": "python",
284+
"nbconvert_exporter": "python",
285+
"pygments_lexer": "ipython3",
286+
"version": "3.12.3"
287+
}
288+
},
289+
"nbformat": 4,
290+
"nbformat_minor": 5
291+
}

0 commit comments

Comments
 (0)