1
+ <!DOCTYPE html>
2
+ < html lang ="en ">
3
+ < head >
4
+ < meta http-equiv ="content-type " content ="text/html;charset=utf-8 "/>
5
+ < meta name ="viewport " content ="width=device-width, initial-scale=1.0 "/>
6
+ < meta name ="description " content =""/>
7
+
8
+ < meta name ="twitter:card " content ="summary "/>
9
+ < meta name ="twitter:image:src " content ="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4 "/>
10
+ < meta name ="twitter:title " content ="configs.py "/>
11
+ < meta name ="twitter:description " content =""/>
12
+ < meta name ="twitter:site " content ="@labmlai "/>
13
+ < meta name ="twitter:creator " content ="@labmlai "/>
14
+
15
+ < meta property ="og:url " content ="https://nn.labml.ai/RWKV/configs.html "/>
16
+ < meta property ="og:title " content ="configs.py "/>
17
+ < meta property ="og:image " content ="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4 "/>
18
+ < meta property ="og:site_name " content ="configs.py "/>
19
+ < meta property ="og:type " content ="object "/>
20
+ < meta property ="og:title " content ="configs.py "/>
21
+ < meta property ="og:description " content =""/>
22
+
23
+ < title > configs.py</ title >
24
+ < link rel ="shortcut icon " href ="/icon.png "/>
25
+ < link rel ="stylesheet " href ="../pylit.css?v=1 ">
26
+ < link rel ="canonical " href ="https://nn.labml.ai/RWKV/configs.html "/>
27
+ < link rel ="stylesheet " href ="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css " integrity ="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET " crossorigin ="anonymous ">
28
+
29
+ <!-- Global site tag (gtag.js) - Google Analytics -->
30
+ < script async src ="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH "> </ script >
31
+ < script >
32
+ window . dataLayer = window . dataLayer || [ ] ;
33
+
34
/**
 * Queue one analytics command on the global `dataLayer` array.
 *
 * The call's `arguments` object itself is appended, not an array copy.
 * NOTE(review): presumably the externally loaded gtag.js script relies on
 * that exact shape - confirm before switching to rest parameters.
 */
function gtag() {
  var queue = dataLayer;
  queue.push(arguments);
}
37
+
38
+ gtag ( 'js' , new Date ( ) ) ;
39
+
40
+ gtag ( 'config' , 'G-4V3HC8HBLH' ) ;
41
+ </ script >
42
+ </ head >
43
+ < body >
44
+ < div id ='container '>
45
+ < div id ="background "> </ div >
46
+ < div class ='section '>
47
+ < div class ='docs '>
48
+ < p >
49
+ < a class ="parent " href ="/ "> home</ a >
50
+ < a class ="parent " href ="index.html "> RWKV</ a >
51
+ </ p >
52
+ < p >
53
+ < a href ="https://github.com/labmlai/annotated_deep_learning_paper_implementations " target ="_blank ">
54
+ < img alt ="Github "
55
+ src ="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social "
56
+ style ="max-width:100%; "/> </ a >
57
+ < a href ="https://twitter.com/labmlai " rel ="nofollow " target ="_blank ">
58
+ < img alt ="Twitter "
59
+ src ="https://img.shields.io/twitter/follow/labmlai?style=social "
60
+ style ="max-width:100%; "/> </ a >
61
+ </ p >
62
+ < p >
63
+ < a href ="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/RWKV/configs.py " target ="_blank ">
64
+ View code on Github</ a >
65
+ </ p >
66
+ </ div >
67
+ </ div >
68
+ < div class ='section ' id ='section-0 '>
69
+ < div class ='docs '>
70
+ < div class ='section-link '>
71
+ < a href ='#section-0 '> #</ a >
72
+ </ div >
73
+
74
+ </ div >
75
+ < div class ='code '>
76
+ < div class ="highlight "> < pre > < span class ="lineno "> 1</ span > < span > </ span > < span class ="kn "> from</ span > < span class ="nn "> labml.configs</ span > < span class ="kn "> import</ span > < span class ="n "> BaseConfigs</ span > </ pre > </ div >
77
+ </ div >
78
+ </ div >
79
+ < div class ='section ' id ='section-1 '>
80
+ < div class ='docs doc-strings '>
81
+ < div class ='section-link '>
82
+ < a href ='#section-1 '> #</ a >
83
+ </ div >
84
+ < h2 > Transformer Configurations</ h2 >
85
+ < p > This defines configurations for a transformer. The configurations are calculated using option functions. These are lazy-loaded and therefore only the necessary modules are calculated.</ p >
86
+
87
+ </ div >
88
+ < div class ='code '>
89
+ < div class ="highlight "> < pre > < span class ="lineno "> 4</ span > < span class ="k "> class</ span > < span class ="nc "> RWKVConfigs</ span > < span class ="p "> (</ span > < span class ="n "> BaseConfigs</ span > < span class ="p "> ):</ span > </ pre > </ div >
90
+ </ div >
91
+ </ div >
92
+ < div class ='section ' id ='section-2 '>
93
+ < div class ='docs '>
94
+ < div class ='section-link '>
95
+ < a href ='#section-2 '> #</ a >
96
+ </ div >
97
+ < p > Number of attention heads </ p >
98
+
99
+ </ div >
100
+ < div class ='code '>
101
+ < div class ="highlight "> < pre > < span class ="lineno "> 14</ span > < span class ="n "> n_heads</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="o "> =</ span > < span class ="mi "> 8</ span > </ pre > </ div >
102
+ </ div >
103
+ </ div >
104
+ < div class ='section ' id ='section-3 '>
105
+ < div class ='docs '>
106
+ < div class ='section-link '>
107
+ < a href ='#section-3 '> #</ a >
108
+ </ div >
109
+ < p > Transformer embedding size </ p >
110
+
111
+ </ div >
112
+ < div class ='code '>
113
+ < div class ="highlight "> < pre > < span class ="lineno "> 16</ span > < span class ="n "> d_model</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="o "> =</ span > < span class ="mi "> 512</ span > </ pre > </ div >
114
+ </ div >
115
+ </ div >
116
+ < div class ='section ' id ='section-4 '>
117
+ < div class ='docs '>
118
+ < div class ='section-link '>
119
+ < a href ='#section-4 '> #</ a >
120
+ </ div >
121
+ < p > Number of layers </ p >
122
+
123
+ </ div >
124
+ < div class ='code '>
125
+ < div class ="highlight "> < pre > < span class ="lineno "> 18</ span > < span class ="n "> n_layers</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="o "> =</ span > < span class ="mi "> 6</ span > </ pre > </ div >
126
+ </ div >
127
+ </ div >
128
+ < div class ='section ' id ='section-5 '>
129
+ < div class ='docs '>
130
+ < div class ='section-link '>
131
+ < a href ='#section-5 '> #</ a >
132
+ </ div >
133
+ < p > Dropout probability </ p >
134
+
135
+ </ div >
136
+ < div class ='code '>
137
+ < div class ="highlight "> < pre > < span class ="lineno "> 20</ span > < span class ="n "> dropout</ span > < span class ="p "> :</ span > < span class ="nb "> float</ span > < span class ="o "> =</ span > < span class ="mf "> 0.1</ span > </ pre > </ div >
138
+ </ div >
139
+ </ div >
140
+ < div class ='section ' id ='section-6 '>
141
+ < div class ='docs '>
142
+ < div class ='section-link '>
143
+ < a href ='#section-6 '> #</ a >
144
+ </ div >
145
+ < p > Number of tokens in the source vocabulary (for token embeddings) </ p >
146
+
147
+ </ div >
148
+ < div class ='code '>
149
+ < div class ="highlight "> < pre > < span class ="lineno "> 22</ span > < span class ="n "> n_src_vocab</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > </ pre > </ div >
150
+ </ div >
151
+ </ div >
152
+ < div class ='section ' id ='section-7 '>
153
+ < div class ='docs '>
154
+ < div class ='section-link '>
155
+ < a href ='#section-7 '> #</ a >
156
+ </ div >
157
+ < p > Number of tokens in the target vocabulary (to generate logits for prediction) </ p >
158
+
159
+ </ div >
160
+ < div class ='code '>
161
+ < div class ="highlight "> < pre > < span class ="lineno "> 24</ span > < span class ="n "> n_tgt_vocab</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > </ pre > </ div >
162
+ </ div >
163
+ </ div >
164
+ < div class ='footer '>
165
+ < a href ="https://papers.labml.ai "> Trending Research Papers</ a >
166
+ < a href ="https://labml.ai "> labml.ai</ a >
167
+ </ div >
168
+ </ div >
169
+ <script src="../interactive.js?v=1"></script>
170
+ < script >
171
/**
 * Make every image that is a direct child of a paragraph zoomable.
 *
 * Selects all elements matching `p>img` in the document and passes each
 * one, in document order, to `handleImage`.
 */
function handleImages() {
  var targets = document.querySelectorAll('p>img')

  // The query result is array-like, so borrow Array's forEach to walk it.
  Array.prototype.forEach.call(targets, function (target) {
    handleImage(target)
  })
}
178
+
179
/**
 * Turn a single image into a click-to-zoom image.
 *
 * Centres the image within its parent element and prepares a detached
 * overlay of the form `div#modal > (div > img), span.close`. Clicking the
 * image attaches the overlay to `document.body` showing the same picture;
 * clicking the close control detaches it again.
 *
 * @param {HTMLImageElement} img - Image to enhance; must have a parent element.
 */
function handleImage(img) {
  img.parentElement.style.textAlign = 'center'

  // Build the overlay once; it stays detached until the image is clicked.
  var modal = document.createElement('div')
  modal.id = 'modal'

  var modalContent = document.createElement('div')
  modal.appendChild(modalContent)

  var modalImage = document.createElement('img')
  modalContent.appendChild(modalImage)

  var closeControl = document.createElement('span')
  closeControl.classList.add('close')
  closeControl.textContent = 'x'
  modal.appendChild(closeControl)

  img.onclick = function () {
    // Read src/alt at click time so later changes to the image are picked up.
    modalImage.src = img.src
    // Carry the alternative text over so the enlarged image stays described.
    modalImage.alt = img.alt || ''
    document.body.appendChild(modal)
  }

  closeControl.onclick = function () {
    // removeChild throws if the node is not a child of body, so only
    // detach the overlay while it is actually attached.
    if (modal.parentNode === document.body) {
      document.body.removeChild(modal)
    }
  }
}
206
+
207
+ handleImages ( )
208
+ </ script >
209
+ </ body >
210
+ </ html >
0 commit comments