Skip to content

Commit 999f203

Browse files
committed
RWKV docs
1 parent 418d1ec commit 999f203

File tree

3 files changed

+1670
-0
lines changed

3 files changed

+1670
-0
lines changed

docs/RWKV/configs.html

+210
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta http-equiv="content-type" content="text/html;charset=utf-8"/>
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6+
<meta name="description" content=""/>
7+
8+
<meta name="twitter:card" content="summary"/>
9+
<meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&amp;v=4"/>
10+
<meta name="twitter:title" content="configs.py"/>
11+
<meta name="twitter:description" content=""/>
12+
<meta name="twitter:site" content="@labmlai"/>
13+
<meta name="twitter:creator" content="@labmlai"/>
14+
15+
<meta property="og:url" content="https://nn.labml.ai/RWKV/configs.html"/>
16+
<meta property="og:title" content="configs.py"/>
17+
<meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&amp;v=4"/>
18+
<meta property="og:site_name" content="configs.py"/>
19+
<meta property="og:type" content="object"/>
20+
<meta property="og:title" content="configs.py"/>
21+
<meta property="og:description" content=""/>
22+
23+
<title>configs.py</title>
24+
<link rel="shortcut icon" href="/icon.png"/>
25+
<link rel="stylesheet" href="../pylit.css?v=1">
26+
<link rel="canonical" href="https://nn.labml.ai/RWKV/configs.html"/>
27+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">
28+
29+
<!-- Global site tag (gtag.js) - Google Analytics -->
30+
<script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
31+
<script>
32+
window.dataLayer = window.dataLayer || [];
33+
34+
// Queue a Google tag command: the call's `arguments` object is pushed as-is
// onto the global `dataLayer` array initialised just above.
function gtag() {
35+
dataLayer.push(arguments);
36+
}
37+
38+
gtag('js', new Date());
39+
40+
gtag('config', 'G-4V3HC8HBLH');
41+
</script>
42+
</head>
43+
<body>
44+
<div id='container'>
45+
<div id="background"></div>
46+
<div class='section'>
47+
<div class='docs'>
48+
<p>
49+
<a class="parent" href="/">home</a>
50+
<a class="parent" href="index.html">RWKV</a>
51+
</p>
52+
<p>
53+
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations" target="_blank">
54+
<img alt="Github"
55+
src="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social"
56+
style="max-width:100%;"/></a>
57+
<a href="https://twitter.com/labmlai" rel="nofollow" target="_blank">
58+
<img alt="Twitter"
59+
src="https://img.shields.io/twitter/follow/labmlai?style=social"
60+
style="max-width:100%;"/></a>
61+
</p>
62+
<p>
63+
<a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/RWKV/configs.py" target="_blank">
64+
View code on Github</a>
65+
</p>
66+
</div>
67+
</div>
68+
<div class='section' id='section-0'>
69+
<div class='docs'>
70+
<div class='section-link'>
71+
<a href='#section-0'>#</a>
72+
</div>
73+
74+
</div>
75+
<div class='code'>
76+
<div class="highlight"><pre><span class="lineno">1</span><span></span><span class="kn">from</span> <span class="nn">labml.configs</span> <span class="kn">import</span> <span class="n">BaseConfigs</span></pre></div>
77+
</div>
78+
</div>
79+
<div class='section' id='section-1'>
80+
<div class='docs doc-strings'>
81+
<div class='section-link'>
82+
<a href='#section-1'>#</a>
83+
</div>
84+
<h2>Transformer Configurations</h2>
85+
<p>This defines configurations for a transformer. The configurations are calculated using option functions. These are lazily loaded and therefore only the necessary modules are calculated.</p>
86+
87+
</div>
88+
<div class='code'>
89+
<div class="highlight"><pre><span class="lineno">4</span><span class="k">class</span> <span class="nc">RWKVConfigs</span><span class="p">(</span><span class="n">BaseConfigs</span><span class="p">):</span></pre></div>
90+
</div>
91+
</div>
92+
<div class='section' id='section-2'>
93+
<div class='docs'>
94+
<div class='section-link'>
95+
<a href='#section-2'>#</a>
96+
</div>
97+
<p>Number of attention heads </p>
98+
99+
</div>
100+
<div class='code'>
101+
<div class="highlight"><pre><span class="lineno">14</span> <span class="n">n_heads</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span></pre></div>
102+
</div>
103+
</div>
104+
<div class='section' id='section-3'>
105+
<div class='docs'>
106+
<div class='section-link'>
107+
<a href='#section-3'>#</a>
108+
</div>
109+
<p>Transformer embedding size </p>
110+
111+
</div>
112+
<div class='code'>
113+
<div class="highlight"><pre><span class="lineno">16</span> <span class="n">d_model</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">512</span></pre></div>
114+
</div>
115+
</div>
116+
<div class='section' id='section-4'>
117+
<div class='docs'>
118+
<div class='section-link'>
119+
<a href='#section-4'>#</a>
120+
</div>
121+
<p>Number of layers </p>
122+
123+
</div>
124+
<div class='code'>
125+
<div class="highlight"><pre><span class="lineno">18</span> <span class="n">n_layers</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">6</span></pre></div>
126+
</div>
127+
</div>
128+
<div class='section' id='section-5'>
129+
<div class='docs'>
130+
<div class='section-link'>
131+
<a href='#section-5'>#</a>
132+
</div>
133+
<p>Dropout probability </p>
134+
135+
</div>
136+
<div class='code'>
137+
<div class="highlight"><pre><span class="lineno">20</span> <span class="n">dropout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span></pre></div>
138+
</div>
139+
</div>
140+
<div class='section' id='section-6'>
141+
<div class='docs'>
142+
<div class='section-link'>
143+
<a href='#section-6'>#</a>
144+
</div>
145+
<p>Number of tokens in the source vocabulary (for token embeddings) </p>
146+
147+
</div>
148+
<div class='code'>
149+
<div class="highlight"><pre><span class="lineno">22</span> <span class="n">n_src_vocab</span><span class="p">:</span> <span class="nb">int</span></pre></div>
150+
</div>
151+
</div>
152+
<div class='section' id='section-7'>
153+
<div class='docs'>
154+
<div class='section-link'>
155+
<a href='#section-7'>#</a>
156+
</div>
157+
<p>Number of tokens in the target vocabulary (to generate logits for prediction) </p>
158+
159+
</div>
160+
<div class='code'>
161+
<div class="highlight"><pre><span class="lineno">24</span> <span class="n">n_tgt_vocab</span><span class="p">:</span> <span class="nb">int</span></pre></div>
162+
</div>
163+
</div>
164+
<div class='footer'>
165+
<a href="https://papers.labml.ai">Trending Research Papers</a>
166+
<a href="https://labml.ai">labml.ai</a>
167+
</div>
168+
</div>
169+
<script src="../interactive.js?v=1"></script>
170+
<script>
171+
// Apply handleImage to every <img> that is a direct child of a <p>
// (selector 'p>img').
function handleImages() {
172+
var images = document.querySelectorAll('p>img')
173+
174+
for (var i = 0; i < images.length; ++i) {
175+
handleImage(images[i])
176+
}
177+
}
178+
179+
// Give a single image a click-to-open modal.
//
// img: the <img> element to wire up. Its parent element's text alignment is
// set to 'center', and its onclick handler is replaced.
//
// Each call builds its own detached modal subtree:
//   div#modal > div > img   (the modal copy of the image)
//   div#modal > span.close  (text 'x', removes the modal when clicked)
// The modal is only attached to document.body while it is open.
function handleImage(img) {
180+
img.parentElement.style.textAlign = 'center'
181+
182+
// Every image's modal gets the same id, 'modal'.
var modal = document.createElement('div')
183+
modal.id = 'modal'
184+
185+
var modalContent = document.createElement('div')
186+
modal.appendChild(modalContent)
187+
188+
var modalImage = document.createElement('img')
189+
modalContent.appendChild(modalImage)
190+
191+
// Close control: a span with class 'close' and the text 'x'.
var span = document.createElement('span')
192+
span.classList.add('close')
193+
span.textContent = 'x'
194+
modal.appendChild(span)
195+
196+
// Open: attach the modal to the page and point its image at the clicked
// image's source.
img.onclick = function () {
197+
// NOTE(review): looks like leftover debug logging; confirm whether it is still wanted.
console.log('clicked')
198+
document.body.appendChild(modal)
199+
modalImage.src = img.src
200+
}
201+
202+
// Close: detach the modal from the page again.
span.onclick = function () {
203+
document.body.removeChild(modal)
204+
}
205+
}
206+
207+
handleImages()
208+
</script>
209+
</body>
210+
</html>

0 commit comments

Comments
 (0)