<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://marovi.ai/index.php?action=history&amp;feed=atom&amp;title=Translations%3AStochastic_Gradient_Descent%2F25%2Fen</id>
	<title>Translations:Stochastic Gradient Descent/25/en - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://marovi.ai/index.php?action=history&amp;feed=atom&amp;title=Translations%3AStochastic_Gradient_Descent%2F25%2Fen"/>
	<link rel="alternate" type="text/html" href="https://marovi.ai/index.php?title=Translations:Stochastic_Gradient_Descent/25/en&amp;action=history"/>
	<updated>2026-04-27T22:02:27Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.39.1</generator>
	<entry>
		<id>https://marovi.ai/index.php?title=Translations:Stochastic_Gradient_Descent/25/en&amp;diff=13061&amp;oldid=prev</id>
		<title>FuzzyBot: Importing a new version from external source</title>
		<link rel="alternate" type="text/html" href="https://marovi.ai/index.php?title=Translations:Stochastic_Gradient_Descent/25/en&amp;diff=13061&amp;oldid=prev"/>
		<updated>2026-04-27T19:42:03Z</updated>

		<summary type="html">&lt;p&gt;Importing a new version from external source&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 19:42, 27 April 2026&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l7&quot;&gt;Line 7:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 7:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;Nesterov accelerated gradient&amp;#039;&amp;#039;&amp;#039; || Evaluates the gradient at a &amp;quot;look-ahead&amp;quot; position || Nesterov, 1983&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;Nesterov accelerated gradient&amp;#039;&amp;#039;&amp;#039; || Evaluates the gradient at a &amp;quot;look-ahead&amp;quot; position || Nesterov, 1983&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Adagrad&lt;/del&gt;&amp;#039;&amp;#039;&amp;#039; || Per-parameter rates that shrink for frequently updated features || Duchi et al., 2011&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{Term|adagrad}}&lt;/ins&gt;&amp;#039;&amp;#039;&amp;#039; || Per-parameter rates that shrink for frequently updated features || Duchi et al., 2011&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;RMSProp&amp;#039;&amp;#039;&amp;#039; || Fixes &lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Adagrad&lt;/del&gt;&amp;#039;s diminishing rates using a moving average of squared gradients || Hinton (lecture notes), 2012&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;RMSProp&amp;#039;&amp;#039;&amp;#039; || Fixes &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{Term|adagrad}}&lt;/ins&gt;&amp;#039;s diminishing rates using a moving average of squared gradients || Hinton (lecture notes), 2012&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;{{Term|Adam}}&amp;#039;&amp;#039;&amp;#039; || Combines {{Term|momentum}} with RMSProp-style adaptive rates || Kingma &amp;amp; Ba, 2015&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;{{Term|Adam}}&amp;#039;&amp;#039;&amp;#039; || Combines {{Term|momentum}} with RMSProp-style adaptive rates || Kingma &amp;amp; Ba, 2015&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|-&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;AdamW&amp;#039;&amp;#039;&amp;#039; || Decouples weight decay from the adaptive gradient step || Loshchilov &amp;amp; Hutter, 2019&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| &amp;#039;&amp;#039;&amp;#039;AdamW&amp;#039;&amp;#039;&amp;#039; || Decouples &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;{{Term|&lt;/ins&gt;weight decay&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;}} &lt;/ins&gt;from the adaptive gradient step || Loshchilov &amp;amp; Hutter, 2019&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|}&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|}&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;

&lt;!-- diff cache key mediawiki:diff::1.12:old-3329:rev-13061 --&gt;
&lt;/table&gt;</summary>
		<author><name>FuzzyBot</name></author>
	</entry>
	<entry>
		<id>https://marovi.ai/index.php?title=Translations:Stochastic_Gradient_Descent/25/en&amp;diff=3329&amp;oldid=prev</id>
		<title>FuzzyBot: Importing a new version from external source</title>
		<link rel="alternate" type="text/html" href="https://marovi.ai/index.php?title=Translations:Stochastic_Gradient_Descent/25/en&amp;diff=3329&amp;oldid=prev"/>
		<updated>2026-04-27T00:35:51Z</updated>

		<summary type="html">&lt;p&gt;Importing a new version from external source&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
|-&lt;br /&gt;
! Method !! Key idea !! Reference&lt;br /&gt;
|-&lt;br /&gt;
| &amp;#039;&amp;#039;&amp;#039;{{Term|momentum|Momentum}}&amp;#039;&amp;#039;&amp;#039; || Accumulates an exponentially decaying moving average of past gradients || Polyak, 1964&lt;br /&gt;
|-&lt;br /&gt;
| &amp;#039;&amp;#039;&amp;#039;Nesterov accelerated gradient&amp;#039;&amp;#039;&amp;#039; || Evaluates the gradient at a &amp;quot;look-ahead&amp;quot; position || Nesterov, 1983&lt;br /&gt;
|-&lt;br /&gt;
| &amp;#039;&amp;#039;&amp;#039;Adagrad&amp;#039;&amp;#039;&amp;#039; || Per-parameter rates that shrink for frequently updated features || Duchi et al., 2011&lt;br /&gt;
|-&lt;br /&gt;
| &amp;#039;&amp;#039;&amp;#039;RMSProp&amp;#039;&amp;#039;&amp;#039; || Fixes Adagrad&amp;#039;s diminishing rates using a moving average of squared gradients || Hinton (lecture notes), 2012&lt;br /&gt;
|-&lt;br /&gt;
| &amp;#039;&amp;#039;&amp;#039;{{Term|Adam}}&amp;#039;&amp;#039;&amp;#039; || Combines {{Term|momentum}} with RMSProp-style adaptive rates || Kingma &amp;amp; Ba, 2015&lt;br /&gt;
|-&lt;br /&gt;
| &amp;#039;&amp;#039;&amp;#039;AdamW&amp;#039;&amp;#039;&amp;#039; || Decouples weight decay from the adaptive gradient step || Loshchilov &amp;amp; Hutter, 2019&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>FuzzyBot</name></author>
	</entry>
</feed>