User contributions for Aelmancy
Jump to navigation
Jump to search
11 March 2025
- 01:25, 11 March 2025 diff hist +2 stat946W25 →Retentive Network (RetNet): A Successor to Transformer for Large Language Models
- 01:23, 11 March 2025 diff hist +226 stat946W25 →Retentive Network (RetNet): A Successor to Transformer for Large Language Models
- 00:54, 11 March 2025 diff hist 0 stat946W25 →Topic 10: Linear Attention
- 00:52, 11 March 2025 diff hist +327 stat946W25 →Key Approaches to Linear Attention
- 00:50, 11 March 2025 diff hist +476 N File:retnet comparison.png comparing retnet to other models. from @misc{sun2023retentivenetworksuccessortransformer, title={Retentive Network: A Successor to Transformer for Large Language Models}, author={Yutao Sun and Li Dong and Shaohan Huang and Shuming Ma and Yuqing Xia and Jilong Xue and Jianyong Wang and Furu Wei}, year={2023}, eprint={2307.08621}, archivePrefix={arXiv}, primaryClass={cs.CL}, url={https://arxiv.org/abs/2307.08621}, } current
- 00:40, 11 March 2025 diff hist +1,199 stat946W25 →Retentive Network (RetNet): A Successor to Transformer for Large Language Models
10 March 2025
- 19:05, 10 March 2025 diff hist −44 stat946W25 →Retentive Network: A Successor to Transformer for Large Language Models
- 19:04, 10 March 2025 diff hist +325 stat946W25 →Retentive Network: A Successor to Transformer for Large Language Models
- 18:45, 10 March 2025 diff hist 0 stat946W25 →Topic 10: Linear Attention
- 18:43, 10 March 2025 diff hist +181 stat946W25 →Retentive Network: A Successor to Transformer for Large Language Models
- 18:19, 10 March 2025 diff hist +921 stat946W25 No edit summary
- 17:39, 10 March 2025 diff hist +347 N File:retnet impossible triangle.png impossible triangle from @article{sun2023retentive, title={Retentive network: A successor to transformer for large language models}, author={Sun, Yutao and Dong, Li and Huang, Shaohan and Ma, Shuming and Xia, Yuqing and Xue, Jilong and Wang, Jianyong and Wei, Furu}, journal={arXiv preprint arXiv:2307.08621}, year={2023} } current