<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>LLM on Jinying Tech Blog</title>
    <link>https://chejinying.com/tech/tags/llm/</link>
    <description>Recent content in LLM on Jinying Tech Blog</description>
    <generator>Hugo</generator>
    <language>en-us</language>
    <lastBuildDate>Sat, 04 Apr 2026 00:08:50 +0800</lastBuildDate>
    <atom:link href="https://chejinying.com/tech/tags/llm/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>LLM Fundamentals</title>
      <link>https://chejinying.com/tech/posts/llm/overview/</link>
      <pubDate>Sat, 04 Apr 2026 00:08:50 +0800</pubDate>
      <guid>https://chejinying.com/tech/posts/llm/overview/</guid>
      <description>&lt;blockquote&gt;&#xA;&lt;p&gt;This post uses many abbreviations (BPE, FFN, RoPE, etc.). See the &lt;a href=&#34;https://chejinying.com/tech/posts/llm/abbreviations/&#34;&gt;LLM Abbreviations Glossary&lt;/a&gt; for a quick reference.&lt;/p&gt;&#xA;&lt;/blockquote&gt;&#xA;&lt;h1 id=&#34;what-is-llm&#34;&gt;What is LLM?&lt;/h1&gt;&#xA;&lt;p&gt;At its core, LLM is a &lt;strong&gt;next-token predictor&lt;/strong&gt;.&lt;/p&gt;&#xA;&lt;p&gt;Given a sequence of tokens, it predicts the most probable next token. By repeating this (autoregressive generation), it produces coherent text.&lt;/p&gt;&#xA;&lt;pre tabindex=&#34;0&#34;&gt;&lt;code&gt;Input:  &amp;#34;The cat sat on the&amp;#34;&#xA;Model:  P(&amp;#34;mat&amp;#34;)=0.23, P(&amp;#34;floor&amp;#34;)=0.18, P(&amp;#34;roof&amp;#34;)=0.07, ...&#xA;Pick:   &amp;#34;mat&amp;#34;&#xA;&lt;/code&gt;&lt;/pre&gt;&lt;p&gt;Here&amp;rsquo;s the high-level pipeline — every concept in this post maps to one of these stages:&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
