% \iffalse meta-comment
%
%% File: l3tl-analysis.dtx
%
% Copyright (C) 2011-2024 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "l3kernel bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/latex3
%
% for those people who are interested.
%
%<*driver>
\documentclass[full,kernel]{l3doc}
\begin{document}
  % The driver typesets the .dtx file that has the same name as the job.
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
%
% \title{^^A
%   The \pkg{l3tl-analysis} module\\ Analysing token lists^^A
% }
%
% \author{^^A
%  The \LaTeX{} Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released 2024-03-14}
%
% \maketitle
%
% \begin{documentation}
%
% This module provides functions that are particularly useful in the
% \pkg{l3regex} module for mapping through a token list one \meta{token}
% at a time (including begin-group/end-group tokens).  For
% \cs{tl_analysis_map_inline:Nn} or \cs{tl_analysis_map_inline:nn}, the
% token list is given as an argument; the analogous function
% \cs{peek_analysis_map_inline:n} documented in \pkg{l3token} finds
% tokens in the input stream instead.  In both cases the user provides
% \meta{inline code} that receives three arguments for each
% \meta{token}:
% \begin{itemize}
%   \item \meta{tokens}, which both \texttt{o}-expand and
%     \texttt{e}/\texttt{x}-expand to the \meta{token}. The detailed form of
%     \meta{tokens} may change in later releases.
%   \item \meta{char code}, a decimal representation of the character
%     code of the \meta{token}, $-1$ if it is a control sequence.
%   \item \meta{catcode}, a capital hexadecimal digit which denotes the
%     category code of the \meta{token} (0:~control sequence,
%     1:~begin-group, 2:~end-group, 3:~math shift, 4:~alignment tab,
%     6:~parameter, 7:~superscript, 8:~subscript, A:~space, B:~letter,
%     C:~other, D:~active).  This can be converted to an integer by
%     writing |"|\meta{catcode}.
% \end{itemize}
% In addition, there is a debugging function \cs{tl_analysis_show:n},
% very similar to the \cs[no-index]{ShowTokens} macro from the \pkg{ted} package.
%
% \begin{function}[added = 2021-05-11]
%   {
%     \tl_analysis_show:N, \tl_analysis_show:n,
%     \tl_analysis_log:N, \tl_analysis_log:n
%   }
%   \begin{syntax}
%     \cs{tl_analysis_show:n} \Arg{token list}
%     \cs{tl_analysis_log:n} \Arg{token list}
%   \end{syntax}
%   Displays to the terminal (or log) the detailed decomposition of the
%   \meta{token list} into tokens, showing the category code of each
%   character token, the meaning of control sequences and active
%   characters, and the value of registers.
% \end{function}
%
% \begin{function}[added = 2018-04-09, updated = 2022-03-26]
%   {\tl_analysis_map_inline:nn, \tl_analysis_map_inline:Nn}
%   \begin{syntax}
%     \cs{tl_analysis_map_inline:nn} \Arg{token list} \Arg{inline function}
%   \end{syntax}
%   Applies the \meta{inline function} to each individual \meta{token}
%   in the \meta{token list}. The \meta{inline function} receives three
%   arguments as explained above.  As with all other mappings, the mapping is
%   done at the current group level, \emph{i.e.}~any local assignments
%   made by the \meta{inline function} remain in effect after the loop.
% \end{function}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3tl-analysis} implementation}
%
%    \begin{macrocode}
%<@@=tl>
%    \end{macrocode}
%
% \subsection{Internal functions}
%
% \begin{variable}{\s_@@}
%   The format used to store token lists internally uses the scan mark
%   \cs{s_@@} as a delimiter.
% \end{variable}
%
% \subsection{Internal format}
%
% The task of the \pkg{l3tl-analysis} module is to convert token lists
% to an internal format which allows us to extract all the relevant
% information about individual tokens (category code, character code),
% as well as reconstruct the token list quickly. This internal format is
% used in \pkg{l3regex} where we need to support arbitrary tokens, and
% it is used in conversion functions in \pkg{l3str-convert}, where we wish to
% support clusters of characters instead of single tokens.
%
% We thus need a way to encode any \meta{token} (even begin-group and
% end-group character tokens) in a way amenable to manipulating tokens
% individually. The best we can do is to find \meta{tokens} which both
% \texttt{o}-expand and \texttt{e}/\texttt{x}-expand to the given
% \meta{token}. Collecting more information about the category code and
% character code is also useful for regular expressions, since most
% regexes are catcode-agnostic. The internal format thus takes the form
% of a succession of items of the form
% \begin{quote}
%   \meta{tokens} \cs{s_@@} \meta{catcode} \meta{char code} \cs{s_@@}
% \end{quote}
% The \meta{tokens} \texttt{o}- \emph{and} \texttt{e}/\texttt{x}-expand to the
% original token in the token list or to the cluster of tokens
% corresponding to one Unicode character in the given encoding (for
% \pkg{l3str-convert}). The \meta{catcode} is given as a single hexadecimal
% digit, $0$ for control sequences. The \meta{char code} is given as a
% decimal number, $-1$ for control sequences.
%
% Using delimited arguments lets us build the \meta{tokens}
% progressively when doing an encoding conversion in \pkg{l3str-convert}. On the
% other hand, the delimiter \cs{s_@@} may not appear unbraced in
% \meta{tokens}. This is not a problem because we are careful to wrap
% control sequences in braces (as an argument to \cs{exp_not:n}) when
% converting from a general token list to the internal format.
%
% The current rule for converting a \meta{token} to a balanced set of
% \meta{tokens} which both \texttt{o}-expands and \texttt{e}/\texttt{x}-expands to
% it is the following.
% \begin{itemize}
%   \item A control sequence |\cs| becomes |\exp_not:n { \cs }|
%     \cs{s_@@} $0$ $-1$ \cs{s_@@}.
%   \item A begin-group character |{| becomes \cs{exp_after:wN} |{|
%     \cs{if_false:} |}| \cs{fi:} \cs{s_@@} $1$ \meta{char code}
%     \cs{s_@@}.
%   \item An end-group character |}| becomes \cs{if_false:} |{| \cs{fi:}
%     |}| \cs{s_@@} $2$ \meta{char code} \cs{s_@@}.
%   \item A character with any other category code becomes
%     \cs{exp_not:n} \Arg{character} \cs{s_@@} \meta{hex catcode}
%     \meta{char code} \cs{s_@@}.
% \end{itemize}
% In contrast, for \cs{peek_analysis_map_inline:n} we must allow for an
% input stream containing \tn{outer} macros, so that wrapping all
% control sequences in \cs{exp_not:n} is unsafe.  Instead, we write the
% more elaborate \cs{__kernel_exp_not:w} \cs{exp_after:wN} |{|
% \cs{exp_not:N} |\cs| |}|.  (On the other hand we make a better effort
% by avoiding \cs{exp_not:n} for characters other than active and macro
% parameters.)
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
% \subsection{Variables and helper functions}
%
% \begin{variable}{\s_@@}
%   The scan mark \cs{s_@@} is used as a delimiter in the internal
%   format. This is more practical than using a quark, because we would
%   then need to control expansion much more carefully: compare
%   \cs{int_value:w} |`#1| \cs{s_@@} with \cs{int_value:w} |`#1|
%   \cs{exp_stop_f:} \cs{exp_not:N} \cs{q_mark} to extract a character
%   code followed by the delimiter in an \texttt{e}-expansion.
%    \begin{macrocode}
\scan_new:N \s_@@ % delimiter between the fields of the internal format
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}
%   {\l_@@_analysis_token, \l_@@_analysis_char_token}
%   The tokens in the token list are probed with the \TeX{} primitive
%   \tn{futurelet}. We use \cs{l_@@_analysis_token} in that
%   construction. In some cases, we convert the following token to a
%   string before probing it: then the token variable used is
%   \cs{l_@@_analysis_char_token}.
%    \begin{macrocode}
\cs_new_eq:NN \l_@@_analysis_token ? % placeholder; reassigned by \tex_futurelet:D
\cs_new_eq:NN \l_@@_analysis_char_token ? % placeholder; reassigned by \cs_set_eq:NN
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_peek_code_tl}
%   Holds some code to be run once the next token has been fully
%   analysed in \cs{peek_analysis_map_inline:n}.
%    \begin{macrocode}
\tl_new:N \l_@@_peek_code_tl % code to run once a peeked token is analysed
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\c_@@_peek_catcodes_tl}
%   A token list containing the character number~$32$ (space) with all
%   possible category codes except $1$ and $2$ (begin-group and
%   end-group).  Why $32$?  Because some \LuaTeX{} versions only allow
%   creation of catcode~$10$ (space) tokens with this character code, so
%   that we decided to make \cs{char_generate:nn} refuse to create such
%   weird spaces as well.  We do not include the macro parameter case
%   (catcode~$6$) because it cannot be used as a macro delimiter.
%    \begin{macrocode}
\group_begin:
  % Inside this group only, the active space is let to \scan_stop:.
  % Presumably this keeps the catcode-13 space generated below from
  % being expanded while \tl_const:Ne builds the constant -- confirm.
\char_set_active_eq:NN \  \scan_stop:
\tl_const:Ne \c_@@_peek_catcodes_tl
  {
    % Each entry is a character-32 token of a given category code,
    % followed by the hexadecimal digit that names this category code.
    \char_generate:nn { 32 } { 3 }   3
    \char_generate:nn { 32 } { 4 }   4
    \char_generate:nn { 32 } { 7 }   7
    \char_generate:nn { 32 } { 8 }   8
    % The ordinary space (catcode 10) comes from \c_space_tl.  The
    % digits A to D go through \token_to_str:N, hence are stored as
    % string characters rather than as letters.
    \c_space_tl                     \token_to_str:N A
    \char_generate:nn { 32 } { 11 } \token_to_str:N B
    \char_generate:nn { 32 } { 12 } \token_to_str:N C
    \char_generate:nn { 32 } { 13 } \token_to_str:N D
  }
\group_end:
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_analysis_normal_int}
%   The number of normal (\texttt{N}-type argument) tokens since the
%   last special token.
%    \begin{macrocode}
\int_new:N \l_@@_analysis_normal_int % zeroed each time a special token is stored
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_analysis_index_int}
%   During the first pass, this is the index in the array being built.
%   During the second pass, it is equal to the maximum index in the
%   array from the first pass.
%    \begin{macrocode}
\int_new:N \l_@@_analysis_index_int % number of the \toks and \skip register in use
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_analysis_nesting_int}
%   Nesting depth of explicit begin-group and end-group characters
%   during the first pass. This lets us detect the end of the token list
%   without a reserved end-marker.
%    \begin{macrocode}
\int_new:N \l_@@_analysis_nesting_int % the first pass stops when this reaches -1
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_analysis_type_int}
%   When encountering special characters, we record their \enquote{type}
%   in this integer.
%    \begin{macrocode}
\int_new:N \l_@@_analysis_type_int % set by the type test, adjusted when storing
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_analysis_result_tl}
%   The result of the conversion is stored in this token list, with a
%   succession of items of the form
%   \begin{quote}
%     \meta{tokens} \cs{s_@@} \meta{catcode} \meta{char code} \cs{s_@@}
%   \end{quote}
%    \begin{macrocode}
\tl_new:N \g_@@_analysis_result_tl % set globally by the second pass
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}[EXP]{\@@_analysis_extract_charcode:}
% \begin{macro}[EXP]{\@@_analysis_extract_charcode_aux:w}
%   Extracting the character code from the meaning of
%   \cs{l_@@_analysis_token}. This has no error checking, and should
%   only be assumed to work for begin-group and end-group character
%   tokens. It produces a number in the form |`|\meta{char}.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_extract_charcode:
  {
    % Expand \token_to_meaning:N first, so that the auxiliary is applied
    % to the meaning string of \l_@@_analysis_token.
    \exp_after:wN \@@_analysis_extract_charcode_aux:w
      \token_to_meaning:N \l_@@_analysis_token
  }
\cs_new:Npn \@@_analysis_extract_charcode_aux:w #1 ~ #2 ~ { ` }
  % The auxiliary drops the first two space-delimited words of the
  % meaning and leaves a backquote in front of whatever follows them.
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_analysis_cs_space_count:NN}
% \begin{macro}[EXP]{\@@_analysis_cs_space_count:w}
% \begin{macro}[EXP]{\@@_analysis_cs_space_count_end:w}
%   Counts the number of spaces in the string representation of its
%   second argument, as well as the number of characters following the
%   last space in that representation, and feeds the two numbers as
%   semicolon-delimited arguments to the first argument. When this
%   function is used, the escape character is printable and non-space.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_cs_space_count:NN #1 #2
  {
    % #1: function that receives the two results.
    % #2: token whose string representation is examined.
    % The integer expression starts at 0 and receives one "+ 1" for
    % each space found by the counting loop.
    \exp_after:wN #1
    \int_value:w \int_eval:w 0
      \exp_after:wN \@@_analysis_cs_space_count:w
        \token_to_str:N #2
        % Trailer: its space delimits the argument of the last loop
        % step, which therefore contains this \fi: and the end function.
        \fi: \@@_analysis_cs_space_count_end:w ; ~ !
  }
\cs_new:Npn \@@_analysis_cs_space_count:w #1 ~
  {
    % #1: the text up to the next space.  It is inserted twice after
    % \if_false:.  In ordinary steps both copies are skipped.  In the
    % last step #1 carries the \fi: of the trailer: the first copy ends
    % the conditional and starts the end function, while the second
    % copy hands the text found after the last space to that function.
    \if_false: #1 #1 \fi:
    + 1
    \@@_analysis_cs_space_count:w
  }
\cs_new:Npn \@@_analysis_cs_space_count_end:w ; #1 \fi: #2 !
  % #1: the characters after the last space.  #2: leftover tokens,
  % discarded.  The leading semicolon terminates the running
  % \int_eval:w; the number of characters in #1 follows, then a second
  % semicolon.
  { \exp_after:wN ; \int_value:w \str_count_ignore_spaces:n {#1} ; }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Plan of attack}
%
% Our goal is to produce a token list of the form roughly
% \begin{quote}
%   \meta{token 1} \cs{s_@@} \meta{catcode 1} \meta{char code 1} \cs{s_@@} \\
%   \meta{token 2} \cs{s_@@} \meta{catcode 2} \meta{char code 2} \cs{s_@@} \\
%   \ldots{}
%   \meta{token N} \cs{s_@@} \meta{catcode N} \meta{char code N} \cs{s_@@}
% \end{quote}
% Most but not all tokens can be grabbed as an undelimited
% (\texttt{N}-type) argument by \TeX{}. The plan is to have a two pass
% system. In the first pass, locate special tokens, and store them in
% various \tn{toks} registers. In the second pass, which is done within
% an \texttt{e}-expanding assignment, normal tokens are taken in as
% \texttt{N}-type arguments, and special tokens are retrieved from the
% \tn{toks} registers, and removed from the input stream by some means.
% The whole process takes linear time, because we avoid building the
% result one item at a time.
%
% We make the escape character printable (backslash, but this later
% oscillates between slash and backslash): this allows us to
% distinguish characters from control sequences.
%
% A token has two characteristics: its \tn{meaning}, and what it looks
% like for \TeX{} when it is in scanning mode (\emph{e.g.}, when
% capturing parameters for a macro). For our purposes, we distinguish
% the following meanings:
% \begin{itemize}
%   \item begin-group token (category code $1$), either space (character
%     code $32$), or non-space;
%   \item end-group token (category code $2$), either space (character
%     code $32$), or non-space;
%   \item space token (category code $10$, character code $32$);
%   \item anything else (then the token is always an \texttt{N}-type
%     argument).
% \end{itemize}
% The token itself can \enquote{look like} one of the following
% \begin{itemize}
%   \item a non-active character, in which case its meaning is
%     automatically that associated to its character code and category
%     code (we call it a \enquote{true} character);
%   \item an active character;
%   \item a control sequence.
% \end{itemize}
% The only tokens which are not valid \texttt{N}-type arguments are true
% begin-group characters, true end-group characters, and true spaces.
% We detect those characters by scanning ahead with \tn{futurelet},
% then distinguishing true characters from control sequences set equal
% to them using the \tn{string} representation.
%
% The second pass is a simple exercise in expandable loops.
%
% \begin{macro}{\@@_analysis:n}
%   Everything is done within a group, and all definitions are
%   local. We use \cs{group_align_safe_begin:} and
%   \cs{group_align_safe_end:} to avoid problems in
%   case \cs{@@_analysis:n} is used within an alignment and its argument
%   contains alignment tab tokens.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis:n #1
  {
    % Both passes receive the same token list #1.  The group keeps the
    % assignments of the first pass local; the result survives the
    % group because the second pass stores it with a global assignment.
    \group_begin:
      \group_align_safe_begin:
        % First pass: locate the special tokens.
        \@@_analysis_a:n {#1}
        % Second pass: build \g_@@_analysis_result_tl.
        \@@_analysis_b:n {#1}
      \group_align_safe_end:
    \group_end:
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Disabling active characters}
%
% \begin{macro}{\@@_analysis_disable:n}
%   Active characters can cause problems later on in the processing, so
%   we provide a way to disable them, by setting them to
%   \texttt{undefined}. Since Unicode contains too many characters to
%   loop over all of them, we instead do this whenever we encounter a
%   character.  For \pTeX{} and \upTeX{} we skip characters beyond
%   $[0,255]$ because \tn{lccode} only allows those values.
%    \begin{macrocode}
\group_begin:
  % Make the null character active while the definitions are read, so
  % that their bodies contain an active character token with code 0.
  \char_set_catcode_active:N \^^@
  \cs_new_protected:Npn \@@_analysis_disable:n #1
    {
      % Map character code 0 to the code given as argument, lowercase
      % the active null character into the active character with that
      % code, and let this active character to \tex_undefined:D.
      \tex_lccode:D 0 = #1 \exp_stop_f:
      \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
    }
  % On pTeX and upTeX the function is redefined with a guard: the same
  % steps are performed only when the code is below 256.  The
  % redefinition is global since it happens inside this group.
  \bool_lazy_or:nnT
    { \sys_if_engine_ptex_p: }
    { \sys_if_engine_uptex_p: }
    {
      \cs_gset_protected:Npn \@@_analysis_disable:n #1
        {
          \if_int_compare:w 256 > #1 \exp_stop_f:
            \tex_lccode:D 0 = #1 \exp_stop_f:
            \tex_lowercase:D { \tex_let:D ^^@ } \tex_undefined:D
          \fi:
        }
    }
\group_end:
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_analysis_disable_char:N}
%   Similar to \cs{@@_analysis_disable:n}, but it receives a normal
%   character token, tests if that token is active (by turning it into
%   a space: the active space has been undefined at this point), and
%   if so, disables it.  Even if the character is active and set equal
%   to a primitive conditional, nothing blows up.
%   Again, in \pTeX{} and \upTeX{} we skip characters beyond $[0,255]$,
%   which cannot be active anyway.
%    \begin{macrocode}
\group_begin:
  % NOTE(review): the null character does not occur in the definitions
  % made in this group, so this catcode change looks unnecessary here;
  % confirm before removing.
  \char_set_catcode_active:N \^^@
  \cs_new_protected:Npn \@@_analysis_disable_char:N #1
    {
      % Lowercasing maps the argument to character code 32 with an
      % unchanged category code, so the test compares the "space
      % version" of the argument with \tex_undefined:D.
      \tex_lccode:D `#1 = 32 \exp_stop_f:
      \tex_lowercase:D { \if_meaning:w #1 } \tex_undefined:D
        % Reached only when the test is true: undefine the argument
        % itself (this copy is not affected by the lowercasing).
        \tex_let:D #1 \tex_undefined:D
      \fi:
    }
  % On pTeX and upTeX the function is redefined globally so that the
  % same steps run only for character codes below 256.
  \bool_lazy_or:nnT
    { \sys_if_engine_ptex_p: }
    { \sys_if_engine_uptex_p: }
    {
      \cs_gset_protected:Npn \@@_analysis_disable_char:N #1
        {
          \if_int_compare:w 256 > `#1 \exp_stop_f:
            \tex_lccode:D `#1 = 32 \exp_stop_f:
            \tex_lowercase:D { \if_meaning:w #1 } \tex_undefined:D
              \tex_let:D #1 \tex_undefined:D
            \fi:
          \fi:
        }
    }
\group_end:
%    \end{macrocode}
% \end{macro}
%
% \subsection{First pass}
%
% The goal of this pass is to detect special (non-\texttt{N}-type) tokens,
% and count how many \texttt{N}-type tokens lie between special tokens.
% Also, we wish to store some representation of each special token
% in a \tn{toks} register.
%
% We have $11$ types of tokens:
% \begin{itemize}
% \item[1.] a true non-space begin-group character;
% \item[2.] a true space begin-group character;
% \item[3.] a true non-space end-group character;
% \item[4.] a true space end-group character;
% \item[5.] a true space blank space character;
% \item[6.] an active character;
% \item[7.] any other true character;
% \item[8.] a control sequence equal to a begin-group token (category code $1$);
% \item[9.] a control sequence equal to an end-group token (category code $2$);
% \item[10.] a control sequence equal to a space token
%   (character code $32$, category code $10$);
% \item[11.] any other control sequence.
% \end{itemize}
% Our first tool is \tn{futurelet}. This cannot distinguish
% case $8$ from $1$ or $2$, nor case $9$ from $3$ or $4$,
% nor case $10$ from case $5$. Those cases are later distinguished
% by applying the \tn{string} primitive to the following token,
% after possibly changing the escape character to ensure that
% a control sequence's string representation cannot be mistaken
% for the true character.
%
% In cases $6$, $7$, and $11$, the following token is a valid
% \texttt{N}-type argument, so we grab it and distinguish the case
% of a character from a control sequence: in the latter case,
% \cs{str_tail:n} \Arg{token} is non-empty, because the
% escape character is printable.
%
% \begin{macro}{\@@_analysis_a:n}
%   We read tokens one by one using \tn{futurelet}.
%   While performing the loop, we keep track of the number of
%   true begin-group characters minus the number of
%   true end-group characters in \cs{l_@@_analysis_nesting_int}.
%   This reaches $-1$ when we read the closing brace.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_a:n #1
  {
    % Undefine the active space; as a side effect the lowercase code of
    % character 0 is left at 32.
    \@@_analysis_disable:n { 32 }
    % Start with the backslash (92) as escape character.
    \int_set:Nn \tex_escapechar:D { 92 }
    \int_zero:N \l_@@_analysis_normal_int
    \int_zero:N \l_@@_analysis_index_int
    \int_zero:N \l_@@_analysis_nesting_int
    % The brace skipped by \if_false: balances the closing brace put
    % after the token list.  At run time that closing brace is an
    % unmatched end-group character: storing it brings the nesting
    % level to -1, which stops the loop.
    \if_false: { \fi: \@@_analysis_a_loop:w #1 }
    % Undo the last increment, made when that closing brace was stored.
    \int_decr:N \l_@@_analysis_index_int
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_analysis_a_loop:w}
%   Read one character and check its type.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_a_loop:w
  % Let \l_@@_analysis_token to the upcoming token without removing it
  % from the input, then classify it.
  { \tex_futurelet:D \l_@@_analysis_token \@@_analysis_a_type:w }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_analysis_a_type:w}
%   At this point, \cs{l_@@_analysis_token} holds the meaning
%   of the following token. We store in \cs{l_@@_analysis_type_int}
%   information about the meaning of the token ahead:
%   \begin{itemize}
%   \item 0 space token;
%   \item 1 begin-group token;
%   \item -1 end-group token;
%   \item 2 other.
%   \end{itemize}
%   The values $0$, $1$, $-1$ correspond to how much a true such
%   character changes the nesting level ($2$ is used only here,
%   and is irrelevant later). Then call the auxiliary for each case.
%   Note that nesting conditionals here is safe because we only skip
%   over \cs{l_@@_analysis_token} if it matches with one of the
%   character tokens (hence is not a primitive conditional).
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_a_type:w
  {
    % Classify the meaning of the upcoming token: 0 for a space, 1 for
    % begin-group, -1 for end-group, 2 for anything else.  The nested
    % conditionals expand to the number being assigned, and
    % \exp_stop_f: ends that number.
    \l_@@_analysis_type_int =
      \if_meaning:w \l_@@_analysis_token \c_space_token
        0
      \else:
        % \exp_not:N prevents the category code test from expanding
        % \l_@@_analysis_token.
        \if_catcode:w \exp_not:N \l_@@_analysis_token \c_group_begin_token
          1
        \else:
          \if_catcode:w \exp_not:N \l_@@_analysis_token \c_group_end_token
            - 1
          \else:
            2
          \fi:
        \fi:
      \fi:
      \exp_stop_f:
    % Dispatch: values 0, 1, 2 select the first three branches in
    % order, while -1 ends up in the \else: branch.  Each \exp_after:wN
    % closes the conditional before the selected function runs.
    \if_case:w \l_@@_analysis_type_int
         \exp_after:wN \@@_analysis_a_space:w
    \or: \exp_after:wN \@@_analysis_a_bgroup:w
    \or: \exp_after:wN \@@_analysis_a_safe:N
    \else: \exp_after:wN \@@_analysis_a_egroup:w
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_analysis_a_space:w}
% \begin{macro}{\@@_analysis_a_space_test:w}
%   In this branch, the following token's meaning is a blank space.
%   Apply \tn{string} to that token: a true blank space gives a space, a
%   control sequence gives a result starting with the escape character,
%   an active character gives something other than a space since we
%   disabled the space.  We grab as \cs{l_@@_analysis_char_token} the first
%   character of the string representation then test it in
%   \cs{@@_analysis_a_space_test:w}.
%   Also, since \cs{@@_analysis_a_store:} expects the special token to be
%   stored in the relevant \tn{toks} register, we do that. The extra
%   \cs{exp_not:n} is unnecessary of course, but it makes the treatment
%   of all tokens more homogeneous.
%   If we discover that the next token was actually a control sequence
%   or an active character
%   instead of a true space, then we step the counter of normal tokens.
%   We now have in front of us the whole string representation of
%   the control sequence, including potential spaces; those will appear
%   to be true spaces later in this pass. Hence, all other branches of
%   the code in this first pass need to consider the string representation,
%   so that the second pass does not need to test the meaning of tokens,
%   only strings.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_a_space:w
  {
    % \token_to_str:N is expanded first and turns the upcoming token
    % into its string representation.  The assignment then lets
    % \l_@@_analysis_char_token to the first character of that string,
    % and the test function runs right after the assignment.
    \tex_afterassignment:D \@@_analysis_a_space_test:w
    \exp_after:wN \cs_set_eq:NN
    \exp_after:wN \l_@@_analysis_char_token
    \token_to_str:N
  }
\cs_new_protected:Npn \@@_analysis_a_space_test:w
  {
    \if_meaning:w \l_@@_analysis_char_token \c_space_token
      % True space: put its representation in the current \toks
      % register and record it as a special token.
      \tex_toks:D \l_@@_analysis_index_int { \exp_not:n { ~ } }
      \@@_analysis_a_store:
    \else:
      % The character just removed was the first character of the
      % string of a control sequence or active character: count it as
      % one normal token.
      \int_incr:N \l_@@_analysis_normal_int
    \fi:
    \@@_analysis_a_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\@@_analysis_a_bgroup:w, \@@_analysis_a_egroup:w}
% \begin{macro}
%   {\@@_analysis_a_group:nw, \@@_analysis_a_group_aux:w, \@@_analysis_a_group_auxii:w, \@@_analysis_a_group_test:w}
%   The token is most likely a true character token with catcode $1$ or
%   $2$, but it might be a control sequence, or an active character.
%   Optimizing for the first case, we store in a toks register some code
%   that expands to that token.  Since we will turn what follows into
%   a string, we make sure the escape character is different from the
%   current character code (by switching between solidus and backslash).
%   To detect the special case of an active character let to the catcode
%   $1$ or~$2$ character with the same character code, we disable the
%   active character with that character code and re-test: if the
%   following token has become undefined we can in fact safely grab it.
%   We are finally ready to turn what follows to a string and test it.
%   This is one place where we need \cs{l_@@_analysis_char_token} to be a
%   separate control sequence from \cs{l_@@_analysis_token}, to compare them.
%    \begin{macrocode}
\group_begin:
  % The null character is given category code 1, then category code 2,
  % so that each definition can contain it as a begin-group or
  % end-group placeholder.  \@@_analysis_a_group:nw later lowercases the
  % placeholder into the character actually met.  In both cases the
  % argument passed on is code that expands to the group character
  % while staying balanced thanks to the \if_false: ... \fi: pair.
  \char_set_catcode_group_begin:N \^^@ % {
  \cs_new_protected:Npn \@@_analysis_a_bgroup:w
    { \@@_analysis_a_group:nw { \exp_after:wN ^^@ \if_false: } \fi: } }
  \char_set_catcode_group_end:N \^^@
  \cs_new_protected:Npn \@@_analysis_a_egroup:w
    { \@@_analysis_a_group:nw { \if_false: { \fi: ^^@ } } % }
\group_end:
\cs_new_protected:Npn \@@_analysis_a_group:nw #1
  {
    % The lowercase code of character 0 is set to the character code
    % read from the meaning of \l_@@_analysis_token.
    \tex_lccode:D 0 = \@@_analysis_extract_charcode: \scan_stop:
    % Lowercasing turns the null-character placeholder inside the
    % argument into that character before the \toks register is set.
    \tex_lowercase:D { \tex_toks:D \l_@@_analysis_index_int {#1} }
    % If the escape character has the same code, switch it: 139 - 92
    % is 47 and 139 - 47 is 92 (backslash and solidus).
    \if_int_compare:w \tex_lccode:D 0 = \tex_escapechar:D
      \int_set:Nn \tex_escapechar:D { 139 - \tex_escapechar:D }
    \fi:
    % Undefine the active character with that code, then look at the
    % upcoming token again.
    \@@_analysis_disable:n { \tex_lccode:D 0 }
    \tex_futurelet:D \l_@@_analysis_token \@@_analysis_a_group_aux:w
  }
\cs_new_protected:Npn \@@_analysis_a_group_aux:w
  {
    % If the upcoming token has become undefined, it was the active
    % character that has just been disabled: it can be grabbed as a
    % normal argument.  Otherwise go on with the string test.
    \if_meaning:w \l_@@_analysis_token \tex_undefined:D
      \exp_after:wN \@@_analysis_a_safe:N
    \else:
      \exp_after:wN \@@_analysis_a_group_auxii:w
    \fi:
  }
\cs_new_protected:Npn \@@_analysis_a_group_auxii:w
  {
    % Turn the upcoming token into a string, let
    % \l_@@_analysis_char_token to the first character of that string,
    % then run the test.
    \tex_afterassignment:D \@@_analysis_a_group_test:w
    \exp_after:wN \cs_set_eq:NN
    \exp_after:wN \l_@@_analysis_char_token
    \token_to_str:N
  }
\cs_new_protected:Npn \@@_analysis_a_group_test:w
  {
    % Equal character codes: the token was a true group character, so
    % it is recorded as special.  Otherwise the character removed was
    % the start of the string of a control sequence, counted as one
    % normal token.
    \if_charcode:w \l_@@_analysis_token \l_@@_analysis_char_token
      \@@_analysis_a_store:
    \else:
      \int_incr:N \l_@@_analysis_normal_int
    \fi:
    \@@_analysis_a_loop:w
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\@@_analysis_a_store:}
%   This function is called each time we meet a special token;
%   at this point, the \tn{toks} register \cs{l_@@_analysis_index_int}
%   holds a token list which expands to the given special token.
%   Also, the value of \cs{l_@@_analysis_type_int} indicates which case
%   we are in:
%   \begin{itemize}
%   \item -1 end-group character;
%   \item 0 space character;
%   \item 1 begin-group character.
%   \end{itemize}
%   We need to distinguish further the case of a space character
%   (code $32$) from other character codes, because those
%   behave differently in the second pass. Namely, after testing
%   the \tn{lccode} of $0$ (which holds the present character code)
%   we change the cases above to
%   \begin{itemize}
%   \item -2 space end-group character;
%   \item -1 non-space end-group character;
%   \item 0 space blank space character;
%   \item 1 non-space begin-group character;
%   \item 2 space begin-group character.
%   \end{itemize}
%   This has the property that non-space characters correspond to odd
%   values of \cs{l_@@_analysis_type_int}.  The number of normal tokens until
%   here and the type of special token are packed into a \tn{skip}
%   register.  Finally, we check whether we reached the last closing
%   brace, in which case we stop by disabling the looping function
%   (locally).
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_a_store:
  {
    % The type (-1, 0 or 1) is exactly the change in nesting level.
    \tex_advance:D \l_@@_analysis_nesting_int \l_@@_analysis_type_int
    % When the lowercase code of character 0 is 32, double the type
    % (adding it to itself): -1 becomes -2, 1 becomes 2, 0 stays 0.
    \if_int_compare:w \tex_lccode:D 0 = `\ \exp_stop_f:
      \tex_advance:D \l_@@_analysis_type_int \l_@@_analysis_type_int
    \fi:
    % Pack both numbers into the \skip register of the current index:
    % the natural part is the number of normal tokens, the stretch
    % part is the type.
    \tex_skip:D \l_@@_analysis_index_int
      = \l_@@_analysis_normal_int sp
         plus \l_@@_analysis_type_int sp \scan_stop:
    \int_incr:N \l_@@_analysis_index_int
    \int_zero:N \l_@@_analysis_normal_int
    % Nesting level -1: stop the loop by making the looping function
    % a no-op; this assignment is local.
    \if_int_compare:w \l_@@_analysis_nesting_int = - \c_one_int
      \cs_set_eq:NN \@@_analysis_a_loop:w \scan_stop:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_analysis_a_safe:N}
% \begin{macro}{\@@_analysis_a_cs:ww}
%   This should be the simplest case: since the upcoming token is safe,
%   we can simply grab it in a second pass. If the token is a single
%   character (including space), the \cs{if_charcode:w} test yields
%   true; we disable a potentially active character (that could
%   otherwise masquerade as the true character in the next pass) and we
%   count one \enquote{normal} token. On the other
%   hand, if the token is a control sequence, we should replace it by
%   its string representation for compatibility with other code
%   branches. Instead of slowly looping through the characters with
%   the main code, we use the knowledge of how the second pass works:
%   if the control sequence name contains no space, count that token
%   as a number of normal tokens equal to its string length. If the
%   control sequence contains spaces, they should be registered as
%   special characters by increasing \cs{l_@@_analysis_index_int}
%   (no need to carefully count characters between each space), and
%   all characters after the last space should be counted in the
%   following sequence of \enquote{normal} tokens.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_a_safe:N #1
  {
    % Single-character test: the string of the argument loses its
    % first character.  If nothing is left, \prg_do_nothing: vanishes
    % and the two \scan_stop: are compared, giving true.  Otherwise
    % \scan_stop: is compared with the second character of the string,
    % giving false.
    \if_charcode:w
        \scan_stop:
        \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
        \scan_stop:
      \exp_after:wN \use_i:nn
    \else:
      \exp_after:wN \use_ii:nn
    \fi:
      {
        % Character: undefine it if it is active, count one normal token.
        \@@_analysis_disable_char:N #1
        \int_incr:N \l_@@_analysis_normal_int
      }
      % Control sequence: count the spaces in its name and the
      % characters after the last space, then update the counters.
      { \@@_analysis_cs_space_count:NN \@@_analysis_a_cs:ww #1 }
    \@@_analysis_a_loop:w
  }
\cs_new_protected:Npn \@@_analysis_a_cs:ww #1; #2;
  {
    % First argument: number of spaces in the string of the control
    % sequence.  Second argument: number of characters after the last
    % space.
    \if_int_compare:w #1 > \c_zero_int
      % With spaces: the current \skip register receives the normal
      % count plus one, the index advances by the number of spaces, and
      % the assignment after \fi: sets the normal count to the second
      % argument.
      \tex_skip:D \l_@@_analysis_index_int
        = \int_eval:n { \l_@@_analysis_normal_int + 1 } sp \exp_stop_f:
      \tex_advance:D \l_@@_analysis_index_int #1 \exp_stop_f:
    \else:
      % Without spaces: this \tex_advance:D prefixes the assignment
      % after \fi:, so the second argument is added to the normal count.
      \tex_advance:D
    \fi:
    \l_@@_analysis_normal_int #2 \exp_stop_f:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \subsection{Second pass}
%
% The second pass is an exercise in expandable loops.
% All the necessary information is stored in \tn{skip}
% and \tn{toks} registers.
%
% \begin{macro}{\@@_analysis_b:n}
% \begin{macro}[EXP]{\@@_analysis_b_loop:w}
%   Start the loop with the index $0$. No need for an end-marker:
%   the loop stops by itself when the last index is read.
%   We repeatedly oscillate between reading long stretches
%   of normal tokens, and reading special tokens.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_analysis_b:n #1
  {
    % The whole second pass runs by expansion inside this global
    % assignment of the result.
    \__kernel_tl_gset:Nx \g_@@_analysis_result_tl
      {
        % Start at index 0, with the token list itself as input.
        \@@_analysis_b_loop:w 0; #1
        % NOTE(review): the code that jumps to this break point is not
        % in the part of the file visible here.
        \prg_break_point:
      }
  }
\cs_new:Npn \@@_analysis_b_loop:w #1;
  {
    % The argument is an index.  \int_value:w applied to the \skip
    % register of that index gives its natural part, which the first
    % pass set to a number of normal tokens; the index is passed along.
    \exp_after:wN \@@_analysis_b_normals:ww
      \int_value:w \tex_skip:D #1 ; #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_analysis_b_normals:ww}
% \begin{macro}[EXP]{\@@_analysis_b_normal:wwN}
%   The first argument is the number of normal tokens which remain
%   to be read, and the second argument is the index in the array
%   produced in the first step.
%   A character's string representation is always one character long,
%   while a control sequence is always longer (we have set the escape
%   character to a printable value). In both cases, we leave
%   \cs{exp_not:n} \Arg{token} \cs{s_@@} in the input stream
%   (after \texttt{e}-expansion). Here, \cs{exp_not:n} is used
%   rather than \cs{exp_not:N} because |#3| could be
%   a macro parameter character or could be \cs{s_@@}
%   (which must be hidden behind braces in the result).
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_b_normals:ww #1;
  {
    % #1 is the number of normal tokens left to read; the index follows
    % in the input stream.  When #1 is zero, \@@_analysis_b_special:w
    % takes over: its delimited parameter text absorbs the \fi: and the
    % "\@@_analysis_b_normal:wwN 0 ;" that follow it here.
    \if_int_compare:w #1 = \c_zero_int
      \@@_analysis_b_special:w
    \fi:
    \@@_analysis_b_normal:wwN #1;
  }
\cs_new:Npn \@@_analysis_b_normal:wwN #1; #2; #3
  {
    % #1: remaining number of normal tokens, #2: index, #3: the normal
    % token itself.  Once the enclosing x-expansion is done, this line
    % has left \exp_not:n {#3} \s_@@ in the result.
    \exp_not:n { \exp_not:n { #3 } } \s_@@
    % True when the string form of #3 is a single character (character
    % token), false when it is longer (control sequence).
    \if_charcode:w
        \scan_stop:
        \exp_after:wN \use_none:n \token_to_str:N #3 \prg_do_nothing:
        \scan_stop:
      \exp_after:wN \@@_analysis_b_char:Nn
      \exp_after:wN \@@_analysis_b_char_aux:nww
    \else:
      \exp_after:wN \@@_analysis_b_cs:Nww
    \fi:
    % The selected function finds #3, then the count and the index.
    #3 #1; #2;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_analysis_b_char:Nn, \@@_analysis_b_char_aux:nww}
%   This function is called here with arguments
%   \cs{@@_analysis_b_char_aux:nww} and a normal character, while in the
%   peek analysis code it is called with \cs{use_none:n} and possibly a
%   space character, which is why the function has signature |Nn|.
%   If the normal token we grab is a character, leave
%   \meta{catcode} \meta{charcode} followed by \cs{s_@@}
%   in the input stream, and call \cs{@@_analysis_b_normals:ww}
%   with its first argument decremented.
%    \begin{macrocode}
\cs_new:Npe \@@_analysis_b_char:Nn #1#2
  {
    % Defined by e-expansion: each "\token_to_str:N <letter>" below is
    % expanded at definition time, whereas the conditionals are kept for
    % run time by \exp_not:N / \exp_not:n.
    % At run time the cascade leaves one hexadecimal digit describing #2:
    %   D  meaning equal to \tex_undefined:D (how active characters are
    %      detected),
    %   C  other, B  letter, 3  math toggle, 4  alignment,
    %   7  math superscript, 8  math subscript, A  space,
    %   6  when none of the tests succeeded.
    \exp_not:N \if_meaning:w #2 \exp_not:N \tex_undefined:D
      \token_to_str:N D \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_catcode_other_token
      \token_to_str:N C \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_catcode_letter_token
      \token_to_str:N B \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_math_toggle_token      3
      \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_alignment_token        4
      \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_math_superscript_token 7
      \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_math_subscript_token   8
      \exp_not:N \else:
    \exp_not:N \if_catcode:w #2 \c_space_token
      \token_to_str:N A \exp_not:N \else:
      6
    % Close the eight conditionals, then apply the function #1 to the
    % character #2.
    \exp_not:n { \fi: \fi: \fi: \fi: \fi: \fi: \fi: \fi: }
    #1 {#2}
  }
\cs_new:Npn \@@_analysis_b_char_aux:nww #1
  {
    % Leave the character code of #1 followed by \s_@@.
    \int_value:w `#1 \s_@@
    % The expression "- 1 +" is completed by the count that follows in
    % the input stream (up to its semicolon), so that
    % \@@_analysis_b_normals:ww receives that count decremented by one.
    \exp_after:wN \@@_analysis_b_normals:ww
      \int_value:w \int_eval:w - 1 +
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_analysis_b_cs:Nww}
% \begin{macro}[EXP]{\@@_analysis_b_cs_test:ww}
%   If the token we grab is a control sequence, leave
%   |0 -1| (as category code and character code) in the input stream,
%   followed by \cs{s_@@},
%   and call \cs{@@_analysis_b_normals:ww} with updated arguments.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_b_cs:Nww #1
  {
    % Category code 0 and character code -1 stand for a control sequence.
    0 -1 \s_@@
    % Hand #1 to \@@_analysis_cs_space_count:NN, naming
    % \@@_analysis_b_cs_test:ww as the function that receives the result.
    \@@_analysis_cs_space_count:NN \@@_analysis_b_cs_test:ww #1
  }
\cs_new:Npn \@@_analysis_b_cs_test:ww #1 ; #2 ; #3 ; #4 ;
  {
    % Calls \@@_analysis_b_normals:ww <count> ; <index> ; where
    %   <index> = #4 + #1, and
    %   <count> = #3 - #2 when #1 is zero,
    %             (value of \skip register #4 + #1) - #2 otherwise.
    % NOTE(review): #1 ; #2 ; presumably come from
    % \@@_analysis_cs_space_count:NN (defined elsewhere), and #3 ; #4 ;
    % are the count and index left in the input stream by
    % \@@_analysis_b_normal:wwN -- confirm against that function.
    \exp_after:wN \@@_analysis_b_normals:ww
    \int_value:w \int_eval:w
    \if_int_compare:w #1 = \c_zero_int
      #3
    \else:
      \tex_skip:D \int_eval:n { #4 + #1 } \exp_stop_f:
    \fi:
    - #2
    % The \exp_after:wN makes the second integer get evaluated before the
    % semicolon that ends the first one is reached.
    \exp_after:wN ;
    \int_value:w \int_eval:n { #4 + #1 } ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_analysis_b_special:w}
% \begin{macro}[EXP]{\@@_analysis_b_special_char:wN}
% \begin{macro}[EXP]{\@@_analysis_b_special_space:w}
%   Here, |#1| is the current index in the array built in the first pass.
%   Check now whether we reached the end (we shouldn't keep the trailing
%   end-group character that marked the end of the token list in the
%   first pass).
%   Unpack the \tn{toks} register: when \texttt{e}/\texttt{x}-expanding again,
%   we will get the special token.
%   Then leave the category code in the input stream, followed by
%   the character code, and call \cs{@@_analysis_b_loop:w} with the next index.
%    \begin{macrocode}
\group_begin:
  % Within this group the letter A has category code "other" while the
  % definition is read.
  \char_set_catcode_other:N A
  % The parameter text removes the \fi: and the call
  % "\@@_analysis_b_normal:wwN 0 ;" left by \@@_analysis_b_normals:ww,
  % and grabs the index as #1.
  \cs_new:Npn \@@_analysis_b_special:w
      \fi: \@@_analysis_b_normal:wwN 0 ; #1 ;
    {
      % Close the conditional that was open when this macro was called.
      \fi:
      % Index equal to \l_@@_analysis_index_int: end of the analysis,
      % jump to the \prg_break_point: of \@@_analysis_b:n.
      \if_int_compare:w #1 = \l_@@_analysis_index_int
        \exp_after:wN \prg_break:
      \fi:
      % Contents of the \toks register number #1, then the separator.
      \tex_the:D \tex_toks:D #1 \s_@@
      % Category code digit, selected by the stretch component of the
      % \skip register number #1: 0 gives A, 1 and 2 give 1, any other
      % value gives 2.
      \if_case:w \tex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
             \token_to_str:N A
      \or:   1
      \or:   1
      \else: 2
      \fi:
      % An odd stretch selects \@@_analysis_b_special_char:wN, an even
      % one \@@_analysis_b_special_space:w.  Either receives the next
      % index 1 + #1, a semicolon, and the result of the trailing
      % \token_to_str:N, which acts on the token that follows this macro
      % in the input stream.
      \if_int_odd:w \tex_gluestretch:D \tex_skip:D #1 \exp_stop_f:
        \exp_after:wN \@@_analysis_b_special_char:wN \int_value:w
      \else:
        \exp_after:wN \@@_analysis_b_special_space:w \int_value:w
      \fi:
      \int_eval:n { 1 + #1 } \exp_after:wN ;
      \token_to_str:N
    }
\group_end:
\cs_new:Npn \@@_analysis_b_special_char:wN #1 ; #2
  {
    % #1: next index, #2: stringified special character.  Leave its
    % character code and the separator, then resume the main loop.
    \int_value:w `#2 \s_@@
    \@@_analysis_b_loop:w #1 ;
  }
\cs_new:Npn \@@_analysis_b_special_space:w #1 ; ~
  {
    % Same as above for a space, which is removed by the parameter text
    % (the ~ after the semicolon); its character code is 32.
    32 \s_@@
    \@@_analysis_b_loop:w #1 ;
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Mapping through the analysis}
%
% \begin{macro}{\tl_analysis_map_inline:Nn, \tl_analysis_map_inline:nn}
% \begin{macro}{\@@_analysis_map:Nn}
% \begin{macro}{\@@_analysis_map:NwNw}
%   First obtain the analysis of the token list into
%   \cs{g_@@_analysis_result_tl}. To allow nested mappings, increase the
%   nesting depth \cs{g__kernel_prg_map_int} (shared between all
%   modules), then define the payload macro, which runs the user code
%   and has a name specific to that nesting depth. The looping macro
%   grabs the \meta{tokens}, \meta{catcode} and \meta{char code}; it
%   checks for the end of the loop with \cs{use_none:n} |#3|, normally
%   empty, but which becomes \cs{tl_map_break:} at the end; it then
%   calls the payload macro with the arguments in the correct order
%   (this is the reason why we cannot directly use the same macro for
%   looping and payload), and loops by calling itself. When the loop
%   ends, remember to decrease the nesting depth.
%    \begin{macrocode}
\cs_new_protected:Npn \tl_analysis_map_inline:Nn #1
  % Expand the variable #1 once and defer to the n-type function; the
  % inline code is still in the input stream at that point.
  { \exp_args:No \tl_analysis_map_inline:nn #1 }
\cs_new_protected:Npn \tl_analysis_map_inline:nn #1
  {
    % Analyse #1 (the result is stored in \g_@@_analysis_result_tl),
    % enter one more level of mapping, then give \@@_analysis_map:Nn the
    % payload macro whose name contains the current nesting depth; the
    % inline code that follows becomes its second argument.
    \@@_analysis:n {#1}
    \int_gincr:N \g__kernel_prg_map_int
    \exp_args:Nc \@@_analysis_map:Nn
      { @@_analysis_map_inline_ \int_use:N \g__kernel_prg_map_int :wNw }
  }
\cs_new_protected:Npn \@@_analysis_map:Nn #1#2
  {
    % #1: payload macro for this nesting level, #2: inline code, stored
    % globally as a three-argument protected macro.
    \cs_gset_protected:Npn #1 ##1##2##3 {#2}
    % Expand the result once behind the looping macro and append an end
    % marker: an empty <tokens> item whose next item is the braced group
    % { ? \tl_map_break: }; the \use_none:n test of the loop removes the
    % question mark and thereby runs \tl_map_break:.
    \exp_after:wN \@@_analysis_map:NwNw \exp_after:wN #1
      \g_@@_analysis_result_tl
      \s_@@ { ? \tl_map_break: } \s_@@
    % Breaking out of the loop also restores the nesting depth.
    \prg_break_point:Nn \tl_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
\cs_new_protected:Npn \@@_analysis_map:NwNw #1 #2 \s_@@ #3 #4 \s_@@
  {
    % #1: payload macro; #2, #3, #4: the three items of one entry of the
    % result, in the order they are stored.  "\use_none:n #3" vanishes
    % when #3 is a single token, and leaves \tl_map_break: when #3 is the
    % end marker "? \tl_map_break:".
    \use_none:n #3
    % The payload macro receives the items in the order #2, #4, #3.
    #1 {#2} {#4} {#3}
    \@@_analysis_map:NwNw #1
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Showing the results}
%
% \begin{macro}{\tl_analysis_show:N, \tl_analysis_log:N, \@@_analysis_show:NNN}
%   Add to \cs{@@_analysis:n} a third pass to display tokens to the terminal.
%   If the token list variable is not defined, throw the same error
%   as \cs{tl_show:N} by simply calling that function.
%    \begin{macrocode}
\cs_new_protected:Npn \tl_analysis_show:N
  % Message function \msg_show:nneeee; \tl_show:N is what gets called
  % when the variable does not exist.
  { \@@_analysis_show:NNN \msg_show:nneeee \tl_show:N }
\cs_new_protected:Npn \tl_analysis_log:N
  % Same with \msg_log:nneeee and \tl_log:N.
  { \@@_analysis_show:NNN \msg_log:nneeee \tl_log:N }
\cs_new_protected:Npn \@@_analysis_show:NNN #1#2#3
  {
    % #1: message function, #2: fallback function, #3: tl variable.
    \tl_if_exist:NTF #3
      {
        % Analyse the contents of #3 (expanded once), then issue the
        % message tl/show-analysis with the name of the variable as its
        % first argument and the formatted analysis as its second.
        \exp_args:No \@@_analysis:n {#3}
        #1 { tl } { show-analysis }
          { \token_to_str:N #3 } { \@@_analysis_show: } { } { }
      }
      % Variable not defined: let the fallback function deal with it.
      { #2 #3 }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tl_analysis_show:n, \tl_analysis_log:n, \@@_analysis_show:Nn}
%   No existence test needed here.
%    \begin{macrocode}
\cs_new_protected:Npn \tl_analysis_show:n
  % The token list argument is picked up by \@@_analysis_show:Nn.
  { \@@_analysis_show:Nn \msg_show:nneeee }
\cs_new_protected:Npn \tl_analysis_log:n
  % Same with the message function \msg_log:nneeee.
  { \@@_analysis_show:Nn \msg_log:nneeee }
\cs_new_protected:Npn \@@_analysis_show:Nn #1#2
  {
    % #1: message function, #2: token list.  The first message argument
    % is left empty, as there is no variable name to display.
    \@@_analysis:n {#2}
    #1 { tl } { show-analysis } { } { \@@_analysis_show: } { } { }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[rEXP]{\@@_analysis_show:, \@@_analysis_show_loop:wNw}
%   Here, |#1| \texttt{o}- and \texttt{e}/\texttt{x}-expands to the token;
%   |#2| is the category code (one uppercase hexadecimal digit),
%   $0$ for control sequences;
%   |#3| is the character code, which we ignore.
%   In the cases of control sequences and active characters,
%   the meaning may overflow one line, and we want to truncate
%   it. Those cases are thus separated out.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_show:
  {
    % Expand \g_@@_analysis_result_tl once behind the loop and append an
    % end marker whose second item is { ? \prg_break: }: the \use_none:n
    % test of the loop removes the question mark and runs \prg_break:,
    % which jumps to the \prg_break_point: below.
    \exp_after:wN \@@_analysis_show_loop:wNw \g_@@_analysis_result_tl
    \s_@@ { ? \prg_break: } \s_@@
    \prg_break_point:
  }
\cs_new:Npn \@@_analysis_show_loop:wNw #1 \s_@@ #2 #3 \s_@@
  {
    % Nothing for a normal entry, \prg_break: for the end marker.
    \use_none:n #2
    % Each token starts a new line with ">" and two spaces.
    \iow_newline: > \use:nn { ~ } { ~ }
    % #2 is read as a hexadecimal digit: 0 selects the control sequence
    % display, 13 (digit D) the active character display, anything else
    % the plain character display.  The \exp_after:wN chains remove the
    % pending \else: / \fi: before the selected function grabs {#1}.
    \if_int_compare:w "#2 = \c_zero_int
      \exp_after:wN \@@_analysis_show_cs:n
    \else:
      \if_int_compare:w "#2 = 13 \exp_stop_f:
        \exp_after:wN \exp_after:wN
        \exp_after:wN \@@_analysis_show_active:n
      \else:
        \exp_after:wN \exp_after:wN
        \exp_after:wN \@@_analysis_show_normal:n
      \fi:
    \fi:
    {#1}
    \@@_analysis_show_loop:wNw
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[rEXP]{\@@_analysis_show_normal:n}
%   Non-active characters are a simple matter of printing
%   the character, and its meaning. Our test suite checks that
%   begin-group and end-group characters do not mess up
%   \TeX{}'s alignment status.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_show_normal:n #1
  {
    % #1 is expanded once (by \exp_after:wN) before \token_to_str:N and
    % \token_to_meaning:N act on what it yields.  Output: the string form,
    % a space, then the meaning in parentheses.
    \exp_after:wN \token_to_str:N #1 ~
    ( \exp_after:wN \token_to_meaning:N #1 )
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]{\@@_analysis_show_value:N}
%   This expands to the value of |#1| if it has any.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_show_value:N #1
  {
    % Nothing is produced for an expandable token.
    \token_if_expandable:NF #1
      {
        % The first successful test jumps, through \prg_break:, to the
        % \prg_break_point: below, after which "= <value>" is produced.
        \token_if_chardef:NTF       #1 \prg_break: { }
        \token_if_mathchardef:NTF   #1 \prg_break: { }
        \token_if_dim_register:NTF  #1 \prg_break: { }
        \token_if_int_register:NTF  #1 \prg_break: { }
        \token_if_skip_register:NTF #1 \prg_break: { }
        \token_if_toks_register:NTF #1 \prg_break: { }
        % No test succeeded: discard the break point, \use:n and its
        % argument, so that nothing is produced.
        \use_none:nnn
        \prg_break_point:
        \use:n { \exp_after:wN = \tex_the:D #1 }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[rEXP]{\@@_analysis_show_cs:n}
% \begin{macro}[rEXP]{\@@_analysis_show_active:n}
% \begin{macro}[rEXP]{\@@_analysis_show_long:nn}
% \begin{macro}[rEXP]{\@@_analysis_show_long_aux:nnnn}
%   Control sequences and active characters are printed in the same way,
%   making sure not to go beyond the \cs{l_iow_line_count_int}. In case
%   of an overflow, we replace the last characters by
%   \cs{c_@@_analysis_show_etc_str}.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_show_cs:n #1
  % Expand #1 once and display it with the label "control sequence=".
  { \exp_args:No \@@_analysis_show_long:nn {#1} { control~sequence= } }
\cs_new:Npn \@@_analysis_show_active:n #1
  % Same with the label "active character=".
  { \exp_args:No \@@_analysis_show_long:nn {#1} { active~character= } }
\cs_new:Npn \@@_analysis_show_long:nn #1
  {
    % Pre-expand three pieces for the auxiliary: the string form of #1
    % and its meaning (one expansion each), and the value given by
    % \@@_analysis_show_value:N (f-expanded).  The label, still in the
    % input stream, becomes the fourth argument of the auxiliary.
    \@@_analysis_show_long_aux:oofn
      { \token_to_str:N #1 }
      { \token_to_meaning:N #1 }
      { \@@_analysis_show_value:N #1 }
  }
\cs_new:Npn \@@_analysis_show_long_aux:nnnn #1#2#3#4
  {
    % #1: string form, #2: meaning, #3: value part, #4: label.
    % The text to display is "#1 (#4#2#3)".  When its length as a string
    % exceeds \l_iow_line_count_int - 3, keep only its first characters
    % so that, followed by \c_@@_analysis_show_etc_str, the total length
    % is \l_iow_line_count_int - 3.
    \int_compare:nNnTF
      { \str_count:n { #1 ~ ( #4 #2 #3 ) } }
      > { \l_iow_line_count_int - 3 }
      {
        \str_range:nnn { #1 ~ ( #4 #2 #3 ) } { 1 }
          {
            \l_iow_line_count_int - 3
            - \str_count:N \c_@@_analysis_show_etc_str
          }
        \c_@@_analysis_show_etc_str
      }
      { #1 ~ ( #4 #2 #3 ) }
  }
% Variant used by \@@_analysis_show_long:nn.
\cs_generate_variant:Nn \@@_analysis_show_long_aux:nnnn { oof }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Peeking ahead}
%
% \begin{macro}[EXP]{\peek_analysis_map_break:, \peek_analysis_map_break:n}
%   The break statements use the general \cs{prg_map_break:Nn}.
%    \begin{macrocode}
\cs_new:Npn \peek_analysis_map_break:
  % Break out of the mapping without running any extra code.
  { \prg_map_break:Nn \peek_analysis_map_break: { } }
\cs_new:Npn \peek_analysis_map_break:n
  % The braced code that follows in the input stream is taken as the
  % second argument of \prg_map_break:Nn.
  { \prg_map_break:Nn \peek_analysis_map_break: }
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\l_@@_peek_charcode_int}
%    \begin{macrocode}
\int_new:N \l_@@_peek_charcode_int % set in \@@_peek_analysis_special: from \@@_analysis_extract_charcode:
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}{\@@_analysis_char_arg:Nw, \@@_analysis_char_arg_aux:Nw}
%   After a call to \tn{futurelet} \cs{l_@@_analysis_token} followed by
%   a stringified character token (either explicit space or catcode
%   other character), grab the argument and pass it to |#1|.  We only
%   need to do anything in the case of a space.
%    \begin{macrocode}
\cs_new:Npn \@@_analysis_char_arg:Nw
  {
    % Only a space needs help: an undelimited argument would skip it.
    % In that case the auxiliary below is inserted; otherwise nothing is
    % left and the function that follows grabs its argument by itself.
    \if_meaning:w \l_@@_analysis_token \c_space_token
      \exp_after:wN \@@_analysis_char_arg_aux:Nw
    \fi:
  }
% Remove the space through the parameter text and pass it, braced, to
% the function #1.
\cs_new:Npn \@@_analysis_char_arg_aux:Nw #1 ~ { #1 { ~ } }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%   {
%     \peek_analysis_map_inline:n,
%     \@@_peek_analysis_loop:NNn, \@@_peek_analysis_test:,
%     \@@_peek_analysis_exp:N, \@@_peek_analysis_exp_aux:N,
%     \@@_peek_analysis_nonexp:N, \@@_peek_analysis_cs:N,
%     \@@_peek_analysis_char:N, \@@_peek_analysis_char:w,
%     \@@_peek_analysis_special:, \@@_peek_analysis_retest:,
%     \@@_peek_analysis_str:,
%     \@@_peek_analysis_str:w, \@@_peek_analysis_str:n,
%     \@@_peek_analysis_active_str:n, \@@_peek_analysis_explicit:n,
%     \@@_peek_analysis_escape:, \@@_peek_analysis_collect:w,
%     \@@_peek_analysis_collect:n, \@@_peek_analysis_collect_loop:,
%     \@@_peek_analysis_collect_test:, \@@_peek_analysis_collect_end:NNNN
%   }
%   Save the user's code in a control sequence that is suitable for
%   nested maps.  We may wish to pass to this function an \tn{outer}
%   control sequence or active character; for this we will undefine
%   any expandable token (testing if it is \tn{outer} is much slower)
%   within a group, closed immediately after the function reads its
%   arguments to avoid affecting the user's code or even our peek code
%   (there is no risk of undefining \cs{group_end:} itself since that is
%   not expandable).  This user's code function also
%   calls the loop auxiliary, and includes the trailing
%   \cs{prg_break_point:Nn} for when the user wants to stop the loop.
%   The loop auxiliary must remove that break point because it must look
%   at the input stream.
%    \begin{macrocode}
\cs_new_protected:Npn \peek_analysis_map_inline:n #1
  {
    % Paired with the \group_align_safe_end: in the break code below.
    \group_align_safe_begin:
    \int_gincr:N \g__kernel_prg_map_int
    % User-code function for this nesting depth; it takes the three
    % arguments describing one token.
    \cs_set_protected:cpn
      { @@_analysis_map_ \int_use:N \g__kernel_prg_map_int :nnN }
      ##1##2##3
      {
        % Close the group opened by \@@_peek_analysis_loop:NNn, run the
        % user code #1, then loop.  The loop function takes three
        % arguments, which here are the break point, its break function
        % and its code: they stay available while the user code runs
        % and are removed when the loop goes on.
        \group_end:
        #1
        \@@_peek_analysis_loop:NNn
          \prg_break_point:Nn \peek_analysis_map_break:
            {
              \int_gdecr:N \g__kernel_prg_map_int
              \group_align_safe_end:
            }
      }
    % First iteration: three dummy arguments.
    \@@_peek_analysis_loop:NNn ? ? ?
  }
%    \end{macrocode}
%   The loop starts a group (closed by the user-code function defined
%   above) with a normalized escape character, and checks if the next
%   token is special or \texttt{N}-type (distinguishing expandable from
%   non-expandable tokens).  The test for nonexpandable tokens in
%   \cs{@@_peek_analysis_test:} must be done after the tests for
%   begin-group, end-group, and space tokens, in case \cs{l_peek_token}
%   is either \tn{outer} or is a primitive \TeX{} conditional, as such
%   tokens cannot be skipped over correctly by conditional code.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_loop:NNn #1#2#3
  {
    % The three arguments are discarded.  The group opened here is closed
    % by the user-code function.
    \group_begin:
    % Store the user-code function for the current nesting depth.
    \tl_set:Ne \l_@@_peek_code_tl
      {
        \exp_not:c
          { @@_analysis_map_ \int_use:N \g__kernel_prg_map_int :nnN }
      }
    % Use a backslash as escape character, then look at the next token.
    \int_set:Nn \tex_escapechar:D { `\\ }
    \peek_after:Nw \@@_peek_analysis_test:
  }
\cs_new_protected:Npn \@@_peek_analysis_test:
  {
    % The number read by \if_case:w is produced by the tests that follow
    % it.  Each of the first three leaves \c_max_int when \l_peek_token
    % is a begin-group, end-group or space token, which selects the
    % \else: branch (special token).  Otherwise the fourth one leaves
    % \c_one_int when \exp_not:N does not change the meaning of
    % \l_peek_token, that is, when it is not expandable (\or: branch).
    % Otherwise \c_zero_int is read and the first branch is taken.
    \if_case:w
      \if_catcode:w \exp_not:N \l_peek_token {   \c_max_int \fi:
      \if_catcode:w \exp_not:N \l_peek_token }   \c_max_int \fi:
      \if_meaning:w \l_peek_token \c_space_token \c_max_int \fi:
      \exp_after:wN \if_meaning:w \exp_not:N \l_peek_token \l_peek_token
        \c_one_int
      \fi:
      \c_zero_int
      % Expandable token: once the conditional is closed, what is left
      % is "\exp_after:wN <function> \exp_not:N", so that \exp_not:N
      % acts on the token in the input stream before it is grabbed.
      \exp_after:wN \exp_after:wN
      \exp_after:wN \@@_peek_analysis_exp:N
      \exp_after:wN \exp_not:N
    \or:
      \exp_after:wN \@@_peek_analysis_nonexp:N
    \else:
      \exp_after:wN \@@_peek_analysis_special:
    \fi:
  }
%    \end{macrocode}
%   Expandable tokens (which are automatically |N|-type) can be
%   \tn{outer} macros, hence the need for \cs{exp_after:wN} and
%   \cs{exp_not:N} in the code above, which allows the next function to
%   safely grab the token as an argument.  We run some code that is
%   expanded using the primitive \cs{cs_set_nopar:Npe} rather than
%   \cs{tl_set:Ne} to avoid grabbing it as an argument as |#1| may be
%   \tn{outer}.  To allow~|#1| as an argument of the user's function
%   (stored in \cs{l_@@_peek_code_tl}), we set it equal to
%   \cs{scan_stop:}, but we do it at the last minute because |#1|
%   may be some pretty important function such as \cs{exp_after:wN}.
%   Then we put the user's function and the elaborate first argument
%   \cs{__kernel_exp_not:w} \cs{exp_after:wN} |{| \cs{exp_not:N} |#1| |}|:
%   indeed we cannot use \cs{exp_not:n} |{#1}| as this breaks for an
%   \tn{outer} macro and we cannot use \cs{exp_not:N} |#1|, as
%   \texttt{o}-expanding this yields a \enquote{notexpanded} token equal
%   to (a weird) \tn{relax}, which would have the wrong value for
%   primitive \TeX{} conditionals such as \cs{if_meaning:w}.
%
%   Then we must add |{-1}0| if the token is a
%   control sequence and \Arg{charcode}|D| otherwise.  Distinguishing
%   the two cases is easy: since we have made the escape character
%   printable, \cs{token_to_str:N} gives at least two characters for a
%   control sequence versus a single one for an active character
%   (possibly being a space).  Importantly, once we apply
%   \cs{token_to_str:N} we no longer need to worry about \tn{outer}
%   tokens.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_exp:N #1
  {
    % Redefine \l_@@_peek_code_tl by an e-type definition, so that #1 is
    % never passed as an argument to an assignment function.  After
    % expansion the new contents are, in order:
    %   \tex_let:D #1 \scan_stop:
    %   the previous contents (the user-code function),
    %   { \__kernel_exp_not:w \exp_after:wN { \exp_not:N #1 } },
    %   the <char code> and <catcode> arguments produced by
    %   \@@_peek_analysis_exp_aux:Nw from the string form of #1.
    \cs_set_nopar:Npe \l_@@_peek_code_tl
      {
        \tex_let:D \exp_not:N #1 \scan_stop:
        \exp_not:o \l_@@_peek_code_tl
        {
          \exp_not:n { \__kernel_exp_not:w \exp_after:wN }
            { \exp_not:N \exp_not:N \exp_not:N #1 }
        }
        \exp_after:wN \@@_peek_analysis_exp_aux:Nw
        \token_to_str:N #1 \prg_do_nothing: \s_@@
      }
    % Run the code just built.
    \l_@@_peek_code_tl
  }
\cs_new:Npe \@@_peek_analysis_exp_aux:Nw #1#2 \s_@@
  {
    % #1: first character of the string form, #2: the rest, ending with
    % \prg_do_nothing:.  Defined by e-expansion so that "D" is inserted
    % through \token_to_str:N at definition time.
    % The test is true when #2 contains nothing but \prg_do_nothing:,
    % namely when the string was a single character: then leave
    % {<char code>} D, otherwise {-1} 0 (control sequence).
    \exp_not:N \if:w \scan_stop: #2 \scan_stop:
      { \exp_not:N \int_value:w `#1 } \token_to_str:N D
    \exp_not:N \else:
      { -1 } 0
    \exp_not:N \fi:
  }
%    \end{macrocode}
%   For normal non-expandable tokens we must distinguish characters
%   (including active ones and macro parameter characters) from control
%   sequences (whose string representation is more than one character
%   because we made the escape character printable).  For a control
%   sequence call the user code with suitable arguments, wrapping |#1|
%   within \cs{exp_not:n} just in case it happens to be equal to a macro
%   parameter character.  We do not skip \cs{exp_not:n} when
%   unnecessary, because this auxiliary is also called in
%   \cs{@@_peek_analysis_retest:} where we have changed some control
%   sequences or active characters to \cs{scan_stop:} temporarily.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_nonexp:N #1
  {
    % True when the string form of #1 is a single character: \use_none:n
    % removes its first character and the two \scan_stop: are compared
    % with each other only if nothing else remains.
    \if_charcode:w
        \scan_stop:
        \exp_after:wN \use_none:n \token_to_str:N #1 \prg_do_nothing:
        \scan_stop:
      \exp_after:wN \@@_peek_analysis_char:N
    \else:
      \exp_after:wN \@@_peek_analysis_cs:N
    \fi:
    % Argument of the selected function.
    #1
  }
\cs_new_protected:Npn \@@_peek_analysis_cs:N #1
  % Run the user code with <tokens> = \exp_not:n {#1}, character code -1
  % and category code 0.
  { \l_@@_peek_code_tl { \exp_not:n {#1} } { -1 } 0 }
%    \end{macrocode}
%   For normal characters we must determine their catcode.  The main
%   difficulty is that the character may be an active character
%   masquerading as (i.e., set equal to) itself with a different
%   catcode.  Two approaches based on \tn{lowercase} can detect this.
%   One could make an active character with the same catcode as~|#1| and
%   change its definition before testing the catcode of~|#1|, but in
%   some Unicode engines this fills up the hash table uselessly.
%   Instead, we lowercase~|#1| itself, changing its character code
%   to~$32$, namely space (because \LuaTeX{} cannot turn catcode~$10$
%   characters into anything other than character code~$32$), then we apply
%   \cs{@@_analysis_b_char:Nn}, which detects active characters by
%   comparing them to \cs{tex_undefined:D}, and we must have undefined
%   the active space (locally) for this test to work.
%   To define \cs{@@_peek_analysis_char:N} itself we use an
%   |e|-expanding assignment to get the active space in the right place
%   after making it (just for this definition) unexpandable.
%   Finally \cs{@@_peek_analysis_char:w} receives the \meta{charcode},
%   \meta{user function}, \meta{catcode}, and \meta{token}, and places
%   the arguments in the correct order.  It keeps \cs{exp_not:n} for
%   macro parameter characters and active characters (the latter could
%   be macro parameter characters, and it seems more uniform to always
%   put \cs{exp_not:n}), and otherwise eliminates it by expanding once
%   with \cs{exp_args:NNNo}.
%    \begin{macrocode}
\group_begin:
% Within this group the active space is equal to \scan_stop:, so that it
% survives the e-expansion of the definition below.
\char_set_active_eq:NN \ \scan_stop:
\cs_new_protected:Npe \@@_peek_analysis_char:N #1
  {
    % Run time, step 1: make the active space undefined (the
    % \char_generate:nn is expanded at definition time).
    \cs_set_eq:NN
      \char_generate:nn { 32 } { 13 }
      \exp_not:N \tex_undefined:D
    % Step 2: lowercase #1 to character code 32 and append to
    % \l_@@_peek_code_tl the category code digit that
    % \@@_analysis_b_char:Nn produces for it (\use_none:n discards the
    % character afterwards).
    \tex_lccode:D `#1 = 32 \exp_stop_f:
    \tex_lowercase:D
      {
        \tl_put_right:Ne \exp_not:N \l_@@_peek_code_tl
          { \exp_not:n { \@@_analysis_b_char:Nn \use_none:n } {#1} }
      }
    % Step 3: call \@@_peek_analysis_char:w with the character code of
    % #1, \s_@@, the contents of \l_@@_peek_code_tl (expanded once) and
    % finally #1 itself.
    \exp_not:n
      {
        \exp_after:wN \@@_peek_analysis_char:w
        \int_value:w
      }
      `#1
    \exp_not:n { \exp_after:wN \s_@@ \l_@@_peek_code_tl }
    #1
  }
\group_end:
\cs_new_protected:Npn \@@_peek_analysis_char:w #1 \s_@@ #2#3#4
  {
    % #1: character code, #2: user-code function, #3: category code
    % digit, #4: the token.  Unless #3 is 6 or D, \exp_args:NNNo is
    % inserted: it skips the two \fi: and #2, and expands the braced
    % argument once, which removes the \exp_not:n around #4.
    \if_charcode:w 6 #3
    \else:
      \if_charcode:w D #3
      \else:
        \exp_args:NNNo
      \fi:
    \fi:
    #2 { \exp_not:n {#4} } {#1} #3
  }
%    \end{macrocode}
%   For special characters the idea is to eventually act with
%   \cs{token_to_str:N}, then pick up one by one the characters of this
%   string representation until hitting the token that follows.  First
%   determine the character code of (the meaning of) the \meta{token}
%   (which we know is a special token), make sure the escape character
%   is different from it, normalize the meanings of two active
%   characters and the empty control sequence, and filter out these
%   cases in \cs{@@_peek_analysis_retest:}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_special:
  {
    % Copy \l_peek_token and store the character code obtained from
    % \@@_analysis_extract_charcode: (defined elsewhere).
    \tex_let:D \l_@@_analysis_token = ~ \l_peek_token
    \int_set:Nn \l_@@_peek_charcode_int
      { \@@_analysis_extract_charcode: }
    % Keep the escape character different from that character code.
    \if_int_compare:w \l_@@_peek_charcode_int = \tex_escapechar:D
      \int_set:Nn \tex_escapechar:D { `\/ }
    \fi:
    % Set to \scan_stop: the active characters with these two character
    % codes, and the control sequence with an empty name.
    \char_set_active_eq:nN { \l_@@_peek_charcode_int } \scan_stop:
    \char_set_active_eq:nN { \tex_escapechar:D } \scan_stop:
    \cs_set_eq:cN { } \scan_stop:
    % Look at the next token again, now that these meanings changed.
    \tex_futurelet:D \l_@@_analysis_token
    \@@_peek_analysis_retest:
  }
\cs_new_protected:Npn \@@_peek_analysis_retest:
  {
    % Meaning now equal to \scan_stop:: the token is one of those set to
    % \scan_stop: by \@@_peek_analysis_special:, and can be grabbed as an
    % N-type argument.  Otherwise go on with the string-based analysis.
    \if_meaning:w \l_@@_analysis_token \scan_stop:
      \exp_after:wN \@@_peek_analysis_nonexp:N
    \else:
      \exp_after:wN \@@_peek_analysis_str:
    \fi:
  }
%    \end{macrocode}
%   At this point we know the meaning of the \meta{token} in the input
%   stream is \cs{l_peek_token}, either a space (32, 10) or a
%   begin-group or end-group token (catcode $1$ or~$2$), and we excluded
%   a few cases that would be difficult later (empty control sequence,
%   active character with the same character code as its meaning or as
%   the escape character).  The idea is to apply \cs{token_to_str:N} to
%   the \meta{token} then grab characters (of category code~$12$ except
%   for spaces that have category code~$10$) to reconstruct it.  In
%   earlier versions of the code we would peek at the \meta{next token}
%   that lies after \meta{token} in the input stream, which would help
%   us be more accurate in reconstructing the \meta{token} in edge
%   cases (mentioned below), but this had the side-effect of tokenizing
%   the input stream (turning characters into tokens) farther ahead than
%   needed.
%
%   We hit the \meta{token} with \cs{token_to_str:N} and start grabbing
%   characters.  More
%   precisely, by looking at the first character in the string
%   representation of the \meta{token} we distinguish three cases:
%   a stringified control sequence starts with the escape character; for
%   an explicit character we find that same character; for an active
%   character we find anything else (we made sure to exclude the case of
%   an active character whose string representation coincides with the
%   other two cases).
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_str:
  {
    % The chain of \exp_after:wN first applies \token_to_str:N to the
    % token that follows in the input stream; \tex_futurelet:D then sets
    % \l_@@_analysis_token to the first character of that string and
    % runs \@@_peek_analysis_str:w.
    \exp_after:wN \tex_futurelet:D
    \exp_after:wN \l_@@_analysis_token
    \exp_after:wN \@@_peek_analysis_str:w
    \token_to_str:N
  }
\cs_new_protected:Npn \@@_peek_analysis_str:w
  % Grab the first character of the string (even a space) and pass it
  % to \@@_peek_analysis_str:n.
  { \@@_analysis_char_arg:Nw \@@_peek_analysis_str:n }
\cs_new_protected:Npn \@@_peek_analysis_str:n #1
  {
    % #1 is the first character of the stringified token; classify it
    % by its character code.
    \int_compare:nNnTF { `#1 } = { \l_@@_peek_charcode_int }
      {
        % Same code as stored in \l_@@_peek_charcode_int: explicit
        % character token.
        \@@_peek_analysis_explicit:n {#1}
      }
      {
        \int_compare:nNnTF { `#1 } = { \tex_escapechar:D }
          {
            % Escape character: the token was a control sequence.
            \@@_peek_analysis_escape:
          }
          {
            % Anything else: stringified active character.
            \@@_peek_analysis_active_str:n {#1}
          }
      }
  }
%    \end{macrocode}
%   When |#1| is a stringified active character we pass appropriate
%   arguments to the user's code; thankfully \cs{char_generate:nn}
%   can make active characters.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_active_str:n #1
  {
    % Append the three arguments for the user code: the active character
    % with the character code of #1, that character code, and the
    % category code digit D.
    \tl_put_right:Ne \l_@@_peek_code_tl
      {
        { \char_generate:nn { `#1 } { 13 } }
        { \int_value:w `#1 }
        \token_to_str:N D
      }
    % Run the user code.
    \l_@@_peek_code_tl
  }
%    \end{macrocode}
%   When |#1| matches the character we had extracted from the meaning of
%   \cs{l_peek_token}, the token was an explicit character, which can be
%   a standard space, or a begin-group or end-group character with some
%   character code.  In the latter two cases we call
%   \cs{char_generate:nn} with suitable arguments and put suitable
%   \cs{if_false:} \cs{fi:} constructions to make the result balanced
%   and such that \texttt{o}-expanding or \texttt{e}/\texttt{x}-expanding gives
%   back a single (unbalanced) begin-group or end-group character.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_explicit:n #1
  {
    % Append the three arguments for the user code, depending on the
    % meaning of \l_peek_token.
    \tl_put_right:Ne \l_@@_peek_code_tl
      {
        \if_meaning:w \l_peek_token \c_space_token
          % Space: <tokens> is a space, code 32, category code digit A.
          { ~ } { 32 } \token_to_str:N A
        \else:
          \if_catcode:w \l_peek_token \c_group_begin_token
            % Begin-group character.  After this e-expansion <tokens> is
            %   \exp_after:wN <begin-group char> \if_false: } \fi:
            % (the inner "\if_false: { \fi: }" leaves only the closing
            % brace), which is balanced.
            {
              \exp_not:N \exp_after:wN
              \char_generate:nn { `#1 } { 1 }
              \exp_not:N \if_false:
              \if_false: { \fi: }
              \exp_not:N \fi:
            }
            { \int_value:w `#1 }
            1
          \else:
            % End-group character.  After this e-expansion <tokens> is
            %   \if_false: { \fi: <end-group char>
            % (the inner "\if_false: } \fi:" leaves nothing), which is
            % balanced as well.
            {
              \exp_not:N \if_false:
              { \if_false: } \fi:
              \exp_not:N \fi:
              \char_generate:nn { `#1 } { 2 }
            }
            { \int_value:w `#1 }
            2
          \fi:
        \fi:
      }
    % Run the user code.
    \l_@@_peek_code_tl
  }
%    \end{macrocode}
%   Finally there is the case of a special token whose string
%   representation starts with an escape character, namely the token was
%   a control sequence.  In that case we could have grabbed the token
%   directly as an \texttt{N}-type argument, but of course we couldn't
%   know that until we had run all the various tests including
%   stringifying the token.  We are thus left with the hard work of
%   picking up one by one the characters in the csname (being careful
%   about spaces), until the constructed csname has the expected
%   meaning.  This fails if someone defines a token like
%   \cs[no-index]{bgroup@my} whose string representation starts the same
%   as another token with the same meaning being an implicit character
%   token of category code $1$, $2$, or $10$.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_escape:
  {
    % Start with an empty name in \l_@@_internal_a_tl, then look at the
    % next character of the string and collect it.
    \tl_clear:N \l_@@_internal_a_tl
    \tex_futurelet:D \l_@@_analysis_token
      \@@_peek_analysis_collect:w
  }
\cs_new_protected:Npn \@@_peek_analysis_collect:w
  % Grab the next character (even a space) and pass it to
  % \@@_peek_analysis_collect:n.
  { \@@_analysis_char_arg:Nw \@@_peek_analysis_collect:n }
\cs_new_protected:Npn \@@_peek_analysis_collect:n #1
  {
    % Add the character #1 to the name collected so far, then test it.
    \tl_put_right:Nn \l_@@_internal_a_tl {#1}
    \@@_peek_analysis_collect_loop:
  }
\cs_new_protected:Npn \@@_peek_analysis_collect_loop:
  {
    % Build a control sequence before comparing: from the name collected
    % in \l_@@_internal_a_tl when a control sequence with that name
    % exists, from the fixed short name c_one otherwise.  Its meaning is
    % then compared with that of \l_peek_token.
    \exp_after:wN \if_meaning:w
      \cs:w
      \if_cs_exist:w \l_@@_internal_a_tl \cs_end:
        \l_@@_internal_a_tl
      \else:
        c_one % anything short
      \fi:
      \cs_end:
      \l_peek_token
      % Same meaning: the end function takes four arguments, namely the
      % \fi: below and the three tokens of the \tex_futurelet:D call,
      % so that collecting stops.
      \@@_peek_analysis_collect_end:NNNN
    \fi:
    % Otherwise look at the next character and keep collecting.
    \tex_futurelet:D \l_@@_analysis_token
      \@@_peek_analysis_collect:w
  }
%    \end{macrocode}
%   As in all other cases, end by calling the user code with suitable
%   arguments (here |#1| is \cs{fi:}).
%    \begin{macrocode}
\cs_new_protected:Npn \@@_peek_analysis_collect_end:NNNN #1#2#3#4
  {
    % #1 is the \fi: of \@@_peek_analysis_collect_loop:, put back to
    % close the conditional; #2, #3 and #4 are discarded.
    #1
    % Append the three arguments for the user code: \exp_not:n around
    % the control sequence whose name is in \l_@@_internal_a_tl,
    % character code -1 and category code 0.
    \tl_put_right:Ne \l_@@_peek_code_tl
      {
        { \exp_not:N \exp_not:n { \exp_not:c { \l_@@_internal_a_tl } } }
        { -1 }
        0
      }
    % Run the user code.
    \l_@@_peek_code_tl
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Messages}
%
% \begin{variable}{\c_@@_analysis_show_etc_str}
%   When a control sequence (or active character)
%   and its meaning are too long to fit in one line
%   of the terminal, the end is replaced by this token list.
%    \begin{macrocode}
\tl_const:Ne \c_@@_analysis_show_etc_str % (
  % The string form of the control sequence \ETC, a dot and a closing
  % parenthesis; the comment on the line above holds the matching
  % opening parenthesis.
  { \token_to_str:N \ETC.) }
%    \end{macrocode}
% \end{variable}
%
%    \begin{macrocode}
\msg_new:nnn { tl } { show-analysis }
  {
    % #1: name of the token list variable, empty when a token list was
    % given directly; #2: formatted analysis, empty for an empty list.
    The~token~list~ \tl_if_empty:nF {#1} { #1 ~ }
    \tl_if_empty:nTF {#2}
      { is~empty }
      { contains~the~tokens: #2 }
  }
%    \end{macrocode}
%
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex