{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      : Aihc.Parser.Lex
-- Description : Lex Haskell source into span-annotated tokens with inline extension handling
--
-- This module performs the pre-parse tokenization step for Haskell source code.
-- It turns raw text into 'LexToken's that preserve:
--
-- * a semantic token classification ('LexTokenKind')
-- * the original token text ('lexTokenText')
-- * source location information ('lexTokenSpan')
--
-- The lexer runs in two phases:
--
-- 1. /Raw tokenization/ with a custom incremental scanner that consumes one or more
--    input chunks and emits tokens lazily. Extension-specific lexing (such as
--    @NegativeLiterals@ and @LexicalNegation@) is handled inline during this phase
--    by tracking the previous token context.
-- 2. /Layout insertion/ ('applyLayoutTokens') that inserts virtual @{@, @;@ and @}@
--    according to indentation (the offside rule), so the parser can treat implicit
--    layout like explicit braces and semicolons.
--
-- Scanning is incremental and error-tolerant:
--
-- * token production starts as soon as enough input is available
-- * malformed lexemes produce 'TkError' tokens instead of aborting lexing
-- * @# ...@, @#line ...@, @{-# LINE #-}@ and @{-# COLUMN #-}@ are handled in-band by
--   the lexer and update subsequent token spans without being exposed as normal tokens
--
-- Layout-sensitive syntax is the tricky part. The implementation tracks a stack of
-- layout contexts and mirrors the @haskell-src-exts@ model summarized in
-- @docs/hse-indentation-layout.md@:
--
-- * after layout-introducing keywords (currently @do@, @of@, @let@, @where@, @\\case@, plus optional module
--   body layout), mark a pending implicit block
-- * if the next token is an explicit @{@, disable implicit insertion for that block
-- * otherwise, open an implicit layout context at the next token column
-- * at beginning-of-line tokens, dedent emits virtual @}@, equal-indent emits virtual
--   @;@ (with a small suppression rule for @then@/@else@)
--
-- Keyword classification is intentionally lexical and exact. 'lexIdentifier'
-- produces a keyword token /only/ when the full identifier text exactly matches a
-- reserved word in 'keywordTokenKind'. That means:
--
-- * @where@ becomes 'TkKeywordWhere'
-- * @where'@, @_where@, and @M.where@ remain identifiers
--
-- In other words, use keyword tokens only for exact reserved lexemes; contextual
-- validity is left to the parser.
module Aihc.Parser.Lex
  ( LexToken (..),
    LexTokenKind (..),
    isReservedIdentifier,
    readModuleHeaderExtensions,
    readModuleHeaderExtensionsFromChunks,
    lexTokensFromChunks,
    lexModuleTokensFromChunks,
    lexTokensWithExtensions,
    lexModuleTokensWithExtensions,
    lexTokens,
    lexModuleTokens,
  )
where

import Aihc.Parser.Syntax
import Control.DeepSeq (NFData)
import Data.Char (digitToInt, isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isHexDigit, isOctDigit, isSpace)
import Data.List qualified as List
import Data.Maybe (fromMaybe, isJust, mapMaybe)
import Data.Text (Text)
import Data.Text qualified as T
import GHC.Generics (Generic)
import Numeric (readHex, readInt, readOct)

data LexTokenKind
  = -- Keywords (reserved identifiers per Haskell Report Section 2.4)
    TkKeywordCase
  | TkKeywordClass
  | TkKeywordData
  | TkKeywordDefault
  | TkKeywordDeriving
  | TkKeywordDo
  | TkKeywordElse
  | TkKeywordForeign
  | TkKeywordIf
  | TkKeywordImport
  | TkKeywordIn
  | TkKeywordInfix
  | TkKeywordInfixl
  | TkKeywordInfixr
  | TkKeywordInstance
  | TkKeywordLet
  | TkKeywordModule
  | TkKeywordNewtype
  | TkKeywordOf
  | TkKeywordThen
  | TkKeywordType
  | TkKeywordWhere
  | TkKeywordUnderscore -- _ (wildcard, reserved per Report)
  | -- Context-sensitive keywords (not strictly reserved per Report, but needed for imports)
    TkKeywordQualified
  | TkKeywordAs
  | TkKeywordHiding
  | -- Reserved operators (per Haskell Report Section 2.4)
    TkReservedDotDot -- ..
  | TkReservedColon -- :
  | TkReservedDoubleColon -- ::
  | TkReservedEquals -- =
  | TkReservedBackslash -- \
  | TkReservedPipe -- \"|\"
  | TkReservedLeftArrow -- <-
  | TkReservedRightArrow -- ->
  | TkReservedAt -- @
  | -- Note: ~ is NOT reserved; it uses whitespace-sensitive lexing (GHC proposal 0229)
    TkReservedDoubleArrow -- =>
  | -- Identifiers (per Haskell Report Section 2.4)
    TkVarId Text -- variable identifier (starts lowercase/_)
  | TkConId Text -- constructor identifier (starts uppercase)
  | TkQVarId Text -- qualified variable identifier
  | TkQConId Text -- qualified constructor identifier
  | -- Operators (per Haskell Report Section 2.4)
    TkVarSym Text -- variable symbol (doesn't start with :)
  | TkConSym Text -- constructor symbol (starts with :)
  | TkQVarSym Text -- qualified variable symbol
  | TkQConSym Text -- qualified constructor symbol
  | -- Literals
    TkInteger Integer
  | TkIntegerBase Integer Text
  | TkFloat Double Text
  | TkChar Char
  | TkString Text
  | -- Special characters (per Haskell Report Section 2.2)
    TkSpecialLParen -- (
  | TkSpecialRParen -- )
  | TkSpecialComma -- ,
  | TkSpecialSemicolon -- ;
  | TkSpecialLBracket -- [
  | TkSpecialRBracket -- ]
  | TkSpecialBacktick -- `
  | TkSpecialLBrace -- {
  | TkSpecialRBrace -- }
  | -- LexicalNegation support
    TkMinusOperator -- minus operator when LexicalNegation enabled (before prefix detection)
  | TkPrefixMinus -- prefix minus (tight, no space) for LexicalNegation
  | -- Whitespace-sensitive operator support (GHC proposal 0229)
    TkPrefixBang -- prefix bang (!x) for bang patterns
  | TkPrefixTilde -- prefix tilde (~x) for irrefutable patterns
  | -- Pragmas
    TkPragmaLanguage [ExtensionSetting]
  | TkPragmaWarning Text
  | TkPragmaDeprecated Text
  | -- Other
    TkQuasiQuote Text Text
  | TkError Text
  deriving (LexTokenKind -> LexTokenKind -> Bool
(LexTokenKind -> LexTokenKind -> Bool)
-> (LexTokenKind -> LexTokenKind -> Bool) -> Eq LexTokenKind
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LexTokenKind -> LexTokenKind -> Bool
== :: LexTokenKind -> LexTokenKind -> Bool
$c/= :: LexTokenKind -> LexTokenKind -> Bool
/= :: LexTokenKind -> LexTokenKind -> Bool
Eq, Eq LexTokenKind
Eq LexTokenKind =>
(LexTokenKind -> LexTokenKind -> Ordering)
-> (LexTokenKind -> LexTokenKind -> Bool)
-> (LexTokenKind -> LexTokenKind -> Bool)
-> (LexTokenKind -> LexTokenKind -> Bool)
-> (LexTokenKind -> LexTokenKind -> Bool)
-> (LexTokenKind -> LexTokenKind -> LexTokenKind)
-> (LexTokenKind -> LexTokenKind -> LexTokenKind)
-> Ord LexTokenKind
LexTokenKind -> LexTokenKind -> Bool
LexTokenKind -> LexTokenKind -> Ordering
LexTokenKind -> LexTokenKind -> LexTokenKind
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: LexTokenKind -> LexTokenKind -> Ordering
compare :: LexTokenKind -> LexTokenKind -> Ordering
$c< :: LexTokenKind -> LexTokenKind -> Bool
< :: LexTokenKind -> LexTokenKind -> Bool
$c<= :: LexTokenKind -> LexTokenKind -> Bool
<= :: LexTokenKind -> LexTokenKind -> Bool
$c> :: LexTokenKind -> LexTokenKind -> Bool
> :: LexTokenKind -> LexTokenKind -> Bool
$c>= :: LexTokenKind -> LexTokenKind -> Bool
>= :: LexTokenKind -> LexTokenKind -> Bool
$cmax :: LexTokenKind -> LexTokenKind -> LexTokenKind
max :: LexTokenKind -> LexTokenKind -> LexTokenKind
$cmin :: LexTokenKind -> LexTokenKind -> LexTokenKind
min :: LexTokenKind -> LexTokenKind -> LexTokenKind
Ord, Int -> LexTokenKind -> ShowS
[LexTokenKind] -> ShowS
LexTokenKind -> String
(Int -> LexTokenKind -> ShowS)
-> (LexTokenKind -> String)
-> ([LexTokenKind] -> ShowS)
-> Show LexTokenKind
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LexTokenKind -> ShowS
showsPrec :: Int -> LexTokenKind -> ShowS
$cshow :: LexTokenKind -> String
show :: LexTokenKind -> String
$cshowList :: [LexTokenKind] -> ShowS
showList :: [LexTokenKind] -> ShowS
Show, ReadPrec [LexTokenKind]
ReadPrec LexTokenKind
Int -> ReadS LexTokenKind
ReadS [LexTokenKind]
(Int -> ReadS LexTokenKind)
-> ReadS [LexTokenKind]
-> ReadPrec LexTokenKind
-> ReadPrec [LexTokenKind]
-> Read LexTokenKind
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS LexTokenKind
readsPrec :: Int -> ReadS LexTokenKind
$creadList :: ReadS [LexTokenKind]
readList :: ReadS [LexTokenKind]
$creadPrec :: ReadPrec LexTokenKind
readPrec :: ReadPrec LexTokenKind
$creadListPrec :: ReadPrec [LexTokenKind]
readListPrec :: ReadPrec [LexTokenKind]
Read, (forall x. LexTokenKind -> Rep LexTokenKind x)
-> (forall x. Rep LexTokenKind x -> LexTokenKind)
-> Generic LexTokenKind
forall x. Rep LexTokenKind x -> LexTokenKind
forall x. LexTokenKind -> Rep LexTokenKind x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. LexTokenKind -> Rep LexTokenKind x
from :: forall x. LexTokenKind -> Rep LexTokenKind x
$cto :: forall x. Rep LexTokenKind x -> LexTokenKind
to :: forall x. Rep LexTokenKind x -> LexTokenKind
Generic, LexTokenKind -> ()
(LexTokenKind -> ()) -> NFData LexTokenKind
forall a. (a -> ()) -> NFData a
$crnf :: LexTokenKind -> ()
rnf :: LexTokenKind -> ()
NFData)

data LexToken = LexToken
  { LexToken -> LexTokenKind
lexTokenKind :: !LexTokenKind,
    LexToken -> Text
lexTokenText :: !Text,
    LexToken -> SourceSpan
lexTokenSpan :: !SourceSpan
  }
  deriving (LexToken -> LexToken -> Bool
(LexToken -> LexToken -> Bool)
-> (LexToken -> LexToken -> Bool) -> Eq LexToken
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LexToken -> LexToken -> Bool
== :: LexToken -> LexToken -> Bool
$c/= :: LexToken -> LexToken -> Bool
/= :: LexToken -> LexToken -> Bool
Eq, Eq LexToken
Eq LexToken =>
(LexToken -> LexToken -> Ordering)
-> (LexToken -> LexToken -> Bool)
-> (LexToken -> LexToken -> Bool)
-> (LexToken -> LexToken -> Bool)
-> (LexToken -> LexToken -> Bool)
-> (LexToken -> LexToken -> LexToken)
-> (LexToken -> LexToken -> LexToken)
-> Ord LexToken
LexToken -> LexToken -> Bool
LexToken -> LexToken -> Ordering
LexToken -> LexToken -> LexToken
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: LexToken -> LexToken -> Ordering
compare :: LexToken -> LexToken -> Ordering
$c< :: LexToken -> LexToken -> Bool
< :: LexToken -> LexToken -> Bool
$c<= :: LexToken -> LexToken -> Bool
<= :: LexToken -> LexToken -> Bool
$c> :: LexToken -> LexToken -> Bool
> :: LexToken -> LexToken -> Bool
$c>= :: LexToken -> LexToken -> Bool
>= :: LexToken -> LexToken -> Bool
$cmax :: LexToken -> LexToken -> LexToken
max :: LexToken -> LexToken -> LexToken
$cmin :: LexToken -> LexToken -> LexToken
min :: LexToken -> LexToken -> LexToken
Ord, Int -> LexToken -> ShowS
[LexToken] -> ShowS
LexToken -> String
(Int -> LexToken -> ShowS)
-> (LexToken -> String) -> ([LexToken] -> ShowS) -> Show LexToken
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LexToken -> ShowS
showsPrec :: Int -> LexToken -> ShowS
$cshow :: LexToken -> String
show :: LexToken -> String
$cshowList :: [LexToken] -> ShowS
showList :: [LexToken] -> ShowS
Show, (forall x. LexToken -> Rep LexToken x)
-> (forall x. Rep LexToken x -> LexToken) -> Generic LexToken
forall x. Rep LexToken x -> LexToken
forall x. LexToken -> Rep LexToken x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. LexToken -> Rep LexToken x
from :: forall x. LexToken -> Rep LexToken x
$cto :: forall x. Rep LexToken x -> LexToken
to :: forall x. Rep LexToken x -> LexToken
Generic, LexToken -> ()
(LexToken -> ()) -> NFData LexToken
forall a. (a -> ()) -> NFData a
$crnf :: LexToken -> ()
rnf :: LexToken -> ()
NFData)

data LexerState = LexerState
  { LexerState -> String
lexerInput :: String,
    LexerState -> Int
lexerLine :: !Int,
    LexerState -> Int
lexerCol :: !Int,
    LexerState -> Bool
lexerAtLineStart :: !Bool,
    LexerState -> [LexToken]
lexerPending :: [LexToken],
    LexerState -> [Extension]
lexerExtensions :: [Extension],
    -- | The kind of the previous non-trivia token (for NegativeLiterals/LexicalNegation)
    LexerState -> Maybe LexTokenKind
lexerPrevTokenKind :: !(Maybe LexTokenKind),
    -- | Whether trivia (whitespace/comments) was skipped since the last token
    LexerState -> Bool
lexerHadTrivia :: !Bool
  }
  deriving (LexerState -> LexerState -> Bool
(LexerState -> LexerState -> Bool)
-> (LexerState -> LexerState -> Bool) -> Eq LexerState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LexerState -> LexerState -> Bool
== :: LexerState -> LexerState -> Bool
$c/= :: LexerState -> LexerState -> Bool
/= :: LexerState -> LexerState -> Bool
Eq, Int -> LexerState -> ShowS
[LexerState] -> ShowS
LexerState -> String
(Int -> LexerState -> ShowS)
-> (LexerState -> String)
-> ([LexerState] -> ShowS)
-> Show LexerState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LexerState -> ShowS
showsPrec :: Int -> LexerState -> ShowS
$cshow :: LexerState -> String
show :: LexerState -> String
$cshowList :: [LexerState] -> ShowS
showList :: [LexerState] -> ShowS
Show)

data LayoutContext
  = LayoutExplicit
  | LayoutImplicit !Int
  | LayoutImplicitLet !Int
  | -- | Implicit layout opened after 'then do' or 'else do'.
    -- This variant allows 'then' and 'else' to close it at the same indent level.
    LayoutImplicitAfterThenElse !Int
  | -- | Marker for ( or [ to scope implicit layout closures
    LayoutDelimiter
  deriving (LayoutContext -> LayoutContext -> Bool
(LayoutContext -> LayoutContext -> Bool)
-> (LayoutContext -> LayoutContext -> Bool) -> Eq LayoutContext
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LayoutContext -> LayoutContext -> Bool
== :: LayoutContext -> LayoutContext -> Bool
$c/= :: LayoutContext -> LayoutContext -> Bool
/= :: LayoutContext -> LayoutContext -> Bool
Eq, Int -> LayoutContext -> ShowS
[LayoutContext] -> ShowS
LayoutContext -> String
(Int -> LayoutContext -> ShowS)
-> (LayoutContext -> String)
-> ([LayoutContext] -> ShowS)
-> Show LayoutContext
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LayoutContext -> ShowS
showsPrec :: Int -> LayoutContext -> ShowS
$cshow :: LayoutContext -> String
show :: LayoutContext -> String
$cshowList :: [LayoutContext] -> ShowS
showList :: [LayoutContext] -> ShowS
Show)

data PendingLayout
  = PendingLayoutGeneric
  | PendingLayoutLet
  | -- | Pending layout from 'do' after 'then' or 'else'.
    -- The resulting layout can be closed by 'then'/'else' at the same indent.
    PendingLayoutAfterThenElse
  deriving (PendingLayout -> PendingLayout -> Bool
(PendingLayout -> PendingLayout -> Bool)
-> (PendingLayout -> PendingLayout -> Bool) -> Eq PendingLayout
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: PendingLayout -> PendingLayout -> Bool
== :: PendingLayout -> PendingLayout -> Bool
$c/= :: PendingLayout -> PendingLayout -> Bool
/= :: PendingLayout -> PendingLayout -> Bool
Eq, Int -> PendingLayout -> ShowS
[PendingLayout] -> ShowS
PendingLayout -> String
(Int -> PendingLayout -> ShowS)
-> (PendingLayout -> String)
-> ([PendingLayout] -> ShowS)
-> Show PendingLayout
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PendingLayout -> ShowS
showsPrec :: Int -> PendingLayout -> ShowS
$cshow :: PendingLayout -> String
show :: PendingLayout -> String
$cshowList :: [PendingLayout] -> ShowS
showList :: [PendingLayout] -> ShowS
Show)

data ModuleLayoutMode
  = ModuleLayoutOff
  | ModuleLayoutSeekStart
  | ModuleLayoutAwaitWhere
  | ModuleLayoutDone
  deriving (ModuleLayoutMode -> ModuleLayoutMode -> Bool
(ModuleLayoutMode -> ModuleLayoutMode -> Bool)
-> (ModuleLayoutMode -> ModuleLayoutMode -> Bool)
-> Eq ModuleLayoutMode
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: ModuleLayoutMode -> ModuleLayoutMode -> Bool
== :: ModuleLayoutMode -> ModuleLayoutMode -> Bool
$c/= :: ModuleLayoutMode -> ModuleLayoutMode -> Bool
/= :: ModuleLayoutMode -> ModuleLayoutMode -> Bool
Eq, Int -> ModuleLayoutMode -> ShowS
[ModuleLayoutMode] -> ShowS
ModuleLayoutMode -> String
(Int -> ModuleLayoutMode -> ShowS)
-> (ModuleLayoutMode -> String)
-> ([ModuleLayoutMode] -> ShowS)
-> Show ModuleLayoutMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> ModuleLayoutMode -> ShowS
showsPrec :: Int -> ModuleLayoutMode -> ShowS
$cshow :: ModuleLayoutMode -> String
show :: ModuleLayoutMode -> String
$cshowList :: [ModuleLayoutMode] -> ShowS
showList :: [ModuleLayoutMode] -> ShowS
Show)

data LayoutState = LayoutState
  { LayoutState -> [LayoutContext]
layoutContexts :: [LayoutContext],
    LayoutState -> Maybe PendingLayout
layoutPendingLayout :: !(Maybe PendingLayout),
    LayoutState -> Maybe Int
layoutPrevLine :: !(Maybe Int),
    LayoutState -> Maybe LexTokenKind
layoutPrevTokenKind :: !(Maybe LexTokenKind),
    LayoutState -> Int
layoutDelimiterDepth :: !Int,
    LayoutState -> ModuleLayoutMode
layoutModuleMode :: !ModuleLayoutMode
  }
  deriving (LayoutState -> LayoutState -> Bool
(LayoutState -> LayoutState -> Bool)
-> (LayoutState -> LayoutState -> Bool) -> Eq LayoutState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LayoutState -> LayoutState -> Bool
== :: LayoutState -> LayoutState -> Bool
$c/= :: LayoutState -> LayoutState -> Bool
/= :: LayoutState -> LayoutState -> Bool
Eq, Int -> LayoutState -> ShowS
[LayoutState] -> ShowS
LayoutState -> String
(Int -> LayoutState -> ShowS)
-> (LayoutState -> String)
-> ([LayoutState] -> ShowS)
-> Show LayoutState
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LayoutState -> ShowS
showsPrec :: Int -> LayoutState -> ShowS
$cshow :: LayoutState -> String
show :: LayoutState -> String
$cshowList :: [LayoutState] -> ShowS
showList :: [LayoutState] -> ShowS
Show)

data DirectiveUpdate = DirectiveUpdate
  { DirectiveUpdate -> Maybe Int
directiveLine :: !(Maybe Int),
    DirectiveUpdate -> Maybe Int
directiveCol :: !(Maybe Int)
  }
  deriving (DirectiveUpdate -> DirectiveUpdate -> Bool
(DirectiveUpdate -> DirectiveUpdate -> Bool)
-> (DirectiveUpdate -> DirectiveUpdate -> Bool)
-> Eq DirectiveUpdate
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DirectiveUpdate -> DirectiveUpdate -> Bool
== :: DirectiveUpdate -> DirectiveUpdate -> Bool
$c/= :: DirectiveUpdate -> DirectiveUpdate -> Bool
/= :: DirectiveUpdate -> DirectiveUpdate -> Bool
Eq, Int -> DirectiveUpdate -> ShowS
[DirectiveUpdate] -> ShowS
DirectiveUpdate -> String
(Int -> DirectiveUpdate -> ShowS)
-> (DirectiveUpdate -> String)
-> ([DirectiveUpdate] -> ShowS)
-> Show DirectiveUpdate
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DirectiveUpdate -> ShowS
showsPrec :: Int -> DirectiveUpdate -> ShowS
$cshow :: DirectiveUpdate -> String
show :: DirectiveUpdate -> String
$cshowList :: [DirectiveUpdate] -> ShowS
showList :: [DirectiveUpdate] -> ShowS
Show)

-- | Convenience lexer entrypoint: no extensions, parse as expression/declaration stream.
--
-- This variant consumes a single strict 'Text' chunk and returns a lazy list of
-- tokens. Lexing errors are preserved as 'TkError' tokens instead of causing
-- lexing to fail.
lexTokens :: Text -> [LexToken]
lexTokens :: Text -> [LexToken]
lexTokens = [Text] -> [LexToken]
lexTokensFromChunks ([Text] -> [LexToken]) -> (Text -> [Text]) -> Text -> [LexToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [])

-- | Convenience lexer entrypoint for full modules: no explicit extension list.
--
-- Leading header pragmas are scanned first so module-enabled extensions can be
-- applied before token rewrites and top-level layout insertion.
lexModuleTokens :: Text -> [LexToken]
lexModuleTokens :: Text -> [LexToken]
lexModuleTokens Text
input =
  [Extension] -> [Text] -> [LexToken]
lexModuleTokensFromChunks
    ([ExtensionSetting] -> [Extension]
enabledExtensionsFromSettings ([Text] -> [ExtensionSetting]
readModuleHeaderExtensionsFromChunks [Text
input]))
    [Text
input]

-- | Lex an expression/declaration stream from one or more input chunks.
--
-- Tokens are produced lazily, so downstream consumers can begin parsing before
-- the full source has been scanned.
lexTokensFromChunks :: [Text] -> [LexToken]
lexTokensFromChunks :: [Text] -> [LexToken]
lexTokensFromChunks = [Extension] -> [Text] -> [LexToken]
lexTokensFromChunksWithExtensions []

-- | Lex a full module from one or more input chunks with explicit extensions.
--
-- This variant enables module-body layout insertion in addition to the normal
-- token scan and extension rewrites.
lexModuleTokensFromChunks :: [Extension] -> [Text] -> [LexToken]
lexModuleTokensFromChunks :: [Extension] -> [Text] -> [LexToken]
lexModuleTokensFromChunks = Bool -> [Extension] -> [Text] -> [LexToken]
lexChunksWithExtensions Bool
True

-- | Lex source text using explicit lexer extensions.
--
-- This runs raw tokenization, extension rewrites, and implicit-layout insertion.
-- Module-top layout is /not/ enabled here. Malformed lexemes become 'TkError'
-- tokens in the token stream.
lexTokensWithExtensions :: [Extension] -> Text -> [LexToken]
lexTokensWithExtensions :: [Extension] -> Text -> [LexToken]
lexTokensWithExtensions [Extension]
exts Text
input = [Extension] -> [Text] -> [LexToken]
lexTokensFromChunksWithExtensions [Extension]
exts [Text
input]

-- | Lex module source text using explicit lexer extensions.
--
-- Like 'lexTokensWithExtensions', but also enables top-level module-body layout:
-- when the source omits explicit braces, virtual layout tokens are inserted
-- after @module ... where@ (or from the first non-pragma token in module-less files).
lexModuleTokensWithExtensions :: [Extension] -> Text -> [LexToken]
lexModuleTokensWithExtensions :: [Extension] -> Text -> [LexToken]
lexModuleTokensWithExtensions [Extension]
exts Text
input = [Extension] -> [Text] -> [LexToken]
lexModuleTokensFromChunks [Extension]
exts [Text
input]

-- | Internal chunked lexer entrypoint for non-module inputs.
--
-- This exists so callers can stream input through the same scanner while still
-- selecting extension-driven token rewrites.
lexTokensFromChunksWithExtensions :: [Extension] -> [Text] -> [LexToken]
lexTokensFromChunksWithExtensions :: [Extension] -> [Text] -> [LexToken]
lexTokensFromChunksWithExtensions = Bool -> [Extension] -> [Text] -> [LexToken]
lexChunksWithExtensions Bool
False

-- | Run the full lexer pipeline over chunked input.
--
-- The scanner operates over the concatenated chunk stream with inline extension
-- handling, then the resulting token stream is passed through the layout insertion step.
lexChunksWithExtensions :: Bool -> [Extension] -> [Text] -> [LexToken]
lexChunksWithExtensions :: Bool -> [Extension] -> [Text] -> [LexToken]
lexChunksWithExtensions Bool
enableModuleLayout [Extension]
exts [Text]
chunks =
  Bool -> [LexToken] -> [LexToken]
applyLayoutTokens Bool
enableModuleLayout (LexerState -> [LexToken]
scanTokens LexerState
initialLexerState)
  where
    initialLexerState :: LexerState
initialLexerState =
      LexerState
        { lexerInput :: String
lexerInput = (Text -> String) -> [Text] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Text -> String
T.unpack [Text]
chunks,
          lexerLine :: Int
lexerLine = Int
1,
          lexerCol :: Int
lexerCol = Int
1,
          lexerAtLineStart :: Bool
lexerAtLineStart = Bool
True,
          lexerPending :: [LexToken]
lexerPending = [],
          lexerExtensions :: [Extension]
lexerExtensions = [Extension]
exts,
          lexerPrevTokenKind :: Maybe LexTokenKind
lexerPrevTokenKind = Maybe LexTokenKind
forall a. Maybe a
Nothing,
          lexerHadTrivia :: Bool
lexerHadTrivia = Bool
True -- Start of file is treated as having leading trivia
        }

-- | Read leading module-header pragmas and return parsed LANGUAGE settings.
--
-- This scans only the pragma/header prefix (allowing whitespace and comments)
-- and stops at the first non-pragma token or lexer error token.
readModuleHeaderExtensions :: Text -> [ExtensionSetting]
readModuleHeaderExtensions :: Text -> [ExtensionSetting]
readModuleHeaderExtensions Text
input = [Text] -> [ExtensionSetting]
readModuleHeaderExtensionsFromChunks [Text
input]

-- | Read leading module-header pragmas from one or more input chunks.
--
-- This scans only the pragma/header prefix (allowing whitespace and comments)
-- and stops at the first non-pragma token or lexer error token.
readModuleHeaderExtensionsFromChunks :: [Text] -> [ExtensionSetting]
readModuleHeaderExtensionsFromChunks :: [Text] -> [ExtensionSetting]
readModuleHeaderExtensionsFromChunks [Text]
chunks = [LexToken] -> [ExtensionSetting]
go ([Text] -> [LexToken]
lexTokensFromChunks [Text]
chunks)
  where
    go :: [LexToken] -> [ExtensionSetting]
go [LexToken]
toks =
      case [LexToken]
toks of
        LexToken {lexTokenKind :: LexToken -> LexTokenKind
lexTokenKind = TkPragmaLanguage [ExtensionSetting]
settings} : [LexToken]
rest -> [ExtensionSetting]
settings [ExtensionSetting] -> [ExtensionSetting] -> [ExtensionSetting]
forall a. Semigroup a => a -> a -> a
<> [LexToken] -> [ExtensionSetting]
go [LexToken]
rest
        LexToken {lexTokenKind :: LexToken -> LexTokenKind
lexTokenKind = TkPragmaWarning Text
_} : [LexToken]
rest -> [LexToken] -> [ExtensionSetting]
go [LexToken]
rest
        LexToken {lexTokenKind :: LexToken -> LexTokenKind
lexTokenKind = TkPragmaDeprecated Text
_} : [LexToken]
rest -> [LexToken] -> [ExtensionSetting]
go [LexToken]
rest
        LexToken {lexTokenKind :: LexToken -> LexTokenKind
lexTokenKind = TkError Text
_} : [LexToken]
_ -> []
        [LexToken]
_ -> []

enabledExtensionsFromSettings :: [ExtensionSetting] -> [Extension]
enabledExtensionsFromSettings :: [ExtensionSetting] -> [Extension]
enabledExtensionsFromSettings = ([Extension] -> ExtensionSetting -> [Extension])
-> [Extension] -> [ExtensionSetting] -> [Extension]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
List.foldl' [Extension] -> ExtensionSetting -> [Extension]
apply []
  where
    apply :: [Extension] -> ExtensionSetting -> [Extension]
apply [Extension]
exts ExtensionSetting
setting =
      case ExtensionSetting
setting of
        EnableExtension Extension
ext
          | Extension
ext Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Extension]
exts -> [Extension]
exts
          | Bool
otherwise -> [Extension]
exts [Extension] -> [Extension] -> [Extension]
forall a. Semigroup a => a -> a -> a
<> [Extension
ext]
        DisableExtension Extension
ext -> (Extension -> Bool) -> [Extension] -> [Extension]
forall a. (a -> Bool) -> [a] -> [a]
filter (Extension -> Extension -> Bool
forall a. Eq a => a -> a -> Bool
/= Extension
ext) [Extension]
exts

-- | Produce the lazy stream of raw lexical tokens.
--
-- Pending synthetic tokens are emitted first, then trivia is skipped, and finally
-- the next real token is scanned from the remaining input.
--
-- The lexer tracks the previous token kind and whether trivia was consumed between
-- tokens, which enables inline handling of LexicalNegation and NegativeLiterals.
scanTokens :: LexerState -> [LexToken]
scanTokens :: LexerState -> [LexToken]
scanTokens LexerState
st0 =
  case LexerState -> [LexToken]
lexerPending LexerState
st0 of
    LexToken
tok : [LexToken]
rest ->
      let st0' :: LexerState
st0' = LexerState
st0 {lexerPending = rest, lexerPrevTokenKind = Just (lexTokenKind tok), lexerHadTrivia = False}
       in LexToken
tok LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: LexerState -> [LexToken]
scanTokens LexerState
st0'
    [] ->
      let st :: LexerState
st = LexerState -> LexerState
skipTrivia LexerState
st0
       in case LexerState -> [LexToken]
lexerPending LexerState
st of
            LexToken
tok : [LexToken]
rest ->
              let st' :: LexerState
st' = LexerState
st {lexerPending = rest, lexerPrevTokenKind = Just (lexTokenKind tok), lexerHadTrivia = False}
               in LexToken
tok LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: LexerState -> [LexToken]
scanTokens LexerState
st'
            []
              | String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (LexerState -> String
lexerInput LexerState
st) -> []
              | Bool
otherwise ->
                  -- Reset hadTrivia flag is already set by skipTrivia; we just lex the token
                  let (LexToken
tok, LexerState
st') = LexerState -> (LexToken, LexerState)
nextToken LexerState
st
                      st'' :: LexerState
st'' = LexerState
st' {lexerPrevTokenKind = Just (lexTokenKind tok), lexerHadTrivia = False}
                   in LexToken
tok LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: LexerState -> [LexToken]
scanTokens LexerState
st''

-- | Skip ignorable trivia until the next token boundary.
--
-- Control directives are treated specially: valid directives update lexer position
-- state without emitting a token, while malformed directives enqueue 'TkError'
-- tokens for later emission.
skipTrivia :: LexerState -> LexerState
skipTrivia :: LexerState -> LexerState
skipTrivia LexerState
st = LexerState
-> (LexerState -> LexerState) -> Maybe LexerState -> LexerState
forall b a. b -> (a -> b) -> Maybe a -> b
maybe LexerState
st LexerState -> LexerState
skipTrivia (LexerState -> Maybe LexerState
consumeTrivia LexerState
st)

consumeTrivia :: LexerState -> Maybe LexerState
consumeTrivia :: LexerState -> Maybe LexerState
consumeTrivia LexerState
st
  | String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (LexerState -> String
lexerInput LexerState
st) = Maybe LexerState
forall a. Maybe a
Nothing
  | Bool
otherwise =
      case LexerState -> String
lexerInput LexerState
st of
        Char
c : String
_
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\r' -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia ((Char -> Bool) -> LexerState -> LexerState
consumeWhile (\Char
x -> Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\r') LexerState
st))
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia (String -> LexerState -> LexerState
advanceChars String
"\n" LexerState
st))
        Char
'-' : Char
'-' : String
rest
          | String -> Bool
isLineComment String
rest -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia (LexerState -> LexerState
consumeLineComment LexerState
st))
        Char
'{' : Char
'-' : Char
'#' : String
_ ->
          case LexerState -> Maybe (Maybe LexToken, LexerState)
tryConsumeControlPragma LexerState
st of
            Just (Maybe LexToken
Nothing, LexerState
st') -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia LexerState
st')
            Just (Just LexToken
tok, LexerState
st') -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia LexerState
st' {lexerPending = lexerPending st' <> [tok]})
            Maybe (Maybe LexToken, LexerState)
Nothing ->
              case LexerState -> Maybe ()
tryConsumeKnownPragma LexerState
st of
                Just ()
_ -> Maybe LexerState
forall a. Maybe a
Nothing
                Maybe ()
Nothing ->
                  LexerState -> LexerState
markHadTrivia (LexerState -> LexerState) -> Maybe LexerState -> Maybe LexerState
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> LexerState -> Maybe LexerState
consumeUnknownPragma LexerState
st
        Char
'{' : Char
'-' : String
_ ->
          LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia (LexerState -> LexerState
consumeBlockCommentOrError LexerState
st))
        String
_ ->
          case LexerState -> Maybe (Maybe LexToken, LexerState)
tryConsumeLineDirective LexerState
st of
            Just (Maybe LexToken
Nothing, LexerState
st') -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia LexerState
st')
            Just (Just LexToken
tok, LexerState
st') -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (LexerState -> LexerState
markHadTrivia LexerState
st' {lexerPending = lexerPending st' <> [tok]})
            Maybe (Maybe LexToken, LexerState)
Nothing -> Maybe LexerState
forall a. Maybe a
Nothing

-- | Mark that trivia was consumed
markHadTrivia :: LexerState -> LexerState
markHadTrivia :: LexerState -> LexerState
markHadTrivia LexerState
st = LexerState
st {lexerHadTrivia = True}

nextToken :: LexerState -> (LexToken, LexerState)
nextToken :: LexerState -> (LexToken, LexerState)
nextToken LexerState
st =
  (LexToken, LexerState)
-> Maybe (LexToken, LexerState) -> (LexToken, LexerState)
forall a. a -> Maybe a -> a
fromMaybe (LexerState -> Text -> (LexToken, LexerState)
lexErrorToken LexerState
st Text
"unexpected character") ([LexerState -> Maybe (LexToken, LexerState)]
-> Maybe (LexToken, LexerState)
forall {a}. [LexerState -> Maybe a] -> Maybe a
firstJust [LexerState -> Maybe (LexToken, LexerState)]
tokenParsers)
  where
    tokenParsers :: [LexerState -> Maybe (LexToken, LexerState)]
tokenParsers =
      [ LexerState -> Maybe (LexToken, LexerState)
lexKnownPragma,
        LexerState -> Maybe (LexToken, LexerState)
lexQuasiQuote,
        LexerState -> Maybe (LexToken, LexerState)
lexHexFloat,
        LexerState -> Maybe (LexToken, LexerState)
lexFloat,
        LexerState -> Maybe (LexToken, LexerState)
lexIntBase,
        LexerState -> Maybe (LexToken, LexerState)
lexInt,
        LexerState -> Maybe (LexToken, LexerState)
lexPromotedQuote,
        LexerState -> Maybe (LexToken, LexerState)
lexChar,
        LexerState -> Maybe (LexToken, LexerState)
lexString,
        LexerState -> Maybe (LexToken, LexerState)
lexSymbol,
        LexerState -> Maybe (LexToken, LexerState)
lexIdentifier,
        LexerState -> Maybe (LexToken, LexerState)
lexNegativeLiteralOrMinus,
        LexerState -> Maybe (LexToken, LexerState)
lexBangOrTildeOperator, -- must come before lexOperator
        LexerState -> Maybe (LexToken, LexerState)
lexOperator
      ]

    firstJust :: [LexerState -> Maybe a] -> Maybe a
firstJust [] = Maybe a
forall a. Maybe a
Nothing
    firstJust (LexerState -> Maybe a
parser : [LexerState -> Maybe a]
rest) =
      case LexerState -> Maybe a
parser LexerState
st of
        Just a
out -> a -> Maybe a
forall a. a -> Maybe a
Just a
out
        Maybe a
Nothing -> [LexerState -> Maybe a] -> Maybe a
firstJust [LexerState -> Maybe a]
rest

applyLayoutTokens :: Bool -> [LexToken] -> [LexToken]
applyLayoutTokens :: Bool -> [LexToken] -> [LexToken]
applyLayoutTokens Bool
enableModuleLayout =
  LayoutState -> [LexToken] -> [LexToken]
go
    LayoutState
      { layoutContexts :: [LayoutContext]
layoutContexts = [],
        layoutPendingLayout :: Maybe PendingLayout
layoutPendingLayout = Maybe PendingLayout
forall a. Maybe a
Nothing,
        layoutPrevLine :: Maybe Int
layoutPrevLine = Maybe Int
forall a. Maybe a
Nothing,
        layoutPrevTokenKind :: Maybe LexTokenKind
layoutPrevTokenKind = Maybe LexTokenKind
forall a. Maybe a
Nothing,
        layoutDelimiterDepth :: Int
layoutDelimiterDepth = Int
0,
        layoutModuleMode :: ModuleLayoutMode
layoutModuleMode =
          if Bool
enableModuleLayout
            then ModuleLayoutMode
ModuleLayoutSeekStart
            else ModuleLayoutMode
ModuleLayoutOff
      }
  where
    go :: LayoutState -> [LexToken] -> [LexToken]
go LayoutState
st [LexToken]
toks =
      case [LexToken]
toks of
        [] -> [LayoutContext] -> SourceSpan -> [LexToken]
closeAllImplicit (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st) SourceSpan
NoSourceSpan
        LexToken
tok : [LexToken]
rest ->
          let stModule :: LayoutState
stModule = LayoutState -> LexToken -> LayoutState
noteModuleLayoutBeforeToken LayoutState
st LexToken
tok
              ([LexToken]
preInserted, LayoutState
stBeforePending) = LayoutState -> LexToken -> ([LexToken], LayoutState)
closeBeforeToken LayoutState
stModule LexToken
tok
              ([LexToken]
pendingInserted, LayoutState
stAfterPending, Bool
skipBOL) = LayoutState -> LexToken -> ([LexToken], LayoutState, Bool)
openPendingLayout LayoutState
stBeforePending LexToken
tok
              ([LexToken]
bolInserted, LayoutState
stAfterBOL) = if Bool
skipBOL then ([], LayoutState
stAfterPending) else LayoutState -> LexToken -> ([LexToken], LayoutState)
bolLayout LayoutState
stAfterPending LexToken
tok
              stAfterToken :: LayoutState
stAfterToken = LayoutState -> LexToken -> LayoutState
noteModuleLayoutAfterToken (LayoutState -> LexToken -> LayoutState
stepTokenContext LayoutState
stAfterBOL LexToken
tok) LexToken
tok
              stNext :: LayoutState
stNext =
                LayoutState
stAfterToken
                  { layoutPrevLine = Just (tokenStartLine tok),
                    layoutPrevTokenKind = Just (lexTokenKind tok)
                  }
           in [LexToken]
preInserted [LexToken] -> [LexToken] -> [LexToken]
forall a. Semigroup a => a -> a -> a
<> [LexToken]
pendingInserted [LexToken] -> [LexToken] -> [LexToken]
forall a. Semigroup a => a -> a -> a
<> [LexToken]
bolInserted [LexToken] -> [LexToken] -> [LexToken]
forall a. Semigroup a => a -> a -> a
<> (LexToken
tok LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: LayoutState -> [LexToken] -> [LexToken]
go LayoutState
stNext [LexToken]
rest)

noteModuleLayoutBeforeToken :: LayoutState -> LexToken -> LayoutState
noteModuleLayoutBeforeToken :: LayoutState -> LexToken -> LayoutState
noteModuleLayoutBeforeToken LayoutState
st LexToken
tok =
  case LayoutState -> ModuleLayoutMode
layoutModuleMode LayoutState
st of
    ModuleLayoutMode
ModuleLayoutSeekStart ->
      case LexToken -> LexTokenKind
lexTokenKind LexToken
tok of
        TkPragmaLanguage [ExtensionSetting]
_ -> LayoutState
st
        TkPragmaWarning Text
_ -> LayoutState
st
        TkPragmaDeprecated Text
_ -> LayoutState
st
        LexTokenKind
TkKeywordModule -> LayoutState
st {layoutModuleMode = ModuleLayoutAwaitWhere}
        LexTokenKind
_ -> LayoutState
st {layoutModuleMode = ModuleLayoutDone, layoutPendingLayout = Just PendingLayoutGeneric}
    ModuleLayoutMode
_ -> LayoutState
st

noteModuleLayoutAfterToken :: LayoutState -> LexToken -> LayoutState
noteModuleLayoutAfterToken :: LayoutState -> LexToken -> LayoutState
noteModuleLayoutAfterToken LayoutState
st LexToken
tok =
  case LayoutState -> ModuleLayoutMode
layoutModuleMode LayoutState
st of
    ModuleLayoutMode
ModuleLayoutAwaitWhere
      | LexToken -> LexTokenKind
lexTokenKind LexToken
tok LexTokenKind -> LexTokenKind -> Bool
forall a. Eq a => a -> a -> Bool
== LexTokenKind
TkKeywordWhere ->
          LayoutState
st {layoutModuleMode = ModuleLayoutDone, layoutPendingLayout = Just PendingLayoutGeneric}
    ModuleLayoutMode
_ -> LayoutState
st

openPendingLayout :: LayoutState -> LexToken -> ([LexToken], LayoutState, Bool)
openPendingLayout :: LayoutState -> LexToken -> ([LexToken], LayoutState, Bool)
openPendingLayout LayoutState
st LexToken
tok =
  case LayoutState -> Maybe PendingLayout
layoutPendingLayout LayoutState
st of
    Maybe PendingLayout
Nothing -> ([], LayoutState
st, Bool
False)
    Just PendingLayout
pending ->
      case LexToken -> LexTokenKind
lexTokenKind LexToken
tok of
        LexTokenKind
TkSpecialLBrace -> ([], LayoutState
st {layoutPendingLayout = Nothing}, Bool
False)
        LexTokenKind
_ ->
          let col :: Int
col = LexToken -> Int
tokenStartCol LexToken
tok
              parentIndent :: Int
parentIndent = [LayoutContext] -> Int
currentLayoutIndent (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
              openTok :: LexToken
openTok = Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"{" (LexToken -> SourceSpan
lexTokenSpan LexToken
tok)
              closeTok :: LexToken
closeTok = Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" (LexToken -> SourceSpan
lexTokenSpan LexToken
tok)
              newContext :: LayoutContext
newContext =
                case PendingLayout
pending of
                  PendingLayout
PendingLayoutGeneric -> Int -> LayoutContext
LayoutImplicit Int
col
                  PendingLayout
PendingLayoutLet -> Int -> LayoutContext
LayoutImplicitLet Int
col
                  PendingLayout
PendingLayoutAfterThenElse -> Int -> LayoutContext
LayoutImplicitAfterThenElse Int
col
           in if Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
parentIndent
                then ([LexToken
openTok, LexToken
closeTok], LayoutState
st {layoutPendingLayout = Nothing}, Bool
False)
                else
                  ( [LexToken
openTok],
                    LayoutState
st
                      { layoutPendingLayout = Nothing,
                        layoutContexts = newContext : layoutContexts st
                      },
                    Bool
True
                  )

closeBeforeToken :: LayoutState -> LexToken -> ([LexToken], LayoutState)
closeBeforeToken :: LayoutState -> LexToken -> ([LexToken], LayoutState)
closeBeforeToken LayoutState
st LexToken
tok =
  case LexToken -> LexTokenKind
lexTokenKind LexToken
tok of
    LexTokenKind
TkKeywordIn ->
      let ([LexToken]
inserted, [LayoutContext]
contexts') = SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeLeadingImplicitLets (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    LexTokenKind
TkSpecialComma
      | LayoutState -> Int
layoutDelimiterDepth LayoutState
st Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 ->
          let ([LexToken]
inserted, [LayoutContext]
contexts') = SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeLeadingImplicitLets (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
           in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    -- Close implicit layout contexts before closing delimiters (parse-error rule)
    LexTokenKind
TkSpecialRParen ->
      let ([LexToken]
inserted, [LayoutContext]
contexts') = SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeAllImplicitBeforeDelimiter (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    LexTokenKind
TkSpecialRBracket ->
      let ([LexToken]
inserted, [LayoutContext]
contexts') = SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeAllImplicitBeforeDelimiter (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    LexTokenKind
TkSpecialRBrace ->
      let ([LexToken]
inserted, [LayoutContext]
contexts') = SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeAllImplicitBeforeDelimiter (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    -- Close implicit layout contexts before 'then' and 'else' keywords (parse-error rule)
    -- These keywords cannot appear inside a do block, so we close contexts at >= their column.
    LexTokenKind
TkKeywordThen ->
      let col :: Int
col = LexToken -> Int
tokenStartCol LexToken
tok
          ([LexToken]
inserted, [LayoutContext]
contexts') = Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusive Int
col (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    LexTokenKind
TkKeywordElse ->
      let col :: Int
col = LexToken -> Int
tokenStartCol LexToken
tok
          ([LexToken]
inserted, [LayoutContext]
contexts') = Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusive Int
col (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    -- Close implicit layout contexts before 'where' keyword (parse-error rule)
    -- 'where' at the same column as an implicit layout closes that layout,
    -- allowing it to attach to the enclosing definition.
    LexTokenKind
TkKeywordWhere ->
      let col :: Int
col = LexToken -> Int
tokenStartCol LexToken
tok
          ([LexToken]
inserted, [LayoutContext]
contexts') = Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusiveAll Int
col (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
       in ([LexToken]
inserted, LayoutState
st {layoutContexts = contexts'})
    LexTokenKind
_ -> ([], LayoutState
st)

bolLayout :: LayoutState -> LexToken -> ([LexToken], LayoutState)
bolLayout :: LayoutState -> LexToken -> ([LexToken], LayoutState)
bolLayout LayoutState
st LexToken
tok
  | Bool -> Bool
not (LayoutState -> LexToken -> Bool
isBOL LayoutState
st LexToken
tok) = ([], LayoutState
st)
  | Bool
otherwise =
      let col :: Int
col = LexToken -> Int
tokenStartCol LexToken
tok
          ([LexToken]
inserted, [LayoutContext]
contexts') = Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedent Int
col (LexToken -> SourceSpan
lexTokenSpan LexToken
tok) (LayoutState -> [LayoutContext]
layoutContexts LayoutState
st)
          eqSemi :: [LexToken]
eqSemi =
            case [LayoutContext] -> Maybe Int
currentLayoutIndentMaybe [LayoutContext]
contexts' of
              Just Int
indent
                | Int
col Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
indent,
                  Bool -> Bool
not (LexToken -> Bool
suppressesVirtualSemicolon LexToken
tok) ->
                    [Text -> SourceSpan -> LexToken
virtualSymbolToken Text
";" (LexToken -> SourceSpan
lexTokenSpan LexToken
tok)]
              Maybe Int
_ -> []
       in ([LexToken]
inserted [LexToken] -> [LexToken] -> [LexToken]
forall a. Semigroup a => a -> a -> a
<> [LexToken]
eqSemi, LayoutState
st {layoutContexts = contexts'})

suppressesVirtualSemicolon :: LexToken -> Bool
suppressesVirtualSemicolon :: LexToken -> Bool
suppressesVirtualSemicolon LexToken
tok =
  case LexToken -> LexTokenKind
lexTokenKind LexToken
tok of
    LexTokenKind
TkKeywordThen -> Bool
True
    LexTokenKind
TkKeywordElse -> Bool
True
    LexTokenKind
TkReservedDoubleArrow -> Bool
True -- =>
    LexTokenKind
TkReservedRightArrow -> Bool
True -- ->
    LexTokenKind
TkReservedEquals -> Bool
True -- =
    LexTokenKind
TkReservedPipe -> Bool
True
    LexTokenKind
TkReservedDoubleColon -> Bool
True -- ::
    LexTokenKind
_ -> Bool
False

closeForDedent :: Int -> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedent :: Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedent Int
col SourceSpan
anchor = [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go []
  where
    go :: [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go [LexToken]
acc [LayoutContext]
contexts =
      case [LayoutContext]
contexts of
        LayoutImplicit Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        LayoutImplicitLet Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        LayoutImplicitAfterThenElse Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        [LayoutContext]
_ -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)

-- | Close layout contexts opened after 'then do' or 'else do' when encountering
-- 'then' or 'else' at the same or lesser indent. This handles the parse-error rule
-- for these specific cases where the keyword cannot be part of the do block.
--
-- This function first closes any implicit layouts with indent > col (regular dedent),
-- then closes LayoutImplicitAfterThenElse contexts where col <= indent.
-- This ensures that nested layouts (like case blocks) are closed before
-- the then/else-specific layout closing.
closeForDedentInclusive :: Int -> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusive :: Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusive Int
col SourceSpan
anchor = [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go []
  where
    go :: [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go [LexToken]
acc [LayoutContext]
contexts =
      case [LayoutContext]
contexts of
        -- Close any implicit layout with indent > col (dedent rule)
        LayoutImplicit Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        LayoutImplicitLet Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        -- Close LayoutImplicitAfterThenElse where col <= indent (parse-error rule)
        LayoutImplicitAfterThenElse Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        [LayoutContext]
_ -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)

-- | Close all implicit layout contexts at or above the given column.
-- Used for 'where' which needs to close all enclosing implicit layouts
-- (not just LayoutImplicitAfterThenElse like then/else).
closeForDedentInclusiveAll :: Int -> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusiveAll :: Int
-> SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeForDedentInclusiveAll Int
col SourceSpan
anchor = [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go []
  where
    go :: [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go [LexToken]
acc [LayoutContext]
contexts =
      case [LayoutContext]
contexts of
        LayoutImplicit Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        LayoutImplicitLet Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        LayoutImplicitAfterThenElse Int
indent : [LayoutContext]
rest
          | Int
col Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
indent -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
          | Bool
otherwise -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)
        [LayoutContext]
_ -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)

closeAllImplicit :: [LayoutContext] -> SourceSpan -> [LexToken]
closeAllImplicit :: [LayoutContext] -> SourceSpan -> [LexToken]
closeAllImplicit [LayoutContext]
contexts SourceSpan
anchor =
  [Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor | LayoutContext
ctx <- [LayoutContext]
contexts, LayoutContext -> Bool
isImplicitLayoutContext LayoutContext
ctx]

closeLeadingImplicitLets :: SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeLeadingImplicitLets :: SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeLeadingImplicitLets SourceSpan
anchor = [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go []
  where
    go :: [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go [LexToken]
acc [LayoutContext]
contexts =
      case [LayoutContext]
contexts of
        LayoutImplicitLet Int
_ : [LayoutContext]
rest -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
        [LayoutContext]
_ -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)

-- | Close all implicit layout contexts up to (but not including) the first explicit context.
-- Used to implement the Haskell Report's "parse-error" rule for closing delimiters.
closeAllImplicitBeforeDelimiter :: SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeAllImplicitBeforeDelimiter :: SourceSpan -> [LayoutContext] -> ([LexToken], [LayoutContext])
closeAllImplicitBeforeDelimiter SourceSpan
anchor = [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go []
  where
    go :: [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go [LexToken]
acc [LayoutContext]
contexts =
      case [LayoutContext]
contexts of
        LayoutImplicit Int
_ : [LayoutContext]
rest -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
        LayoutImplicitLet Int
_ : [LayoutContext]
rest -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
        LayoutImplicitAfterThenElse Int
_ : [LayoutContext]
rest -> [LexToken] -> [LayoutContext] -> ([LexToken], [LayoutContext])
go (Text -> SourceSpan -> LexToken
virtualSymbolToken Text
"}" SourceSpan
anchor LexToken -> [LexToken] -> [LexToken]
forall a. a -> [a] -> [a]
: [LexToken]
acc) [LayoutContext]
rest
        [LayoutContext]
_ -> ([LexToken] -> [LexToken]
forall a. [a] -> [a]
reverse [LexToken]
acc, [LayoutContext]
contexts)

stepTokenContext :: LayoutState -> LexToken -> LayoutState
stepTokenContext :: LayoutState -> LexToken -> LayoutState
stepTokenContext LayoutState
st LexToken
tok =
  case LexToken -> LexTokenKind
lexTokenKind LexToken
tok of
    LexTokenKind
TkKeywordDo
      | LayoutState -> Maybe LexTokenKind
layoutPrevTokenKind LayoutState
st Maybe LexTokenKind -> Maybe LexTokenKind -> Bool
forall a. Eq a => a -> a -> Bool
== LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordThen
          Bool -> Bool -> Bool
|| LayoutState -> Maybe LexTokenKind
layoutPrevTokenKind LayoutState
st Maybe LexTokenKind -> Maybe LexTokenKind -> Bool
forall a. Eq a => a -> a -> Bool
== LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordElse ->
          LayoutState
st {layoutPendingLayout = Just PendingLayoutAfterThenElse}
      | Bool
otherwise -> LayoutState
st {layoutPendingLayout = Just PendingLayoutGeneric}
    LexTokenKind
TkKeywordOf -> LayoutState
st {layoutPendingLayout = Just PendingLayoutGeneric}
    LexTokenKind
TkKeywordCase
      | LayoutState -> Maybe LexTokenKind
layoutPrevTokenKind LayoutState
st Maybe LexTokenKind -> Maybe LexTokenKind -> Bool
forall a. Eq a => a -> a -> Bool
== LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedBackslash ->
          LayoutState
st {layoutPendingLayout = Just PendingLayoutGeneric}
      | Bool
otherwise -> LayoutState
st
    LexTokenKind
TkKeywordLet -> LayoutState
st {layoutPendingLayout = Just PendingLayoutLet}
    LexTokenKind
TkKeywordWhere -> LayoutState
st {layoutPendingLayout = Just PendingLayoutGeneric}
    LexTokenKind
TkSpecialLParen ->
      LayoutState
st
        { layoutDelimiterDepth = layoutDelimiterDepth st + 1,
          layoutContexts = LayoutDelimiter : layoutContexts st
        }
    LexTokenKind
TkSpecialLBracket ->
      LayoutState
st
        { layoutDelimiterDepth = layoutDelimiterDepth st + 1,
          layoutContexts = LayoutDelimiter : layoutContexts st
        }
    LexTokenKind
TkSpecialRParen ->
      LayoutState
st
        { layoutDelimiterDepth = max 0 (layoutDelimiterDepth st - 1),
          layoutContexts = popToDelimiter (layoutContexts st)
        }
    LexTokenKind
TkSpecialRBracket ->
      LayoutState
st
        { layoutDelimiterDepth = max 0 (layoutDelimiterDepth st - 1),
          layoutContexts = popToDelimiter (layoutContexts st)
        }
    LexTokenKind
TkSpecialLBrace -> LayoutState
st {layoutContexts = LayoutExplicit : layoutContexts st}
    LexTokenKind
TkSpecialRBrace -> LayoutState
st {layoutContexts = popOneContext (layoutContexts st)}
    LexTokenKind
_ -> LayoutState
st

-- | Pop the layout context stack up to and including the nearest LayoutDelimiter.
popToDelimiter :: [LayoutContext] -> [LayoutContext]
popToDelimiter :: [LayoutContext] -> [LayoutContext]
popToDelimiter [LayoutContext]
contexts =
  case [LayoutContext]
contexts of
    LayoutContext
LayoutDelimiter : [LayoutContext]
rest -> [LayoutContext]
rest
    LayoutContext
_ : [LayoutContext]
rest -> [LayoutContext] -> [LayoutContext]
popToDelimiter [LayoutContext]
rest
    [] -> []

popOneContext :: [LayoutContext] -> [LayoutContext]
popOneContext :: [LayoutContext] -> [LayoutContext]
popOneContext [LayoutContext]
contexts =
  case [LayoutContext]
contexts of
    LayoutContext
_ : [LayoutContext]
rest -> [LayoutContext]
rest
    [] -> []

currentLayoutIndent :: [LayoutContext] -> Int
currentLayoutIndent :: [LayoutContext] -> Int
currentLayoutIndent [LayoutContext]
contexts = Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0 ([LayoutContext] -> Maybe Int
currentLayoutIndentMaybe [LayoutContext]
contexts)

currentLayoutIndentMaybe :: [LayoutContext] -> Maybe Int
currentLayoutIndentMaybe :: [LayoutContext] -> Maybe Int
currentLayoutIndentMaybe [LayoutContext]
contexts =
  case [LayoutContext]
contexts of
    LayoutImplicit Int
indent : [LayoutContext]
_ -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
indent
    LayoutImplicitLet Int
indent : [LayoutContext]
_ -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
indent
    LayoutImplicitAfterThenElse Int
indent : [LayoutContext]
_ -> Int -> Maybe Int
forall a. a -> Maybe a
Just Int
indent
    [LayoutContext]
_ -> Maybe Int
forall a. Maybe a
Nothing

isImplicitLayoutContext :: LayoutContext -> Bool
isImplicitLayoutContext :: LayoutContext -> Bool
isImplicitLayoutContext LayoutContext
ctx =
  case LayoutContext
ctx of
    LayoutImplicit Int
_ -> Bool
True
    LayoutImplicitLet Int
_ -> Bool
True
    LayoutImplicitAfterThenElse Int
_ -> Bool
True
    LayoutContext
LayoutExplicit -> Bool
False
    LayoutContext
LayoutDelimiter -> Bool
False

isBOL :: LayoutState -> LexToken -> Bool
isBOL :: LayoutState -> LexToken -> Bool
isBOL LayoutState
st LexToken
tok =
  case LayoutState -> Maybe Int
layoutPrevLine LayoutState
st of
    Just Int
prevLine -> LexToken -> Int
tokenStartLine LexToken
tok Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
prevLine
    Maybe Int
Nothing -> Bool
False

tokenStartLine :: LexToken -> Int
tokenStartLine :: LexToken -> Int
tokenStartLine LexToken
tok =
  case LexToken -> SourceSpan
lexTokenSpan LexToken
tok of
    SourceSpan Int
line Int
_ Int
_ Int
_ -> Int
line
    SourceSpan
NoSourceSpan -> Int
1

tokenStartCol :: LexToken -> Int
tokenStartCol :: LexToken -> Int
tokenStartCol LexToken
tok =
  case LexToken -> SourceSpan
lexTokenSpan LexToken
tok of
    SourceSpan Int
_ Int
col Int
_ Int
_ -> Int
col
    SourceSpan
NoSourceSpan -> Int
1

virtualSymbolToken :: Text -> SourceSpan -> LexToken
virtualSymbolToken :: Text -> SourceSpan -> LexToken
virtualSymbolToken Text
sym SourceSpan
span' =
  LexToken
    { lexTokenKind :: LexTokenKind
lexTokenKind = case Text
sym of
        Text
"{" -> LexTokenKind
TkSpecialLBrace
        Text
"}" -> LexTokenKind
TkSpecialRBrace
        Text
";" -> LexTokenKind
TkSpecialSemicolon
        Text
_ -> String -> LexTokenKind
forall a. HasCallStack => String -> a
error (String
"virtualSymbolToken: unexpected symbol " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
sym),
      lexTokenText :: Text
lexTokenText = Text
sym,
      lexTokenSpan :: SourceSpan
lexTokenSpan = SourceSpan
span'
    }

lexKnownPragma :: LexerState -> Maybe (LexToken, LexerState)
lexKnownPragma :: LexerState -> Maybe (LexToken, LexerState)
lexKnownPragma LexerState
st
  | Just ((Text
raw, LexTokenKind
kind), LexerState
st') <- (String -> Maybe (Int, (Text, LexTokenKind)))
-> LexerState -> Maybe ((Text, LexTokenKind), LexerState)
parsePragmaLike String -> Maybe (Int, (Text, LexTokenKind))
parseLanguagePragma LexerState
st = (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
raw LexTokenKind
kind, LexerState
st')
  | Just ((Text
raw, LexTokenKind
kind), LexerState
st') <- (String -> Maybe (Int, (Text, LexTokenKind)))
-> LexerState -> Maybe ((Text, LexTokenKind), LexerState)
parsePragmaLike String -> Maybe (Int, (Text, LexTokenKind))
parseOptionsPragma LexerState
st = (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
raw LexTokenKind
kind, LexerState
st')
  | Just ((Text
raw, LexTokenKind
kind), LexerState
st') <- (String -> Maybe (Int, (Text, LexTokenKind)))
-> LexerState -> Maybe ((Text, LexTokenKind), LexerState)
parsePragmaLike String -> Maybe (Int, (Text, LexTokenKind))
parseWarningPragma LexerState
st = (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
raw LexTokenKind
kind, LexerState
st')
  | Just ((Text
raw, LexTokenKind
kind), LexerState
st') <- (String -> Maybe (Int, (Text, LexTokenKind)))
-> LexerState -> Maybe ((Text, LexTokenKind), LexerState)
parsePragmaLike String -> Maybe (Int, (Text, LexTokenKind))
parseDeprecatedPragma LexerState
st = (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
raw LexTokenKind
kind, LexerState
st')
  | Bool
otherwise = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing

parsePragmaLike :: (String -> Maybe (Int, (Text, LexTokenKind))) -> LexerState -> Maybe ((Text, LexTokenKind), LexerState)
parsePragmaLike :: (String -> Maybe (Int, (Text, LexTokenKind)))
-> LexerState -> Maybe ((Text, LexTokenKind), LexerState)
parsePragmaLike String -> Maybe (Int, (Text, LexTokenKind))
parser LexerState
st = do
  (n, out) <- String -> Maybe (Int, (Text, LexTokenKind))
parser (LexerState -> String
lexerInput LexerState
st)
  pure (out, advanceChars (take n (lexerInput st)) st)

lexIdentifier :: LexerState -> Maybe (LexToken, LexerState)
lexIdentifier :: LexerState -> Maybe (LexToken, LexerState)
lexIdentifier LexerState
st =
  case LexerState -> String
lexerInput LexerState
st of
    Char
c : String
rest
      | Char -> Bool
isIdentStart Char
c ->
          let (String
seg, String
rest0) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isIdentTail String
rest
              firstChunk :: String
firstChunk = Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
seg
              (String
consumed, String
rest1, Bool
isQualified) = String -> String -> (String, String, Bool)
gatherQualified String
firstChunk String
rest0
           in -- Check if we have a qualified operator (e.g., Prelude.+)
              case (Bool
isQualified Bool -> Bool -> Bool
|| Char -> Bool
isAsciiUpper Char
c, String
rest1) of
                (Bool
True, Char
'.' : Char
opChar : String
opRest)
                  | Char -> Bool
isSymbolicOpCharNotDot Char
opChar ->
                      -- This is a qualified operator like Prelude.+ or A.B.C.:++
                      let (String
opChars, String
_) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSymbolicOpChar (Char
opChar Char -> ShowS
forall a. a -> [a] -> [a]
: String
opRest)
                          fullOp :: String
fullOp = String
consumed String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"." String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
opChars
                          opTxt :: Text
opTxt = String -> Text
T.pack String
fullOp
                          kind :: LexTokenKind
kind =
                            if Char
opChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':'
                              then Text -> LexTokenKind
TkQConSym Text
opTxt
                              else Text -> LexTokenKind
TkQVarSym Text
opTxt
                          st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
fullOp LexerState
st
                       in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
opTxt LexTokenKind
kind, LexerState
st')
                (Bool, String)
_ ->
                  -- Regular identifier
                  let ident :: Text
ident = String -> Text
T.pack String
consumed
                      kind :: LexTokenKind
kind = Char -> Bool -> Text -> LexTokenKind
classifyIdentifier Char
c Bool
isQualified Text
ident
                      st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
consumed LexerState
st
                   in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
ident LexTokenKind
kind, LexerState
st')
    String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  where
    -- Returns (consumed, remaining, isQualified)
    gatherQualified :: String -> String -> (String, String, Bool)
gatherQualified String
acc String
chars =
      case String
chars of
        Char
'.' : Char
c' : String
more
          | Char -> Bool
isIdentStart Char
c' ->
              let (String
seg, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isIdentTail String
more
               in String -> String -> (String, String, Bool)
gatherQualified (String
acc String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"." String -> ShowS
forall a. Semigroup a => a -> a -> a
<> [Char
c'] String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
seg) String
rest
        String
_ -> (String
acc, String
chars, Char
'.' Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
acc)

    -- Check for symbol char that is not '.' to avoid consuming module path dots
    isSymbolicOpCharNotDot :: Char -> Bool
isSymbolicOpCharNotDot Char
c = Char -> Bool
isSymbolicOpChar Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'.'

    classifyIdentifier :: Char -> Bool -> Text -> LexTokenKind
classifyIdentifier Char
firstChar Bool
isQualified Text
ident
      | Bool
isQualified =
          -- Qualified name: use final part to determine var/con
          let finalPart :: Text
finalPart = (Char -> Bool) -> Text -> Text
T.takeWhileEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'.') Text
ident
              firstCharFinal :: Char
firstCharFinal = HasCallStack => Text -> Char
Text -> Char
T.head Text
finalPart
           in if Char -> Bool
isAsciiUpper Char
firstCharFinal
                then Text -> LexTokenKind
TkQConId Text
ident
                else Text -> LexTokenKind
TkQVarId Text
ident
      | Bool
otherwise =
          -- Unqualified: check for keyword first
          case Text -> Maybe LexTokenKind
keywordTokenKind Text
ident of
            Just LexTokenKind
kw -> LexTokenKind
kw
            Maybe LexTokenKind
Nothing ->
              if Char -> Bool
isAsciiUpper Char
firstChar
                then Text -> LexTokenKind
TkConId Text
ident
                else Text -> LexTokenKind
TkVarId Text
ident

-- | Handle minus in the context of NegativeLiterals and LexicalNegation extensions.
--
-- This function is called when the input starts with '-' and either NegativeLiterals
-- or LexicalNegation is enabled. It handles the following cases:
--
-- 1. NegativeLiterals: If '-' is immediately followed by a numeric literal (no space),
-- | Handle minus in the context of NegativeLiterals and LexicalNegation extensions.
--
-- When NegativeLiterals is enabled and context allows, attempts to lex a negative
-- literal by consuming '-' and delegating to existing number lexers.
--
-- When LexicalNegation is enabled, emits TkPrefixMinus or TkMinusOperator based
-- on position.
--
-- Otherwise, return Nothing and let lexOperator handle it.
lexNegativeLiteralOrMinus :: LexerState -> Maybe (LexToken, LexerState)
lexNegativeLiteralOrMinus :: LexerState -> Maybe (LexToken, LexerState)
lexNegativeLiteralOrMinus LexerState
st
  | Bool -> Bool
not Bool
hasNegExt = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  | Bool -> Bool
not (String -> Bool
isStandaloneMinus (LexerState -> String
lexerInput LexerState
st)) = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  | Bool
otherwise =
      let prevAllows :: Bool
prevAllows = Maybe LexTokenKind -> Bool -> Bool
allowsMergeOrPrefix (LexerState -> Maybe LexTokenKind
lexerPrevTokenKind LexerState
st) (LexerState -> Bool
lexerHadTrivia LexerState
st)
          rest :: String
rest = Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
1 (LexerState -> String
lexerInput LexerState
st) -- input after '-'
       in if Extension
NegativeLiterals Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st Bool -> Bool -> Bool
&& Bool
prevAllows
            then case LexerState -> Maybe (LexToken, LexerState)
tryLexNumberAfterMinus LexerState
st of
              Just (LexToken, LexerState)
result -> (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexToken, LexerState)
result
              Maybe (LexToken, LexerState)
Nothing -> LexerState -> String -> Bool -> Maybe (LexToken, LexerState)
lexMinusOperator LexerState
st String
rest Bool
prevAllows
            else LexerState -> String -> Bool -> Maybe (LexToken, LexerState)
lexMinusOperator LexerState
st String
rest Bool
prevAllows
  where
    hasNegExt :: Bool
hasNegExt =
      Extension
NegativeLiterals Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st
        Bool -> Bool -> Bool
|| Extension
LexicalNegation Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st

-- | Check if input starts with a standalone '-' (not part of ->, -<, etc.)
isStandaloneMinus :: String -> Bool
isStandaloneMinus :: String -> Bool
isStandaloneMinus String
input =
  case String
input of
    Char
'-' : Char
c : String
_ | Char -> Bool
isSymbolicOpChar Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-' -> Bool
False -- part of multi-char op
    Char
'-' : String
_ -> Bool
True
    String
_ -> Bool
False

-- | Try to lex a negative number by delegating to existing number lexers.
-- Consumes '-', runs number lexers on remainder, negates result if successful.
tryLexNumberAfterMinus :: LexerState -> Maybe (LexToken, LexerState)
tryLexNumberAfterMinus :: LexerState -> Maybe (LexToken, LexerState)
tryLexNumberAfterMinus LexerState
st = do
  -- Create a temporary state positioned after the '-'
  let stAfterMinus :: LexerState
stAfterMinus = String -> LexerState -> LexerState
advanceChars String
"-" LexerState
st
  -- Try existing number lexers in order (most specific first)
  (numTok, stFinal) <- [LexerState -> Maybe (LexToken, LexerState)]
-> LexerState -> Maybe (LexToken, LexerState)
forall {t} {a}. [t -> Maybe a] -> t -> Maybe a
firstJust [LexerState -> Maybe (LexToken, LexerState)]
numberLexers LexerState
stAfterMinus
  -- Build combined negative token
  Just (negateToken st numTok, stFinal)
  where
    numberLexers :: [LexerState -> Maybe (LexToken, LexerState)]
numberLexers = [LexerState -> Maybe (LexToken, LexerState)
lexHexFloat, LexerState -> Maybe (LexToken, LexerState)
lexFloat, LexerState -> Maybe (LexToken, LexerState)
lexIntBase, LexerState -> Maybe (LexToken, LexerState)
lexInt]

    firstJust :: [t -> Maybe a] -> t -> Maybe a
firstJust [] t
_ = Maybe a
forall a. Maybe a
Nothing
    firstJust (t -> Maybe a
f : [t -> Maybe a]
fs) t
s = case t -> Maybe a
f t
s of
      Just a
result -> a -> Maybe a
forall a. a -> Maybe a
Just a
result
      Maybe a
Nothing -> [t -> Maybe a] -> t -> Maybe a
firstJust [t -> Maybe a]
fs t
s

-- | Negate a numeric token and adjust its span/text to include the leading '-'.
negateToken :: LexerState -> LexToken -> LexToken
negateToken :: LexerState -> LexToken -> LexToken
negateToken LexerState
stBefore LexToken
numTok =
  LexToken
    { lexTokenKind :: LexTokenKind
lexTokenKind = LexTokenKind -> LexTokenKind
negateKind (LexToken -> LexTokenKind
lexTokenKind LexToken
numTok),
      lexTokenText :: Text
lexTokenText = Text
"-" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> LexToken -> Text
lexTokenText LexToken
numTok,
      lexTokenSpan :: SourceSpan
lexTokenSpan = SourceSpan -> SourceSpan
extendSpanLeft (LexToken -> SourceSpan
lexTokenSpan LexToken
numTok)
    }
  where
    negateKind :: LexTokenKind -> LexTokenKind
negateKind LexTokenKind
k = case LexTokenKind
k of
      TkInteger Integer
n -> Integer -> LexTokenKind
TkInteger (Integer -> Integer
forall a. Num a => a -> a
negate Integer
n)
      TkIntegerBase Integer
n Text
repr -> Integer -> Text -> LexTokenKind
TkIntegerBase (Integer -> Integer
forall a. Num a => a -> a
negate Integer
n) (Text
"-" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
repr)
      TkFloat Double
n Text
repr -> Double -> Text -> LexTokenKind
TkFloat (Double -> Double
forall a. Num a => a -> a
negate Double
n) (Text
"-" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
repr)
      LexTokenKind
other -> LexTokenKind
other -- shouldn't happen

    -- Extend span to start at the '-' position
    extendSpanLeft :: SourceSpan -> SourceSpan
extendSpanLeft SourceSpan
sp = case SourceSpan
sp of
      SourceSpan Int
_ Int
_ Int
endLine Int
endCol ->
        Int -> Int -> Int -> Int -> SourceSpan
SourceSpan (LexerState -> Int
lexerLine LexerState
stBefore) (LexerState -> Int
lexerCol LexerState
stBefore) Int
endLine Int
endCol
      SourceSpan
NoSourceSpan -> SourceSpan
NoSourceSpan

-- | Emit TkPrefixMinus or TkMinusOperator based on LexicalNegation rules.
lexMinusOperator :: LexerState -> String -> Bool -> Maybe (LexToken, LexerState)
lexMinusOperator :: LexerState -> String -> Bool -> Maybe (LexToken, LexerState)
lexMinusOperator LexerState
st String
rest Bool
prevAllows
  | Extension
LexicalNegation Extension -> [Extension] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` LexerState -> [Extension]
lexerExtensions LexerState
st = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  | Bool
otherwise =
      let st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
"-" LexerState
st
          kind :: LexTokenKind
kind =
            if Bool
prevAllows Bool -> Bool -> Bool
&& String -> Bool
canStartNegatedAtom String
rest
              then LexTokenKind
TkPrefixMinus
              else LexTokenKind
TkMinusOperator
       in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
"-" LexTokenKind
kind, LexerState
st')

-- | Check if the preceding token context allows a merge (NegativeLiterals) or
-- prefix minus (LexicalNegation).
--
-- The merge/prefix is allowed when:
-- - There is no previous token (start of input)
-- - There was whitespace/trivia before the '-'
-- - The previous token is an operator or punctuation that allows tight unary prefix
allowsMergeOrPrefix :: Maybe LexTokenKind -> Bool -> Bool
allowsMergeOrPrefix :: Maybe LexTokenKind -> Bool -> Bool
allowsMergeOrPrefix Maybe LexTokenKind
prev Bool
hadTrivia =
  case Maybe LexTokenKind
prev of
    Maybe LexTokenKind
Nothing -> Bool
True
    Just LexTokenKind
_ | Bool
hadTrivia -> Bool
True
    Just LexTokenKind
prevKind -> LexTokenKind -> Bool
prevTokenAllowsTightPrefix LexTokenKind
prevKind

-- | Check if the preceding token allows a tight unary prefix (like negation).
-- This is used when there's no whitespace between the previous token and '-'.
prevTokenAllowsTightPrefix :: LexTokenKind -> Bool
prevTokenAllowsTightPrefix :: LexTokenKind -> Bool
prevTokenAllowsTightPrefix LexTokenKind
kind =
  case LexTokenKind
kind of
    LexTokenKind
TkSpecialLParen -> Bool
True
    LexTokenKind
TkSpecialLBracket -> Bool
True
    LexTokenKind
TkSpecialLBrace -> Bool
True
    LexTokenKind
TkSpecialComma -> Bool
True
    LexTokenKind
TkSpecialSemicolon -> Bool
True
    TkVarSym Text
_ -> Bool
True
    TkConSym Text
_ -> Bool
True
    TkQVarSym Text
_ -> Bool
True
    TkQConSym Text
_ -> Bool
True
    LexTokenKind
TkMinusOperator -> Bool
True
    LexTokenKind
TkPrefixMinus -> Bool
True
    LexTokenKind
TkReservedEquals -> Bool
True
    LexTokenKind
TkReservedLeftArrow -> Bool
True
    LexTokenKind
TkReservedRightArrow -> Bool
True
    LexTokenKind
TkReservedDoubleArrow -> Bool
True
    LexTokenKind
TkReservedDoubleColon -> Bool
True
    LexTokenKind
TkReservedPipe -> Bool
True
    LexTokenKind
TkReservedBackslash -> Bool
True
    LexTokenKind
_ -> Bool
False

-- | Check if the given input could start a negated atom (for LexicalNegation).
canStartNegatedAtom :: String -> Bool
canStartNegatedAtom :: String -> Bool
canStartNegatedAtom String
rest =
  case String
rest of
    [] -> Bool
False
    Char
c : String
_
      | Char -> Bool
isIdentStart Char
c -> Bool
True -- identifier
      | Char -> Bool
isDigit Char
c -> Bool
True -- number
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' -> Bool
True -- char literal
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"' -> Bool
True -- string literal
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'(' -> Bool
True -- parenthesized expression
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'[' -> Bool
True -- list/TH brackets
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\\' -> Bool
True -- lambda
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' -> Bool
True -- nested negation
      | Bool
otherwise -> Bool
False

-- | Whitespace-sensitive lexing for ! and ~ operators (GHC proposal 0229).
--
-- Per the proposal, these operators are classified based on surrounding whitespace:
--   - Prefix occurrence: whitespace before, no whitespace after → bang/lazy pattern
--   - Otherwise (loose infix, tight infix, suffix) → regular operator
--
-- Examples:
--   a ! b   -- loose infix (operator)
--   a!b     -- tight infix (operator)
--   a !b    -- prefix (bang pattern)
--   a! b    -- suffix (operator)
lexBangOrTildeOperator :: LexerState -> Maybe (LexToken, LexerState)
lexBangOrTildeOperator :: LexerState -> Maybe (LexToken, LexerState)
lexBangOrTildeOperator LexerState
st =
  case LexerState -> String
lexerInput LexerState
st of
    Char
'!' : String
rest -> LexerState
-> Char
-> String
-> LexTokenKind
-> String
-> Maybe (LexToken, LexerState)
lexPrefixSensitiveOp LexerState
st Char
'!' String
"!" LexTokenKind
TkPrefixBang String
rest
    Char
'~' : String
rest -> LexerState
-> Char
-> String
-> LexTokenKind
-> String
-> Maybe (LexToken, LexerState)
lexPrefixSensitiveOp LexerState
st Char
'~' String
"~" LexTokenKind
TkPrefixTilde String
rest
    String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing

-- | Lex a whitespace-sensitive prefix operator.
-- Returns TkPrefixBang/TkPrefixTilde if in prefix position, otherwise Nothing
-- to let lexOperator handle it as a regular VarSym.
--
-- Per GHC proposal 0229, prefix position is determined by:
--   - Whitespace before the operator, OR
--   - Previous token is an opening bracket/punctuation that allows tight prefix
-- AND no whitespace after (next char can start a pattern atom).
lexPrefixSensitiveOp :: LexerState -> Char -> String -> LexTokenKind -> String -> Maybe (LexToken, LexerState)
lexPrefixSensitiveOp :: LexerState
-> Char
-> String
-> LexTokenKind
-> String
-> Maybe (LexToken, LexerState)
lexPrefixSensitiveOp LexerState
st Char
opChar String
opStr LexTokenKind
prefixKind String
rest
  -- Only handle single-character ! or ~ (not part of multi-char operator like !=)
  | String -> Bool
isMultiCharOp String
rest = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  -- Prefix position: (whitespace before OR opening token before) AND next char can start a pattern atom
  | Bool
isPrefixPosition Bool -> Bool -> Bool
&& String -> Bool
canStartPrefixPatternAtom String
rest =
      let st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
opStr LexerState
st
       in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (Char -> Text
T.singleton Char
opChar) LexTokenKind
prefixKind, LexerState
st')
  -- Otherwise, let lexOperator handle it as a regular operator
  | Bool
otherwise = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  where
    -- Check if rest starts with another symbolic operator char (making this a multi-char op)
    isMultiCharOp :: String -> Bool
isMultiCharOp (Char
c : String
_) = Char -> Bool
isSymbolicOpChar Char
c
    isMultiCharOp [] = Bool
False
    -- Prefix position is allowed when:
    -- - There is no previous token (start of input)
    -- - There was whitespace/trivia before the operator
    -- - The previous token is an opening bracket or punctuation that allows tight prefix
    isPrefixPosition :: Bool
isPrefixPosition =
      case LexerState -> Maybe LexTokenKind
lexerPrevTokenKind LexerState
st of
        Maybe LexTokenKind
Nothing -> Bool
True -- start of input
        Just LexTokenKind
prevKind
          | LexerState -> Bool
lexerHadTrivia LexerState
st -> Bool
True -- had whitespace before
          | Bool
otherwise -> LexTokenKind -> Bool
prevTokenAllowsTightPrefix LexTokenKind
prevKind -- opening bracket, etc.

-- | Check if the given input could start a pattern atom (for prefix ! and ~).
-- This is similar to canStartNegatedAtom but tailored for patterns.
canStartPrefixPatternAtom :: String -> Bool
canStartPrefixPatternAtom :: String -> Bool
canStartPrefixPatternAtom String
rest =
  case String
rest of
    [] -> Bool
False
    Char
c : String
_
      | Char -> Bool
isIdentStart Char
c -> Bool
True -- identifier (variable or constructor)
      | Char -> Bool
isDigit Char
c -> Bool
True -- numeric literal
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' -> Bool
True -- char literal
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"' -> Bool
True -- string literal
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'(' -> Bool
True -- parenthesized pattern or tuple
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'[' -> Bool
True -- list pattern
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' -> Bool
True -- wildcard
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'!' -> Bool
True -- nested bang pattern
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'~' -> Bool
True -- nested lazy pattern
      | Bool
otherwise -> Bool
False

lexOperator :: LexerState -> Maybe (LexToken, LexerState)
lexOperator :: LexerState -> Maybe (LexToken, LexerState)
lexOperator LexerState
st =
  case (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSymbolicOpChar (LexerState -> String
lexerInput LexerState
st) of
    (op :: String
op@(Char
c : String
_), String
_) ->
      let txt :: Text
txt = String -> Text
T.pack String
op
          st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
op LexerState
st
          hasUnicode :: Bool
hasUnicode = Extension
UnicodeSyntax Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st
          kind :: LexTokenKind
kind = case Text -> Maybe LexTokenKind
reservedOpTokenKind Text
txt of
            Just LexTokenKind
reserved -> LexTokenKind
reserved
            Maybe LexTokenKind
Nothing
              | Bool
hasUnicode -> Text -> Char -> LexTokenKind
unicodeOpTokenKind Text
txt Char
c
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':' -> Text -> LexTokenKind
TkConSym Text
txt
              | Bool
otherwise -> Text -> LexTokenKind
TkVarSym Text
txt
       in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
txt LexTokenKind
kind, LexerState
st')
    (String, String)
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing

-- | Map Unicode operators to their ASCII equivalents when UnicodeSyntax is enabled.
-- Returns the appropriate token kind for known Unicode operators, or falls back
-- to TkVarSym/TkConSym based on whether the first character is ':'.
unicodeOpTokenKind :: Text -> Char -> LexTokenKind
unicodeOpTokenKind :: Text -> Char -> LexTokenKind
unicodeOpTokenKind Text
txt Char
firstChar =
  case Text -> String
T.unpack Text
txt of
    String
"∷" -> LexTokenKind
TkReservedDoubleColon -- :: (proportion)
    String
"⇒" -> LexTokenKind
TkReservedDoubleArrow -- => (rightwards double arrow)
    String
"→" -> LexTokenKind
TkReservedRightArrow -- -> (rightwards arrow)
    String
"←" -> LexTokenKind
TkReservedLeftArrow -- <- (leftwards arrow)
    String
"∀" -> Text -> LexTokenKind
TkVarId Text
"forall" -- forall (for all)
    String
"★" -> Text -> LexTokenKind
TkVarSym Text
"*" -- star (for kind signatures)
    String
"⤙" -> Text -> LexTokenKind
TkVarSym Text
"-<" -- -< (leftwards arrow-tail)
    String
"⤚" -> Text -> LexTokenKind
TkVarSym Text
">-" -- >- (rightwards arrow-tail)
    String
"⤛" -> Text -> LexTokenKind
TkVarSym Text
"-<<" -- -<< (leftwards double arrow-tail)
    String
"⤜" -> Text -> LexTokenKind
TkVarSym Text
">>-" -- >>- (rightwards double arrow-tail)
    String
"⦇" -> Text -> LexTokenKind
TkVarSym Text
"(|" -- (| left banana bracket
    String
"⦈" -> Text -> LexTokenKind
TkVarSym Text
"|)" -- right banana bracket |)
    String
"⟦" -> Text -> LexTokenKind
TkVarSym Text
"[|" -- [| left semantic bracket
    String
"⟧" -> Text -> LexTokenKind
TkVarSym Text
"|]" -- right semantic bracket |]
    String
"⊸" -> Text -> LexTokenKind
TkVarSym Text
"%1->" -- %1-> (linear arrow)
    String
_
      | Char
firstChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':' -> Text -> LexTokenKind
TkConSym Text
txt
      | Bool
otherwise -> Text -> LexTokenKind
TkVarSym Text
txt

lexSymbol :: LexerState -> Maybe (LexToken, LexerState)
lexSymbol :: LexerState -> Maybe (LexToken, LexerState)
lexSymbol LexerState
st =
  [(String, LexTokenKind)] -> Maybe (LexToken, LexerState)
firstJust
    [ (String
"(", LexTokenKind
TkSpecialLParen),
      (String
")", LexTokenKind
TkSpecialRParen),
      (String
"[", LexTokenKind
TkSpecialLBracket),
      (String
"]", LexTokenKind
TkSpecialRBracket),
      (String
"{", LexTokenKind
TkSpecialLBrace),
      (String
"}", LexTokenKind
TkSpecialRBrace),
      (String
",", LexTokenKind
TkSpecialComma),
      (String
";", LexTokenKind
TkSpecialSemicolon),
      (String
"`", LexTokenKind
TkSpecialBacktick)
    ]
  where
    firstJust :: [(String, LexTokenKind)] -> Maybe (LexToken, LexerState)
firstJust [(String, LexTokenKind)]
xs =
      case [(String, LexTokenKind)]
xs of
        [] -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
        (String
txt, LexTokenKind
kind) : [(String, LexTokenKind)]
rest ->
          if String
txt String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`List.isPrefixOf` LexerState -> String
lexerInput LexerState
st
            then
              let st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
txt LexerState
st
               in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
txt) LexTokenKind
kind, LexerState
st')
            else [(String, LexTokenKind)] -> Maybe (LexToken, LexerState)
firstJust [(String, LexTokenKind)]
rest

lexIntBase :: LexerState -> Maybe (LexToken, LexerState)
lexIntBase :: LexerState -> Maybe (LexToken, LexerState)
lexIntBase LexerState
st =
  case LexerState -> String
lexerInput LexerState
st of
    Char
'0' : Char
base : String
rest
      | Char
base Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"xXoObB" :: String) ->
          let allowUnderscores :: Bool
allowUnderscores = Extension
NumericUnderscores Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st
              isDigitChar :: Char -> Bool
isDigitChar
                | Char
base Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"xX" :: String) = Char -> Bool
isHexDigit
                | Char
base Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"oO" :: String) = Char -> Bool
isOctDigit
                | Bool
otherwise = (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"01" :: String))
              (String
digitsRaw, String
_) = Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithLeadingUnderscores Bool
allowUnderscores Char -> Bool
isDigitChar String
rest
           in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digitsRaw
                then Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
                else
                  let raw :: String
raw = Char
'0' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
base Char -> ShowS
forall a. a -> [a] -> [a]
: String
digitsRaw
                      txt :: Text
txt = String -> Text
T.pack String
raw
                      n :: Integer
n
                        | Char
base Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"xX" :: String) = Text -> Integer
readHexLiteral Text
txt
                        | Char
base Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"oO" :: String) = Text -> Integer
readOctLiteral Text
txt
                        | Bool
otherwise = Text -> Integer
readBinLiteral Text
txt
                      st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
                   in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
txt (Integer -> Text -> LexTokenKind
TkIntegerBase Integer
n Text
txt), LexerState
st')
    String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing

lexHexFloat :: LexerState -> Maybe (LexToken, LexerState)
lexHexFloat :: LexerState -> Maybe (LexToken, LexerState)
lexHexFloat LexerState
st = do
  ('0' : x : rest) <- String -> Maybe String
forall a. a -> Maybe a
Just (LexerState -> String
lexerInput LexerState
st)
  if x `notElem` ("xX" :: String)
    then Nothing
    else do
      let (intDigits, rest1) = span isHexDigit rest
      if null intDigits
        then Nothing
        else do
          let (mFracDigits, rest2) =
                case rest1 of
                  Char
'.' : String
more ->
                    let (String
frac, String
rest') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isHexDigit String
more
                     in (String -> Maybe String
forall a. a -> Maybe a
Just String
frac, String
rest')
                  String
_ -> (Maybe String
forall a. Maybe a
Nothing, String
rest1)
          expo@(_ : expoRest) <- takeHexExponent rest2
          let fracDigits = String -> Maybe String -> String
forall a. a -> Maybe a -> a
fromMaybe String
"" Maybe String
mFracDigits
          if null expoRest
            then Nothing
            else
              let dotAndFrac =
                    case Maybe String
mFracDigits of
                      Just String
ds -> Char
'.' Char -> ShowS
forall a. a -> [a] -> [a]
: String
ds
                      Maybe String
Nothing -> String
""
                  raw = Char
'0' Char -> ShowS
forall a. a -> [a] -> [a]
: Char
x Char -> ShowS
forall a. a -> [a] -> [a]
: String
intDigits String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
dotAndFrac String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
expo
                  txt = String -> Text
T.pack String
raw
                  value = String -> String -> String -> Double
parseHexFloatLiteral String
intDigits String
fracDigits String
expo
                  st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
               in Just (mkToken st st' txt (TkFloat value txt), st')

lexFloat :: LexerState -> Maybe (LexToken, LexerState)
lexFloat :: LexerState -> Maybe (LexToken, LexerState)
lexFloat LexerState
st =
  let allowUnderscores :: Bool
allowUnderscores = Extension
NumericUnderscores Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st
      (String
lhsRaw, String
rest) = Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores Bool
allowUnderscores Char -> Bool
isDigit (LexerState -> String
lexerInput LexerState
st)
   in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
lhsRaw
        then Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
        else case String
rest of
          Char
'.' : Char
d : String
more
            | Char -> Bool
isDigit Char
d ->
                let (String
rhsRaw, String
rest') = Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores Bool
allowUnderscores Char -> Bool
isDigit (Char
d Char -> ShowS
forall a. a -> [a] -> [a]
: String
more)
                    (String
expo, String
_) = Bool -> String -> (String, String)
takeExponent Bool
allowUnderscores String
rest'
                    raw :: String
raw = String
lhsRaw String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"." String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
rhsRaw String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
expo
                    txt :: Text
txt = String -> Text
T.pack String
raw
                    normalized :: String
normalized = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'_') String
raw
                    st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
                 in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
txt (Double -> Text -> LexTokenKind
TkFloat (String -> Double
forall a. Read a => String -> a
read String
normalized) Text
txt), LexerState
st')
          String
_ ->
            case Bool -> String -> (String, String)
takeExponent Bool
allowUnderscores String
rest of
              (String
"", String
_) -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
              (String
expo, String
_) ->
                let raw :: String
raw = String
lhsRaw String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
expo
                    txt :: Text
txt = String -> Text
T.pack String
raw
                    normalized :: String
normalized = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'_') String
raw
                    st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
                 in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
txt (Double -> Text -> LexTokenKind
TkFloat (String -> Double
forall a. Read a => String -> a
read String
normalized) Text
txt), LexerState
st')

lexInt :: LexerState -> Maybe (LexToken, LexerState)
lexInt :: LexerState -> Maybe (LexToken, LexerState)
lexInt LexerState
st =
  let allowUnderscores :: Bool
allowUnderscores = Extension
NumericUnderscores Extension -> [Extension] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` LexerState -> [Extension]
lexerExtensions LexerState
st
      (String
digitsRaw, String
_) = Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores Bool
allowUnderscores Char -> Bool
isDigit (LexerState -> String
lexerInput LexerState
st)
   in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digitsRaw
        then Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
        else
          let txt :: Text
txt = String -> Text
T.pack String
digitsRaw
              digits :: String
digits = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'_') String
digitsRaw
              st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
digitsRaw LexerState
st
           in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
txt (Integer -> LexTokenKind
TkInteger (String -> Integer
forall a. Read a => String -> a
read String
digits)), LexerState
st')

lexPromotedQuote :: LexerState -> Maybe (LexToken, LexerState)
lexPromotedQuote :: LexerState -> Maybe (LexToken, LexerState)
lexPromotedQuote LexerState
st
  | Extension
DataKinds Extension -> [Extension] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` LexerState -> [Extension]
lexerExtensions LexerState
st = Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  | Bool
otherwise =
      case LexerState -> String
lexerInput LexerState
st of
        Char
'\'' : String
rest
          | String -> Bool
isValidCharLiteral String
rest -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
          | String -> Bool
isPromotionStart String
rest ->
              let st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
"'" LexerState
st
               in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' Text
"'" (Text -> LexTokenKind
TkVarSym Text
"'"), LexerState
st')
          | Bool
otherwise -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
        String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  where
    isValidCharLiteral :: String -> Bool
isValidCharLiteral String
chars =
      case Char -> String -> Either String (String, String)
scanQuoted Char
'\'' String
chars of
        Right (String
body, String
_) -> Maybe Char -> Bool
forall a. Maybe a -> Bool
isJust (String -> Maybe Char
readMaybeChar (Char
'\'' Char -> ShowS
forall a. a -> [a] -> [a]
: String
body String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"'"))
        Left String
_ -> Bool
False

    isPromotionStart :: String -> Bool
isPromotionStart String
chars =
      case String
chars of
        Char
c : String
_
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'[' -> Bool
True
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'(' -> Bool
True
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':' -> Bool
True
          | Char -> Bool
isAsciiUpper Char
c -> Bool
True
        String
_ -> Bool
False

lexChar :: LexerState -> Maybe (LexToken, LexerState)
lexChar :: LexerState -> Maybe (LexToken, LexerState)
lexChar LexerState
st =
  case LexerState -> String
lexerInput LexerState
st of
    Char
'\'' : String
rest ->
      case Char -> String -> Either String (String, String)
scanQuoted Char
'\'' String
rest of
        Right (String
body, String
_) ->
          let raw :: String
raw = Char
'\'' Char -> ShowS
forall a. a -> [a] -> [a]
: String
body String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"'"
              st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
           in case String -> Maybe Char
readMaybeChar String
raw of
                Just Char
c -> (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
raw) (Char -> LexTokenKind
TkChar Char
c), LexerState
st')
                Maybe Char
Nothing -> (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> Text -> LexToken
mkErrorToken LexerState
st LexerState
st' (String -> Text
T.pack String
raw) Text
"invalid char literal", LexerState
st')
        Left String
raw ->
          let full :: String
full = Char
'\'' Char -> ShowS
forall a. a -> [a] -> [a]
: String
raw
              st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
full LexerState
st
           in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> Text -> LexToken
mkErrorToken LexerState
st LexerState
st' (String -> Text
T.pack String
full) Text
"unterminated char literal", LexerState
st')
    String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing

lexString :: LexerState -> Maybe (LexToken, LexerState)
lexString :: LexerState -> Maybe (LexToken, LexerState)
lexString LexerState
st =
  case LexerState -> String
lexerInput LexerState
st of
    Char
'"' : String
rest ->
      case Char -> String -> Either String (String, String)
scanQuoted Char
'"' String
rest of
        Right (String
body, String
_) ->
          let raw :: String
raw = String
"\"" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
body String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\""
              decoded :: Text
decoded =
                case ReadS String
forall a. Read a => ReadS a
reads String
raw of
                  [(String
str, String
"")] -> String -> Text
T.pack String
str
                  [(String, String)]
_ -> String -> Text
T.pack String
body
              st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
           in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
raw) (Text -> LexTokenKind
TkString Text
decoded), LexerState
st')
        Left String
raw ->
          let full :: String
full = Char
'"' Char -> ShowS
forall a. a -> [a] -> [a]
: String
raw
              st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
full LexerState
st
           in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> Text -> LexToken
mkErrorToken LexerState
st LexerState
st' (String -> Text
T.pack String
full) Text
"unterminated string literal", LexerState
st')
    String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing

lexQuasiQuote :: LexerState -> Maybe (LexToken, LexerState)
lexQuasiQuote :: LexerState -> Maybe (LexToken, LexerState)
lexQuasiQuote LexerState
st =
  case LexerState -> String
lexerInput LexerState
st of
    Char
'[' : String
rest ->
      case String -> Maybe (String, String, String)
parseQuasiQuote String
rest of
        Just (String
quoter, String
body, String
_) ->
          let raw :: String
raw = String
"[" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
quoter String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"|" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
body String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"|]"
              st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
           in (LexToken, LexerState) -> Maybe (LexToken, LexerState)
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
raw) (Text -> Text -> LexTokenKind
TkQuasiQuote (String -> Text
T.pack String
quoter) (String -> Text
T.pack String
body)), LexerState
st')
        Maybe (String, String, String)
Nothing -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
    String
_ -> Maybe (LexToken, LexerState)
forall a. Maybe a
Nothing
  where
    parseQuasiQuote :: String -> Maybe (String, String, String)
parseQuasiQuote String
chars =
      let (String
quoter, String
rest0) = String -> (String, String)
takeQuoter String
chars
       in case String
rest0 of
            Char
'|' : String
rest1
              | Bool -> Bool
not (String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
quoter) ->
                  let (String
body, String
rest2) = String -> String -> (String, String)
breakOnMarker String
"|]" String
rest1
                   in case String
rest2 of
                        Char
'|' : Char
']' : String
_ -> (String, String, String) -> Maybe (String, String, String)
forall a. a -> Maybe a
Just (String
quoter, String
body, Int -> ShowS
forall a. Int -> [a] -> [a]
take (String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
body Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) String
rest1)
                        String
_ -> Maybe (String, String, String)
forall a. Maybe a
Nothing
            String
_ -> Maybe (String, String, String)
forall a. Maybe a
Nothing

lexErrorToken :: LexerState -> Text -> (LexToken, LexerState)
lexErrorToken :: LexerState -> Text -> (LexToken, LexerState)
lexErrorToken LexerState
st Text
msg =
  let raw :: String
raw = Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
1 (LexerState -> String
lexerInput LexerState
st)
      rawTxt :: Text
rawTxt = if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
raw then Text
"<eof>" else String -> Text
T.pack String
raw
      st' :: LexerState
st' = if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
raw then LexerState
st else String -> LexerState -> LexerState
advanceChars String
raw LexerState
st
   in ( LexerState -> LexerState -> Text -> Text -> LexToken
mkErrorToken LexerState
st LexerState
st' Text
rawTxt Text
msg,
        LexerState
st'
      )

mkErrorToken :: LexerState -> LexerState -> Text -> Text -> LexToken
mkErrorToken :: LexerState -> LexerState -> Text -> Text -> LexToken
mkErrorToken LexerState
start LexerState
end Text
rawTxt Text
msg =
  LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
start LexerState
end Text
rawTxt (Text -> LexTokenKind
TkError Text
msg)

tryConsumeLineDirective :: LexerState -> Maybe (Maybe LexToken, LexerState)
tryConsumeLineDirective :: LexerState -> Maybe (Maybe LexToken, LexerState)
tryConsumeLineDirective LexerState
st
  | Bool -> Bool
not (LexerState -> Bool
lexerAtLineStart LexerState
st) = Maybe (Maybe LexToken, LexerState)
forall a. Maybe a
Nothing
  | Bool
otherwise =
      let (String
spaces, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t') (LexerState -> String
lexerInput LexerState
st)
       in case String
rest of
            Char
'#' : String
more ->
              let lineText :: String
lineText = Char
'#' Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
takeLineRemainder String
more
                  consumed :: String
consumed = String
spaces String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
lineText
               in case String -> Maybe DirectiveUpdate
parseHashLineDirective String
lineText of
                    Just DirectiveUpdate
update ->
                      (Maybe LexToken, LexerState) -> Maybe (Maybe LexToken, LexerState)
forall a. a -> Maybe a
Just (Maybe LexToken
forall a. Maybe a
Nothing, String -> DirectiveUpdate -> LexerState -> LexerState
applyDirectiveAdvance String
consumed DirectiveUpdate
update LexerState
st)
                    Maybe DirectiveUpdate
Nothing ->
                      let st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
consumed LexerState
st
                       in (Maybe LexToken, LexerState) -> Maybe (Maybe LexToken, LexerState)
forall a. a -> Maybe a
Just (LexToken -> Maybe LexToken
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
consumed) (Text -> LexTokenKind
TkError Text
"malformed line directive")), LexerState
st')
            String
_ -> Maybe (Maybe LexToken, LexerState)
forall a. Maybe a
Nothing

tryConsumeControlPragma :: LexerState -> Maybe (Maybe LexToken, LexerState)
tryConsumeControlPragma :: LexerState -> Maybe (Maybe LexToken, LexerState)
tryConsumeControlPragma LexerState
st =
  case String -> Maybe (String, Either Text DirectiveUpdate)
parseControlPragma (LexerState -> String
lexerInput LexerState
st) of
    Just (String
consumed0, Right DirectiveUpdate
update0) ->
      let (String
consumed, DirectiveUpdate
update) =
            case DirectiveUpdate -> Maybe Int
directiveLine DirectiveUpdate
update0 of
              Just Int
lineNo ->
                case Int -> ShowS
forall a. Int -> [a] -> [a]
drop (String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
consumed0) (LexerState -> String
lexerInput LexerState
st) of
                  Char
'\n' : String
_ ->
                    (String
consumed0 String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"\n", DirectiveUpdate
update0 {directiveLine = Just lineNo, directiveCol = Just 1})
                  String
_ -> (String
consumed0, DirectiveUpdate
update0)
              Maybe Int
Nothing -> (String
consumed0, DirectiveUpdate
update0)
       in (Maybe LexToken, LexerState) -> Maybe (Maybe LexToken, LexerState)
forall a. a -> Maybe a
Just (Maybe LexToken
forall a. Maybe a
Nothing, String -> DirectiveUpdate -> LexerState -> LexerState
applyDirectiveAdvance String
consumed DirectiveUpdate
update LexerState
st)
    Just (String
consumed, Left Text
msg) ->
      let st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
consumed LexerState
st
       in (Maybe LexToken, LexerState) -> Maybe (Maybe LexToken, LexerState)
forall a. a -> Maybe a
Just (LexToken -> Maybe LexToken
forall a. a -> Maybe a
Just (LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
consumed) (Text -> LexTokenKind
TkError Text
msg)), LexerState
st')
    Maybe (String, Either Text DirectiveUpdate)
Nothing -> Maybe (Maybe LexToken, LexerState)
forall a. Maybe a
Nothing

applyDirectiveAdvance :: String -> DirectiveUpdate -> LexerState -> LexerState
applyDirectiveAdvance :: String -> DirectiveUpdate -> LexerState -> LexerState
applyDirectiveAdvance String
consumed DirectiveUpdate
update LexerState
st =
  let hasTrailingNewline :: Bool
hasTrailingNewline =
        case ShowS
forall a. [a] -> [a]
reverse String
consumed of
          Char
'\n' : String
_ -> Bool
True
          String
_ -> Bool
False
   in LexerState
st
        { lexerInput = drop (length consumed) (lexerInput st),
          lexerLine = maybe (lexerLine st) (max 1) (directiveLine update),
          lexerCol = maybe (lexerCol st) (max 1) (directiveCol update),
          lexerAtLineStart = hasTrailingNewline || (Just 1 == directiveCol update)
        }

consumeLineComment :: LexerState -> LexerState
consumeLineComment :: LexerState -> LexerState
consumeLineComment LexerState
st = String -> LexerState -> LexerState
advanceChars String
consumed LexerState
st
  where
    consumed :: String
consumed = ShowS
takeCommentRemainder (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
2 (LexerState -> String
lexerInput LexerState
st))
    prefix :: String
prefix = String
"--"
    takeCommentRemainder :: ShowS
takeCommentRemainder String
xs =
      String
prefix String -> ShowS
forall a. Semigroup a => a -> a -> a
<> (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') String
xs

consumeUnknownPragma :: LexerState -> Maybe LexerState
consumeUnknownPragma :: LexerState -> Maybe LexerState
consumeUnknownPragma LexerState
st =
  case String -> String -> (String, String)
breakOnMarker String
"#-}" (LexerState -> String
lexerInput LexerState
st) of
    (String
_, String
"") -> Maybe LexerState
forall a. Maybe a
Nothing
    (String
body, String
marker) ->
      let consumed :: String
consumed = String
body String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
3 String
marker
       in LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (String -> LexerState -> LexerState
advanceChars String
consumed LexerState
st)

consumeBlockComment :: LexerState -> Maybe LexerState
consumeBlockComment :: LexerState -> Maybe LexerState
consumeBlockComment LexerState
st =
  case Int -> String -> Maybe String
scanNestedBlockComment Int
1 (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
2 (LexerState -> String
lexerInput LexerState
st)) of
    Just String
consumedTail -> LexerState -> Maybe LexerState
forall a. a -> Maybe a
Just (String -> LexerState -> LexerState
advanceChars (String
"{-" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
consumedTail) LexerState
st)
    Maybe String
Nothing -> Maybe LexerState
forall a. Maybe a
Nothing

consumeBlockCommentOrError :: LexerState -> LexerState
consumeBlockCommentOrError :: LexerState -> LexerState
consumeBlockCommentOrError LexerState
st =
  case LexerState -> Maybe LexerState
consumeBlockComment LexerState
st of
    Just LexerState
st' -> LexerState
st'
    Maybe LexerState
Nothing ->
      let consumed :: String
consumed = LexerState -> String
lexerInput LexerState
st
          st' :: LexerState
st' = String -> LexerState -> LexerState
advanceChars String
consumed LexerState
st
          tok :: LexToken
tok = LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
st LexerState
st' (String -> Text
T.pack String
consumed) (Text -> LexTokenKind
TkError Text
"unterminated block comment")
       in LexerState
st' {lexerPending = lexerPending st' <> [tok]}

scanNestedBlockComment :: Int -> String -> Maybe String
scanNestedBlockComment :: Int -> String -> Maybe String
scanNestedBlockComment Int
depth String
chars
  | Int
depth Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = String -> Maybe String
forall a. a -> Maybe a
Just String
""
  | Bool
otherwise =
      case String
chars of
        [] -> Maybe String
forall a. Maybe a
Nothing
        Char
'{' : Char
'-' : String
rest -> (String
"{-" String -> ShowS
forall a. Semigroup a => a -> a -> a
<>) ShowS -> Maybe String -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> String -> Maybe String
scanNestedBlockComment (Int
depth Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) String
rest
        Char
'-' : Char
'}' : String
rest ->
          if Int
depth Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
            then String -> Maybe String
forall a. a -> Maybe a
Just String
"-}"
            else (String
"-}" String -> ShowS
forall a. Semigroup a => a -> a -> a
<>) ShowS -> Maybe String -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> String -> Maybe String
scanNestedBlockComment (Int
depth Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) String
rest
        Char
c : String
rest -> (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
:) ShowS -> Maybe String -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> String -> Maybe String
scanNestedBlockComment Int
depth String
rest

tryConsumeKnownPragma :: LexerState -> Maybe ()
tryConsumeKnownPragma :: LexerState -> Maybe ()
tryConsumeKnownPragma LexerState
st =
  case LexerState -> Maybe (LexToken, LexerState)
lexKnownPragma LexerState
st of
    Just (LexToken, LexerState)
_ -> () -> Maybe ()
forall a. a -> Maybe a
Just ()
    Maybe (LexToken, LexerState)
Nothing -> Maybe ()
forall a. Maybe a
Nothing

parseLanguagePragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseLanguagePragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseLanguagePragma String
input = do
  (_, body, consumed) <- [String] -> String -> Maybe (String, String, String)
stripNamedPragma [String
"LANGUAGE"] String
input
  let names = Text -> [ExtensionSetting]
parseLanguagePragmaNames (String -> Text
T.pack String
body)
      raw = String
"{-# LANGUAGE " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack (Text -> [Text] -> Text
T.intercalate Text
", " ((ExtensionSetting -> Text) -> [ExtensionSetting] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map ExtensionSetting -> Text
extensionSettingName [ExtensionSetting]
names)) String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" #-}"
  pure (length consumed, (T.pack raw, TkPragmaLanguage names))

parseOptionsPragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseOptionsPragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseOptionsPragma String
input = do
  (pragmaName, body, consumed) <- [String] -> String -> Maybe (String, String, String)
stripNamedPragma [String
"OPTIONS_GHC", String
"OPTIONS"] String
input
  let settings = Text -> [ExtensionSetting]
parseOptionsPragmaSettings (String -> Text
T.pack String
body)
      raw = String
"{-# " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
pragmaName String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack (Text -> Text
T.strip (String -> Text
T.pack String
body)) String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" #-}"
  pure (length consumed, (T.pack raw, TkPragmaLanguage settings))

parseWarningPragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseWarningPragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseWarningPragma String
input = do
  (_, body, consumed) <- [String] -> String -> Maybe (String, String, String)
stripNamedPragma [String
"WARNING"] String
input
  let txt = Text -> Text
T.strip (String -> Text
T.pack String
body)
      (msg, rawMsg) =
        case body of
          Char
'"' : String
_ ->
            case ReadS String
forall a. Read a => ReadS a
reads String
body of
              [(String
decoded, String
"")] -> (String -> Text
T.pack String
decoded, String -> Text
T.pack String
body)
              [(String, String)]
_ -> (Text
txt, Text
txt)
          String
_ -> (Text
txt, Text
txt)
      raw = Text
"{-# WARNING " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
rawMsg Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" #-}"
  pure (length consumed, (raw, TkPragmaWarning msg))

parseDeprecatedPragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseDeprecatedPragma :: String -> Maybe (Int, (Text, LexTokenKind))
parseDeprecatedPragma String
input = do
  (_, body, consumed) <- [String] -> String -> Maybe (String, String, String)
stripNamedPragma [String
"DEPRECATED"] String
input
  let txt = Text -> Text
T.strip (String -> Text
T.pack String
body)
      (msg, rawMsg) =
        case body of
          Char
'"' : String
_ ->
            case ReadS String
forall a. Read a => ReadS a
reads String
body of
              [(String
decoded, String
"")] -> (String -> Text
T.pack String
decoded, String -> Text
T.pack String
body)
              [(String, String)]
_ -> (Text
txt, Text
txt)
          String
_ -> (Text
txt, Text
txt)
      raw = Text
"{-# DEPRECATED " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
rawMsg Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" #-}"
  pure (length consumed, (raw, TkPragmaDeprecated msg))

stripPragma :: String -> String -> Maybe String
stripPragma :: String -> String -> Maybe String
stripPragma String
name String
input = (\(String
_, String
body, String
_) -> String
body) ((String, String, String) -> String)
-> Maybe (String, String, String) -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String] -> String -> Maybe (String, String, String)
stripNamedPragma [String
name] String
input

stripNamedPragma :: [String] -> String -> Maybe (String, String, String)
stripNamedPragma :: [String] -> String -> Maybe (String, String, String)
stripNamedPragma [String]
names String
input = do
  rest0 <- String -> String -> Maybe String
forall a. Eq a => [a] -> [a] -> Maybe [a]
List.stripPrefix String
"{-#" String
input
  let rest1 = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
rest0
  name <- List.find (`List.isPrefixOf` rest1) names
  rest2 <- List.stripPrefix name rest1
  let rest3 = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
rest2
      (body, marker) = breakOnMarker "#-}" rest3
  guardPrefix "#-}" marker
  let consumedLen = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
input Int -> Int -> Int
forall a. Num a => a -> a -> a
- String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
3 String
marker)
  pure (name, trimRight body, take consumedLen input)

parseLanguagePragmaNames :: Text -> [ExtensionSetting]
parseLanguagePragmaNames :: Text -> [ExtensionSetting]
parseLanguagePragmaNames Text
body =
  (Text -> Maybe ExtensionSetting) -> [Text] -> [ExtensionSetting]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (Text -> Maybe ExtensionSetting
parseExtensionSettingName (Text -> Maybe ExtensionSetting)
-> (Text -> Text) -> Text -> Maybe ExtensionSetting
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#')) (HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"," Text
body)

parseOptionsPragmaSettings :: Text -> [ExtensionSetting]
parseOptionsPragmaSettings :: Text -> [ExtensionSetting]
parseOptionsPragmaSettings Text
body = [Text] -> [ExtensionSetting]
go (Text -> [Text]
pragmaWords Text
body)
  where
    go :: [Text] -> [ExtensionSetting]
go [Text]
ws =
      case [Text]
ws of
        [] -> []
        Text
"-cpp" : [Text]
rest -> Extension -> ExtensionSetting
EnableExtension Extension
CPP ExtensionSetting -> [ExtensionSetting] -> [ExtensionSetting]
forall a. a -> [a] -> [a]
: [Text] -> [ExtensionSetting]
go [Text]
rest
        Text
"-fffi" : [Text]
rest -> Extension -> ExtensionSetting
EnableExtension Extension
ForeignFunctionInterface ExtensionSetting -> [ExtensionSetting] -> [ExtensionSetting]
forall a. a -> [a] -> [a]
: [Text] -> [ExtensionSetting]
go [Text]
rest
        Text
"-fglasgow-exts" : [Text]
rest -> [ExtensionSetting]
glasgowExtsSettings [ExtensionSetting] -> [ExtensionSetting] -> [ExtensionSetting]
forall a. Semigroup a => a -> a -> a
<> [Text] -> [ExtensionSetting]
go [Text]
rest
        Text
opt : [Text]
rest
          | Just Text
ext <- Text -> Text -> Maybe Text
T.stripPrefix Text
"-X" Text
opt,
            Bool -> Bool
not (Text -> Bool
T.null Text
ext) ->
              case Text -> Maybe ExtensionSetting
parseExtensionSettingName Text
ext of
                Just ExtensionSetting
setting -> ExtensionSetting
setting ExtensionSetting -> [ExtensionSetting] -> [ExtensionSetting]
forall a. a -> [a] -> [a]
: [Text] -> [ExtensionSetting]
go [Text]
rest
                Maybe ExtensionSetting
Nothing -> [Text] -> [ExtensionSetting]
go [Text]
rest
        Text
_ : [Text]
rest -> [Text] -> [ExtensionSetting]
go [Text]
rest

glasgowExtsSettings :: [ExtensionSetting]
glasgowExtsSettings :: [ExtensionSetting]
glasgowExtsSettings =
  (Extension -> ExtensionSetting)
-> [Extension] -> [ExtensionSetting]
forall a b. (a -> b) -> [a] -> [b]
map
    Extension -> ExtensionSetting
EnableExtension
    [ Extension
ConstrainedClassMethods,
      Extension
DeriveDataTypeable,
      Extension
DeriveFoldable,
      Extension
DeriveFunctor,
      Extension
DeriveGeneric,
      Extension
DeriveTraversable,
      Extension
EmptyDataDecls,
      Extension
ExistentialQuantification,
      Extension
ExplicitNamespaces,
      Extension
FlexibleContexts,
      Extension
FlexibleInstances,
      Extension
ForeignFunctionInterface,
      Extension
FunctionalDependencies,
      Extension
GeneralizedNewtypeDeriving,
      Extension
ImplicitParams,
      Extension
InterruptibleFFI,
      Extension
KindSignatures,
      Extension
LiberalTypeSynonyms,
      Extension
MagicHash,
      Extension
MultiParamTypeClasses,
      Extension
ParallelListComp,
      Extension
PatternGuards,
      Extension
PostfixOperators,
      Extension
RankNTypes,
      Extension
RecursiveDo,
      Extension
ScopedTypeVariables,
      Extension
StandaloneDeriving,
      Extension
TypeOperators,
      Extension
TypeSynonymInstances,
      Extension
UnboxedTuples,
      Extension
UnicodeSyntax,
      Extension
UnliftedFFITypes
    ]

pragmaWords :: Text -> [Text]
pragmaWords :: Text -> [Text]
pragmaWords Text
txt = [Text] -> String -> Maybe Char -> String -> [Text]
go [] [] Maybe Char
forall a. Maybe a
Nothing (Text -> String
T.unpack Text
txt)
  where
    go :: [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc String
current Maybe Char
quote String
chars =
      case String
chars of
        [] ->
          let acc' :: [Text]
acc' = [Text] -> String -> [Text]
pushCurrent [Text]
acc String
current
           in [Text] -> [Text]
forall a. [a] -> [a]
reverse [Text]
acc'
        Char
c : String
rest ->
          case Maybe Char
quote of
            Just Char
q
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
q -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc String
current Maybe Char
forall a. Maybe a
Nothing String
rest
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\\' ->
                  case String
rest of
                    Char
escaped : String
rest' -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc (Char
escaped Char -> ShowS
forall a. a -> [a] -> [a]
: String
current) Maybe Char
quote String
rest'
                    [] -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc String
current Maybe Char
quote []
              | Bool
otherwise -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
current) Maybe Char
quote String
rest
            Maybe Char
Nothing
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'' -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc String
current (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) String
rest
              | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\\' ->
                  case String
rest of
                    Char
escaped : String
rest' -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc (Char
escaped Char -> ShowS
forall a. a -> [a] -> [a]
: String
current) Maybe Char
forall a. Maybe a
Nothing String
rest'
                    [] -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc String
current Maybe Char
forall a. Maybe a
Nothing []
              | Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
' ', Char
'\n', Char
'\r', Char
'\t'] ->
                  let acc' :: [Text]
acc' = [Text] -> String -> [Text]
pushCurrent [Text]
acc String
current
                   in [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc' [] Maybe Char
forall a. Maybe a
Nothing String
rest
              | Bool
otherwise -> [Text] -> String -> Maybe Char -> String -> [Text]
go [Text]
acc (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
current) Maybe Char
forall a. Maybe a
Nothing String
rest

    pushCurrent :: [Text] -> String -> [Text]
pushCurrent [Text]
acc String
current =
      case ShowS
forall a. [a] -> [a]
reverse String
current of
        [] -> [Text]
acc
        String
token -> String -> Text
T.pack String
token Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc

parseHashLineDirective :: String -> Maybe DirectiveUpdate
parseHashLineDirective :: String -> Maybe DirectiveUpdate
parseHashLineDirective String
raw =
  let trimmed :: String
trimmed = ShowS
trimLeft (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
1 (ShowS
trimLeft String
raw))
      trimmed' :: String
trimmed' =
        if String
"line" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`List.isPrefixOf` String
trimmed
          then (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
4 String
trimmed)
          else String
trimmed
      (String
digits, String
_) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
trimmed'
   in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits
        then Maybe DirectiveUpdate
forall a. Maybe a
Nothing
        else DirectiveUpdate -> Maybe DirectiveUpdate
forall a. a -> Maybe a
Just DirectiveUpdate {directiveLine :: Maybe Int
directiveLine = Int -> Maybe Int
forall a. a -> Maybe a
Just (String -> Int
forall a. Read a => String -> a
read String
digits), directiveCol :: Maybe Int
directiveCol = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1}

parseControlPragma :: String -> Maybe (String, Either Text DirectiveUpdate)
parseControlPragma :: String -> Maybe (String, Either Text DirectiveUpdate)
parseControlPragma String
input
  | Just String
body <- String -> String -> Maybe String
stripPragma String
"LINE" String
input =
      let trimmed :: [String]
trimmed = String -> [String]
words String
body
       in case [String]
trimmed of
            String
lineNo : [String]
_
              | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
lineNo ->
                  (String, Either Text DirectiveUpdate)
-> Maybe (String, Either Text DirectiveUpdate)
forall a. a -> Maybe a
Just
                    ( String -> ShowS
fullPragmaConsumed String
"LINE" String
body,
                      DirectiveUpdate -> Either Text DirectiveUpdate
forall a b. b -> Either a b
Right DirectiveUpdate {directiveLine :: Maybe Int
directiveLine = Int -> Maybe Int
forall a. a -> Maybe a
Just (String -> Int
forall a. Read a => String -> a
read String
lineNo), directiveCol :: Maybe Int
directiveCol = Int -> Maybe Int
forall a. a -> Maybe a
Just Int
1}
                    )
            [String]
_ -> (String, Either Text DirectiveUpdate)
-> Maybe (String, Either Text DirectiveUpdate)
forall a. a -> Maybe a
Just (String -> ShowS
fullPragmaConsumed String
"LINE" String
body, Text -> Either Text DirectiveUpdate
forall a b. a -> Either a b
Left Text
"malformed LINE pragma")
  | Just String
body <- String -> String -> Maybe String
stripPragma String
"COLUMN" String
input =
      let trimmed :: [String]
trimmed = String -> [String]
words String
body
       in case [String]
trimmed of
            String
colNo : [String]
_
              | (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
colNo ->
                  (String, Either Text DirectiveUpdate)
-> Maybe (String, Either Text DirectiveUpdate)
forall a. a -> Maybe a
Just
                    ( String -> ShowS
fullPragmaConsumed String
"COLUMN" String
body,
                      DirectiveUpdate -> Either Text DirectiveUpdate
forall a b. b -> Either a b
Right DirectiveUpdate {directiveLine :: Maybe Int
directiveLine = Maybe Int
forall a. Maybe a
Nothing, directiveCol :: Maybe Int
directiveCol = Int -> Maybe Int
forall a. a -> Maybe a
Just (String -> Int
forall a. Read a => String -> a
read String
colNo)}
                    )
            [String]
_ -> (String, Either Text DirectiveUpdate)
-> Maybe (String, Either Text DirectiveUpdate)
forall a. a -> Maybe a
Just (String -> ShowS
fullPragmaConsumed String
"COLUMN" String
body, Text -> Either Text DirectiveUpdate
forall a b. a -> Either a b
Left Text
"malformed COLUMN pragma")
  | Bool
otherwise = Maybe (String, Either Text DirectiveUpdate)
forall a. Maybe a
Nothing

fullPragmaConsumed :: String -> String -> String
fullPragmaConsumed :: String -> ShowS
fullPragmaConsumed String
name String
body = String
"{-# " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
name String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> ShowS
trimRight String
body String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" #-}"

mkToken :: LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken :: LexerState -> LexerState -> Text -> LexTokenKind -> LexToken
mkToken LexerState
start LexerState
end Text
tokTxt LexTokenKind
kind =
  LexToken
    { lexTokenKind :: LexTokenKind
lexTokenKind = LexTokenKind
kind,
      lexTokenText :: Text
lexTokenText = Text
tokTxt,
      lexTokenSpan :: SourceSpan
lexTokenSpan = LexerState -> LexerState -> SourceSpan
mkSpan LexerState
start LexerState
end
    }

mkSpan :: LexerState -> LexerState -> SourceSpan
mkSpan :: LexerState -> LexerState -> SourceSpan
mkSpan LexerState
start LexerState
end =
  SourceSpan
    { sourceSpanStartLine :: Int
sourceSpanStartLine = LexerState -> Int
lexerLine LexerState
start,
      sourceSpanStartCol :: Int
sourceSpanStartCol = LexerState -> Int
lexerCol LexerState
start,
      sourceSpanEndLine :: Int
sourceSpanEndLine = LexerState -> Int
lexerLine LexerState
end,
      sourceSpanEndCol :: Int
sourceSpanEndCol = LexerState -> Int
lexerCol LexerState
end
    }

advanceChars :: String -> LexerState -> LexerState
advanceChars :: String -> LexerState -> LexerState
advanceChars String
chars LexerState
st = (LexerState -> Char -> LexerState)
-> LexerState -> String -> LexerState
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl LexerState -> Char -> LexerState
advanceOne LexerState
st String
chars
  where
    advanceOne :: LexerState -> Char -> LexerState
advanceOne LexerState
acc Char
ch =
      case Char
ch of
        Char
'\n' ->
          LexerState
acc
            { lexerInput = drop 1 (lexerInput acc),
              lexerLine = lexerLine acc + 1,
              lexerCol = 1,
              lexerAtLineStart = True
            }
        Char
_ ->
          LexerState
acc
            { lexerInput = drop 1 (lexerInput acc),
              lexerCol = lexerCol acc + 1,
              lexerAtLineStart = False
            }

consumeWhile :: (Char -> Bool) -> LexerState -> LexerState
consumeWhile :: (Char -> Bool) -> LexerState -> LexerState
consumeWhile Char -> Bool
f LexerState
st = String -> LexerState -> LexerState
advanceChars ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile Char -> Bool
f (LexerState -> String
lexerInput LexerState
st)) LexerState
st

-- | Take digits with optional underscores.
--
-- When @allowUnderscores@ is True (NumericUnderscores enabled):
--   - Underscores may appear between digits, including consecutive underscores
--   - Leading underscores are NOT allowed (the first character must be a digit)
--   - Trailing underscores cause lexing to stop (they're not consumed)
--
-- When @allowUnderscores@ is False:
--   - No underscores are accepted; only digits are consumed
takeDigitsWithUnderscores :: Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores :: Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores Bool
allowUnderscores Char -> Bool
isDigitChar String
chars =
  let (String
firstChunk, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigitChar String
chars
   in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
firstChunk
        then (String
"", String
chars)
        else
          if Bool
allowUnderscores
            then String -> String -> (String, String)
go String
firstChunk String
rest
            else (String
firstChunk, String
rest)
  where
    go :: String -> String -> (String, String)
go String
acc String
xs =
      case String
xs of
        Char
'_' : String
rest ->
          -- Consume consecutive underscores
          let (String
underscores, String
rest') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_') String
rest
              allUnderscores :: String
allUnderscores = Char
'_' Char -> ShowS
forall a. a -> [a] -> [a]
: String
underscores
              (String
chunk, String
rest'') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigitChar String
rest'
           in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
chunk
                then (String
acc, String
xs) -- Trailing underscore(s), stop here
                else String -> String -> (String, String)
go (String
acc String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
allUnderscores String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
chunk) String
rest''
        String
_ -> (String
acc, String
xs)

-- | Take digits with optional leading underscores after a base prefix.
--
-- When @allowUnderscores@ is True (NumericUnderscores enabled):
--   - Leading underscores are allowed (e.g., 0x_ff, 0x__ff)
--   - Underscores may appear between digits, including consecutive underscores
--   - Trailing underscores cause lexing to stop
--
-- When @allowUnderscores@ is False:
--   - No underscores are accepted; only digits are consumed
takeDigitsWithLeadingUnderscores :: Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithLeadingUnderscores :: Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithLeadingUnderscores Bool
allowUnderscores Char -> Bool
isDigitChar String
chars
  | Bool -> Bool
not Bool
allowUnderscores =
      let (String
digits, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigitChar String
chars
       in (String
digits, String
rest)
  | Bool
otherwise =
      -- With NumericUnderscores, leading underscores are allowed
      let (String
leadingUnderscores, String
rest0) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_') String
chars
          (String
firstChunk, String
rest1) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigitChar String
rest0
       in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
firstChunk
            then (String
"", String
chars) -- Must have at least one digit somewhere
            else String -> String -> (String, String)
go (String
leadingUnderscores String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
firstChunk) String
rest1
  where
    go :: String -> String -> (String, String)
go String
acc String
xs =
      case String
xs of
        Char
'_' : String
rest ->
          let (String
underscores, String
rest') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_') String
rest
              allUnderscores :: String
allUnderscores = Char
'_' Char -> ShowS
forall a. a -> [a] -> [a]
: String
underscores
              (String
chunk, String
rest'') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigitChar String
rest'
           in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
chunk
                then (String
acc, String
xs)
                else String -> String -> (String, String)
go (String
acc String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
allUnderscores String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
chunk) String
rest''
        String
_ -> (String
acc, String
xs)

-- | Parse the exponent part of a float literal (e.g., "e+10", "E-5").
--
-- When @allowUnderscores@ is True (NumericUnderscores enabled):
--   - Underscores may appear before the exponent marker (e.g., "1_e+23")
--   - This is handled by consuming trailing underscores from the mantissa into the exponent
--   - Underscores may also appear within the exponent digits
--
-- When @allowUnderscores@ is False:
--   - Only digits are accepted in the exponent
takeExponent :: Bool -> String -> (String, String)
takeExponent :: Bool -> String -> (String, String)
takeExponent Bool
allowUnderscores String
chars =
  case String
chars of
    -- Handle leading underscores before 'e'/'E' when NumericUnderscores enabled
    Char
'_' : String
rest
      | Bool
allowUnderscores ->
          let (String
underscores, String
rest') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_') String
rest
              allUnderscores :: String
allUnderscores = Char
'_' Char -> ShowS
forall a. a -> [a] -> [a]
: String
underscores
           in case String
rest' of
                Char
marker : String
rest2
                  | Char
marker Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"eE" :: String) ->
                      let (String
signPart, String
rest3) =
                            case String
rest2 of
                              Char
sign : String
more | Char
sign Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"+-" :: String) -> ([Char
sign], String
more)
                              String
_ -> (String
"", String
rest2)
                          (String
digits, String
rest4) = Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores Bool
allowUnderscores Char -> Bool
isDigit String
rest3
                       in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits
                            then (String
"", String
chars)
                            else (String
allUnderscores String -> ShowS
forall a. Semigroup a => a -> a -> a
<> [Char
marker] String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
signPart String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
digits, String
rest4)
                String
_ -> (String
"", String
chars)
    Char
marker : String
rest
      | Char
marker Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"eE" :: String) ->
          let (String
signPart, String
rest1) =
                case String
rest of
                  Char
sign : String
more | Char
sign Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"+-" :: String) -> ([Char
sign], String
more)
                  String
_ -> (String
"", String
rest)
              (String
digits, String
rest2) = Bool -> (Char -> Bool) -> String -> (String, String)
takeDigitsWithUnderscores Bool
allowUnderscores Char -> Bool
isDigit String
rest1
           in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits then (String
"", String
chars) else (Char
marker Char -> ShowS
forall a. a -> [a] -> [a]
: String
signPart String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
digits, String
rest2)
    String
_ -> (String
"", String
chars)

takeHexExponent :: String -> Maybe String
takeHexExponent :: String -> Maybe String
takeHexExponent String
chars =
  case String
chars of
    Char
marker : String
rest
      | Char
marker Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"pP" :: String) ->
          let (String
signPart, String
rest1) =
                case String
rest of
                  Char
sign : String
more | Char
sign Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"+-" :: String) -> ([Char
sign], String
more)
                  String
_ -> (String
"", String
rest)
              (String
digits, String
_) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
rest1
           in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits then Maybe String
forall a. Maybe a
Nothing else String -> Maybe String
forall a. a -> Maybe a
Just (Char
marker Char -> ShowS
forall a. a -> [a] -> [a]
: String
signPart String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
digits)
    String
_ -> Maybe String
forall a. Maybe a
Nothing

scanQuoted :: Char -> String -> Either String (String, String)
scanQuoted :: Char -> String -> Either String (String, String)
scanQuoted Char
endCh = String -> String -> Either String (String, String)
go []
  where
    go :: String -> String -> Either String (String, String)
go String
acc String
chars =
      case String
chars of
        [] -> String -> Either String (String, String)
forall a b. a -> Either a b
Left (ShowS
forall a. [a] -> [a]
reverse String
acc)
        Char
c : String
rest
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
endCh -> (String, String) -> Either String (String, String)
forall a b. b -> Either a b
Right (ShowS
forall a. [a] -> [a]
reverse String
acc, String
rest)
          | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\\' ->
              case String
rest of
                Char
escaped : String
rest' -> String -> String -> Either String (String, String)
go (Char
escaped Char -> ShowS
forall a. a -> [a] -> [a]
: Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
acc) String
rest'
                [] -> String -> Either String (String, String)
forall a b. a -> Either a b
Left (ShowS
forall a. [a] -> [a]
reverse (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
acc))
          | Bool
otherwise -> String -> String -> Either String (String, String)
go (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
acc) String
rest

takeQuoter :: String -> (String, String)
takeQuoter :: String -> (String, String)
takeQuoter String
chars =
  case String
chars of
    Char
c : String
rest
      | Char -> Bool
isIdentStart Char
c ->
          let (String
tailChars, String
rest0) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isIdentTailOrStart String
rest
           in String -> String -> (String, String)
go (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
tailChars) String
rest0
    String
_ -> (String
"", String
chars)
  where
    go :: String -> String -> (String, String)
go String
acc String
chars' =
      case String
chars' of
        Char
'.' : Char
c : String
rest
          | Char -> Bool
isIdentStart Char
c ->
              let (String
tailChars, String
rest0) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isIdentTailOrStart String
rest
               in String -> String -> (String, String)
go (String
acc String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
"." String -> ShowS
forall a. Semigroup a => a -> a -> a
<> [Char
c] String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
tailChars) String
rest0
        String
_ -> (String
acc, String
chars')

breakOnMarker :: String -> String -> (String, String)
breakOnMarker :: String -> String -> (String, String)
breakOnMarker String
marker = String -> String -> (String, String)
go []
  where
    go :: String -> String -> (String, String)
go String
acc String
chars
      | String
marker String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`List.isPrefixOf` String
chars = (ShowS
forall a. [a] -> [a]
reverse String
acc, String
chars)
      | Bool
otherwise =
          case String
chars of
            [] -> (ShowS
forall a. [a] -> [a]
reverse String
acc, [])
            Char
c : String
rest -> String -> String -> (String, String)
go (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
acc) String
rest

takeLineRemainder :: String -> String
takeLineRemainder :: ShowS
takeLineRemainder String
chars =
  let (String
prefix, String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') String
chars
   in String
prefix String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
1 String
rest

trimLeft :: String -> String
trimLeft :: ShowS
trimLeft = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace

trimRight :: String -> String
trimRight :: ShowS
trimRight = ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
reverse

guardPrefix :: (Eq a) => [a] -> [a] -> Maybe ()
guardPrefix :: forall a. Eq a => [a] -> [a] -> Maybe ()
guardPrefix [a]
prefix [a]
actual =
  if [a]
prefix [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`List.isPrefixOf` [a]
actual
    then () -> Maybe ()
forall a. a -> Maybe a
Just ()
    else Maybe ()
forall a. Maybe a
Nothing

readMaybeChar :: String -> Maybe Char
readMaybeChar :: String -> Maybe Char
readMaybeChar String
raw =
  case ReadS Char
forall a. Read a => ReadS a
reads String
raw of
    [(Char
c, String
"")] -> Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c
    [(Char, String)]
_ -> Maybe Char
forall a. Maybe a
Nothing

readHexLiteral :: Text -> Integer
readHexLiteral :: Text -> Integer
readHexLiteral Text
txt =
  case ReadS Integer
forall a. (Eq a, Num a) => ReadS a
readHex (Text -> String
T.unpack ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'_') (Int -> Text -> Text
T.drop Int
2 Text
txt))) of
    [(Integer
n, String
"")] -> Integer
n
    [(Integer, String)]
_ -> Integer
0

readOctLiteral :: Text -> Integer
readOctLiteral :: Text -> Integer
readOctLiteral Text
txt =
  case ReadS Integer
forall a. (Eq a, Num a) => ReadS a
readOct (Text -> String
T.unpack ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'_') (Int -> Text -> Text
T.drop Int
2 Text
txt))) of
    [(Integer
n, String
"")] -> Integer
n
    [(Integer, String)]
_ -> Integer
0

readBinLiteral :: Text -> Integer
readBinLiteral :: Text -> Integer
readBinLiteral Text
txt =
  case Integer -> (Char -> Bool) -> (Char -> Int) -> ReadS Integer
forall a. Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadS a
readInt Integer
2 (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
"01" :: String)) Char -> Int
digitToInt (Text -> String
T.unpack ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'_') (Int -> Text -> Text
T.drop Int
2 Text
txt))) of
    [(Integer
n, String
"")] -> Integer
n
    [(Integer, String)]
_ -> Integer
0

parseHexFloatLiteral :: String -> String -> String -> Double
parseHexFloatLiteral :: String -> String -> String -> Double
parseHexFloatLiteral String
intDigits String
fracDigits String
expo =
  (String -> Double
parseHexDigits String
intDigits Double -> Double -> Double
forall a. Num a => a -> a -> a
+ String -> Double
parseHexFraction String
fracDigits) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
2 Double -> Int -> Double
forall a b. (Fractional a, Integral b) => a -> b -> a
^^ String -> Int
exponentValue String
expo)

parseHexDigits :: String -> Double
parseHexDigits :: String -> Double
parseHexDigits = (Double -> Char -> Double) -> Double -> String -> Double
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Double
acc Char
d -> Double
acc Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
16 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
digitToInt Char
d)) Double
0

parseHexFraction :: String -> Double
parseHexFraction :: String -> Double
parseHexFraction String
ds =
  [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
digitToInt Char
d) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
16 Double -> Int -> Double
forall a b. (Fractional a, Integral b) => a -> b -> a
^^ Int
i) | (Char
d, Int
i) <- String -> [Int] -> [(Char, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip String
ds [Int
1 :: Int ..]]

exponentValue :: String -> Int
exponentValue :: String -> Int
exponentValue String
expo =
  case String
expo of
    Char
_ : Char
'-' : String
ds -> Int -> Int
forall a. Num a => a -> a
negate (String -> Int
forall a. Read a => String -> a
read String
ds)
    Char
_ : Char
'+' : String
ds -> String -> Int
forall a. Read a => String -> a
read String
ds
    Char
_ : String
ds -> String -> Int
forall a. Read a => String -> a
read String
ds
    String
_ -> Int
0

isIdentStart :: Char -> Bool
isIdentStart :: Char -> Bool
isIdentStart Char
c = Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'

isIdentTail :: Char -> Bool
isIdentTail :: Char -> Bool
isIdentTail Char
c = Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''

isSymbolicOpChar :: Char -> Bool
isSymbolicOpChar :: Char -> Bool
isSymbolicOpChar Char
c = Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
":!#$%&*+./<=>?@\\^|-~" :: String) Bool -> Bool -> Bool
|| Char -> Bool
isUnicodeSymbol Char
c

-- | Unicode symbols that may be used with UnicodeSyntax extension.
-- These are: ∷ ⇒ → ← ∀ ★ ⤙ ⤚ ⤛ ⤜ ⦇ ⦈ ⟦ ⟧ ⊸
isUnicodeSymbol :: Char -> Bool
isUnicodeSymbol :: Char -> Bool
isUnicodeSymbol Char
c =
  Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'∷' -- U+2237 PROPORTION (for ::)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⇒' -- U+21D2 RIGHTWARDS DOUBLE ARROW (for =>)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'→' -- U+2192 RIGHTWARDS ARROW (for ->)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'←' -- U+2190 LEFTWARDS ARROW (for <-)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'∀' -- U+2200 FOR ALL (for forall)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'★' -- U+2605 BLACK STAR (for *)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⤙' -- U+2919 LEFTWARDS ARROW-TAIL (for -<)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⤚' -- U+291A RIGHTWARDS ARROW-TAIL (for >-)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⤛' -- U+291B LEFTWARDS DOUBLE ARROW-TAIL (for -<<)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⤜' -- U+291C RIGHTWARDS DOUBLE ARROW-TAIL (for >>-)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⦇' -- U+2987 Z NOTATION LEFT IMAGE BRACKET (for (|)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⦈' -- U+2988 Z NOTATION RIGHT IMAGE BRACKET (for |))
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⟦' -- U+27E6 MATHEMATICAL LEFT WHITE SQUARE BRACKET (for [|)
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⟧' -- U+27E7 MATHEMATICAL RIGHT WHITE SQUARE BRACKET (for |])
    Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'⊸' -- U+22B8 MULTIMAP (for %1-> with LinearTypes)

-- | Check if the remainder after '--' should start a line comment.
-- Per Haskell Report: '--' starts a comment only if the entire symbol sequence
-- consists solely of dashes, or is not followed by any symbol character.
-- E.g., '-- foo' is a comment, '---' is a comment, but '-->' is an operator.
isLineComment :: String -> Bool
isLineComment :: String -> Bool
isLineComment String
rest =
  case String
rest of
    [] -> Bool
True -- Just '--' followed by nothing or whitespace
    Char
c : String
_
      | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' -> String -> Bool
isLineComment ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-') String
rest) -- More dashes, keep checking
      | Char -> Bool
isSymbolicOpChar Char
c -> Bool
False -- Non-dash symbol char means it's an operator
      | Bool
otherwise -> Bool
True -- Non-symbol char means comment

isIdentTailOrStart :: Char -> Bool
isIdentTailOrStart :: Char -> Bool
isIdentTailOrStart Char
c = Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''

isReservedIdentifier :: Text -> Bool
isReservedIdentifier :: Text -> Bool
isReservedIdentifier = Maybe LexTokenKind -> Bool
forall a. Maybe a -> Bool
isJust (Maybe LexTokenKind -> Bool)
-> (Text -> Maybe LexTokenKind) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Maybe LexTokenKind
keywordTokenKind

keywordTokenKind :: Text -> Maybe LexTokenKind
keywordTokenKind :: Text -> Maybe LexTokenKind
keywordTokenKind Text
txt = case Text
txt of
  Text
"case" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordCase
  Text
"class" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordClass
  Text
"data" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordData
  Text
"default" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordDefault
  Text
"deriving" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordDeriving
  Text
"do" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordDo
  Text
"else" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordElse
  Text
"foreign" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordForeign
  Text
"if" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordIf
  Text
"import" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordImport
  Text
"in" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordIn
  Text
"infix" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordInfix
  Text
"infixl" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordInfixl
  Text
"infixr" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordInfixr
  Text
"instance" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordInstance
  Text
"let" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordLet
  Text
"module" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordModule
  Text
"newtype" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordNewtype
  Text
"of" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordOf
  Text
"then" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordThen
  Text
"type" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordType
  Text
"where" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordWhere
  Text
"_" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordUnderscore
  -- Context-sensitive keywords (not strictly reserved per Report)
  Text
"qualified" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordQualified
  Text
"as" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordAs
  Text
"hiding" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkKeywordHiding
  Text
_ -> Maybe LexTokenKind
forall a. Maybe a
Nothing

-- | Classify reserved operators per Haskell Report Section 2.4.
reservedOpTokenKind :: Text -> Maybe LexTokenKind
reservedOpTokenKind :: Text -> Maybe LexTokenKind
reservedOpTokenKind Text
txt = case Text
txt of
  Text
".." -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedDotDot
  Text
":" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedColon
  Text
"::" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedDoubleColon
  Text
"=" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedEquals
  Text
"\\" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedBackslash
  Text
"|" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedPipe
  Text
"<-" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedLeftArrow
  Text
"->" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedRightArrow
  Text
"@" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedAt
  -- Note: ~ is NOT reserved; it uses whitespace-sensitive lexing (GHC proposal 0229)
  Text
"=>" -> LexTokenKind -> Maybe LexTokenKind
forall a. a -> Maybe a
Just LexTokenKind
TkReservedDoubleArrow
  Text
_ -> Maybe LexTokenKind
forall a. Maybe a
Nothing