never executed always true always false
1 {-# LANGUAGE DeriveAnyClass #-}
2 {-# LANGUAGE DeriveGeneric #-}
3 {-# LANGUAGE OverloadedStrings #-}
4
5 -- |
6 -- Module : Aihc.Cpp
7 -- Description : Pure Haskell C preprocessor for Haskell source files
8 -- License : Unlicense
9 --
10 -- This module provides a C preprocessor implementation designed for
11 -- preprocessing Haskell source files that use CPP extensions.
12 --
13 -- The main entry point is 'preprocess', which takes a 'Config' and
14 -- source text, returning a 'Step' that either completes with a 'Result'
15 -- or requests an include file to be resolved.
16 module Aihc.Cpp
17 ( -- * Preprocessing
18 preprocess,
19
20 -- * Configuration
21 Config (..),
22 defaultConfig,
23
24 -- * Results
25 Step (..),
26 Result (..),
27
28 -- * Include handling
29 IncludeRequest (..),
30 IncludeKind (..),
31
32 -- * Diagnostics
33 Diagnostic (..),
34 Severity (..),
35 )
36 where
37
38 import Control.DeepSeq (NFData)
39 import Data.Char (isAlphaNum, isDigit, isLetter, isSpace)
40 import Data.Map.Strict (Map)
41 import qualified Data.Map.Strict as M
42 import Data.Text (Text)
43 import qualified Data.Text as T
44 import qualified Data.Text.Lazy as TL
45 import qualified Data.Text.Lazy.Builder as TB
46 import qualified Data.Text.Read as TR
47 import GHC.Generics (Generic)
48 import System.FilePath (takeDirectory, (</>))
49
50 -- $setup
51 -- >>> :set -XOverloadedStrings
52 -- >>> import qualified Data.Map.Strict as M
53 -- >>> import qualified Data.Text as T
54 -- >>> import qualified Data.Text.IO as T
55
56 -- | Configuration for the C preprocessor.
57 data Config = Config
58 { -- | The name of the input file, used in @#line@ directives and
59 -- @__FILE__@ expansion.
60 configInputFile :: FilePath,
61 -- | User-defined macros. These are expanded as object-like macros.
62 -- Note that the values should include any necessary quoting. For
63 -- example, to define a string macro, use @"\"value\""@.
64 configMacros :: !(Map Text Text)
65 }
66
67 data MacroDef
68 = ObjectMacro !Text
69 | FunctionMacro ![Text] !Text
70 deriving (Eq, Show)
71
72 -- | Default configuration with sensible defaults.
73 --
74 -- * 'configInputFile' is set to @\"\<input\>\"@
75 -- * 'configMacros' includes @__DATE__@ and @__TIME__@ set to the Unix epoch
76 --
77 -- To customize the date and time macros:
78 --
79 -- >>> import qualified Data.Map.Strict as M
80 -- >>> let cfg = defaultConfig { configMacros = M.fromList [("__DATE__", "\"Mar 15 2026\""), ("__TIME__", "\"14:30:00\"")] }
81 -- >>> configMacros cfg
82 -- fromList [("__DATE__","\"Mar 15 2026\""),("__TIME__","\"14:30:00\"")]
83 --
84 -- To add additional macros while keeping the defaults:
85 --
86 -- >>> import qualified Data.Map.Strict as M
87 -- >>> let cfg = defaultConfig { configMacros = M.insert "VERSION" "42" (configMacros defaultConfig) }
88 -- >>> M.lookup "VERSION" (configMacros cfg)
89 -- Just "42"
90 defaultConfig :: Config
91 defaultConfig =
92 Config
93 { configInputFile = "<input>",
94 configMacros =
95 M.fromList
96 [ ("__DATE__", "\"Jan 1 1970\""),
97 ("__TIME__", "\"00:00:00\"")
98 ]
99 }
100
101 -- | The kind of @#include@ directive.
102 data IncludeKind = IncludeLocal | IncludeSystem deriving (Eq, Show, Generic, NFData)
103
104 -- | Information about a pending @#include@ that needs to be resolved.
105 data IncludeRequest = IncludeRequest
106 { -- | The path specified in the include directive.
107 includePath :: !FilePath,
108 -- | Whether this is a local (@\"...\"@) or system (@\<...\>@) include.
109 includeKind :: !IncludeKind,
110 -- | The file that contains the @#include@ directive.
111 includeFrom :: !FilePath,
112 -- | The line number of the @#include@ directive.
113 includeLine :: !Int
114 }
115 deriving (Eq, Show, Generic, NFData)
116
117 -- | Severity level for diagnostics.
118 data Severity = Warning | Error deriving (Eq, Show, Generic, NFData)
119
120 -- | A diagnostic message emitted during preprocessing.
121 data Diagnostic = Diagnostic
122 { -- | The severity of the diagnostic.
123 diagSeverity :: !Severity,
124 -- | The diagnostic message text.
125 diagMessage :: !Text,
126 -- | The file where the diagnostic occurred.
127 diagFile :: !FilePath,
128 -- | The line number where the diagnostic occurred.
129 diagLine :: !Int
130 }
131 deriving (Eq, Show, Generic, NFData)
132
133 -- | The result of preprocessing.
134 data Result = Result
135 { -- | The preprocessed output text.
136 resultOutput :: !Text,
137 -- | Any diagnostics (warnings or errors) emitted during preprocessing.
138 resultDiagnostics :: ![Diagnostic]
139 }
140 deriving (Eq, Show, Generic, NFData)
141
142 -- | A step in the preprocessing process. Either preprocessing is complete
143 -- ('Done') or an @#include@ directive needs to be resolved ('NeedInclude').
144 data Step
145 = -- | Preprocessing is complete.
146 Done !Result
147 | -- | An @#include@ directive was encountered. The caller must provide
148 -- the contents of the included file (or 'Nothing' if not found),
149 -- and preprocessing will continue.
150 NeedInclude !IncludeRequest !(Maybe Text -> Step)
151
152 -- | Preprocess C preprocessor directives in the input text.
153 --
154 -- This function handles:
155 --
156 -- * Macro definitions (@#define@) and expansion
157 -- * Conditional compilation (@#if@, @#ifdef@, @#ifndef@, @#elif@, @#else@, @#endif@)
158 -- * File inclusion (@#include@)
159 -- * Diagnostics (@#warning@, @#error@)
160 -- * Line control (@#line@)
161 -- * Predefined macros (@__FILE__@, @__LINE__@, @__DATE__@, @__TIME__@)
162 --
163 -- === Macro expansion
164 --
165 -- Object-like macros are expanded in the output:
166 --
167 -- >>> let Done r = preprocess defaultConfig "#define FOO 42\nThe answer is FOO"
168 -- >>> T.putStr (resultOutput r)
169 -- #line 1 "<input>"
170 -- <BLANKLINE>
171 -- The answer is 42
172 --
173 -- Function-like macros are also supported:
174 --
175 -- >>> let Done r = preprocess defaultConfig "#define MAX(a,b) ((a) > (b) ? (a) : (b))\nMAX(3, 5)"
176 -- >>> T.putStr (resultOutput r)
177 -- #line 1 "<input>"
178 -- <BLANKLINE>
179 -- ((3) > (5) ? (3) : (5))
180 --
181 -- === Conditional compilation
182 --
183 -- Conditional directives control which sections of code are included:
184 --
185 -- >>> :{
186 -- let Done r = preprocess defaultConfig
187 -- "#define DEBUG 1\n#if DEBUG\ndebug mode\n#else\nrelease mode\n#endif"
188 -- in T.putStr (resultOutput r)
189 -- :}
190 -- #line 1 "<input>"
191 -- <BLANKLINE>
192 -- <BLANKLINE>
193 -- debug mode
194 -- <BLANKLINE>
195 -- <BLANKLINE>
196 -- <BLANKLINE>
197 --
198 -- === Include handling
199 --
200 -- When an @#include@ directive is encountered, 'preprocess' returns a
201 -- 'NeedInclude' step. The caller must provide the contents of the included
202 -- file:
203 --
204 -- >>> :{
205 -- let NeedInclude req k = preprocess defaultConfig "#include \"header.h\"\nmain code"
206 -- Done r = k (Just "-- header content")
207 -- in T.putStr (resultOutput r)
208 -- :}
209 -- #line 1 "<input>"
210 -- #line 1 "./header.h"
211 -- -- header content
212 -- #line 2 "<input>"
213 -- main code
214 --
215 -- If the include file is not found, pass 'Nothing' to emit an error:
216 --
217 -- >>> :{
218 -- let NeedInclude _ k = preprocess defaultConfig "#include \"missing.h\""
219 -- Done r = k Nothing
220 -- in do
221 -- T.putStr (resultOutput r)
222 -- mapM_ print (resultDiagnostics r)
223 -- :}
224 -- #line 1 "<input>"
225 -- Diagnostic {diagSeverity = Error, diagMessage = "missing include: missing.h", diagFile = "<input>", diagLine = 1}
226 --
227 -- === Diagnostics
228 --
229 -- The @#warning@ directive emits a warning:
230 --
231 -- >>> :{
232 -- let Done r = preprocess defaultConfig "#warning This is a warning"
233 -- in do
234 -- T.putStr (resultOutput r)
235 -- mapM_ print (resultDiagnostics r)
236 -- :}
237 -- #line 1 "<input>"
238 -- <BLANKLINE>
239 -- Diagnostic {diagSeverity = Warning, diagMessage = "This is a warning", diagFile = "<input>", diagLine = 1}
240 --
241 -- The @#error@ directive emits an error and stops preprocessing:
242 --
243 -- >>> :{
244 -- let Done r = preprocess defaultConfig "#error Build failed\nthis line is not processed"
245 -- in do
246 -- T.putStr (resultOutput r)
247 -- mapM_ print (resultDiagnostics r)
248 -- :}
249 -- #line 1 "<input>"
250 -- <BLANKLINE>
251 -- Diagnostic {diagSeverity = Error, diagMessage = "Build failed", diagFile = "<input>", diagLine = 1}
252 preprocess :: Config -> Text -> Step
253 preprocess cfg input =
254 processFile (configInputFile cfg) (joinMultiline 1 (T.lines input)) [] initialState finish
255 where
256 initialState =
257 let st0 = emitLine (linePragma 1 (configInputFile cfg)) (emptyState (configInputFile cfg))
258 in st0
259 { stMacros = M.map ObjectMacro (configMacros cfg)
260 }
261 finish st =
262 let out = T.intercalate "\n" (reverse (stOutputRev st))
263 outWithTrailingNewline =
264 if T.null out
265 then out
266 else out <> "\n"
267 in Done
268 Result
269 { resultOutput = outWithTrailingNewline,
270 resultDiagnostics = reverse (stDiagnosticsRev st)
271 }
272
273 joinMultiline :: Int -> [Text] -> [(Int, Int, Text)]
274 joinMultiline _ [] = []
275 joinMultiline n (l : ls)
276 | "\\" `T.isSuffixOf` l && "#" `T.isPrefixOf` T.stripStart l =
277 let (content, rest, extraLines) = pull (T.init l) ls
278 spanLen = extraLines + 1
279 in (n, spanLen, content) : joinMultiline (n + spanLen) rest
280 | otherwise = (n, 1, l) : joinMultiline (n + 1) ls
281 where
282 pull acc [] = (acc, [], 0)
283 pull acc (x : xs)
284 | "\\" `T.isSuffixOf` x =
285 let (res, r, c) = pull (acc <> T.init x) xs
286 in (res, r, c + 1)
287 | otherwise = (acc <> x, xs, 1)
288
289 splitLines :: Text -> [Text]
290 splitLines txt
291 | T.null txt = []
292 | otherwise = T.splitOn "\n" txt
293
294 data EngineState = EngineState
295 { stMacros :: !(Map Text MacroDef),
296 stOutputRev :: ![Text],
297 stDiagnosticsRev :: ![Diagnostic],
298 stSkippingDanglingElse :: !Bool,
299 stHsBlockCommentDepth :: !Int,
300 stCBlockCommentDepth :: !Int,
301 stCurrentFile :: !FilePath,
302 stCurrentLine :: !Int
303 }
304
305 emptyState :: FilePath -> EngineState
306 emptyState filePath =
307 EngineState
308 { stMacros = M.empty,
309 stOutputRev = [],
310 stDiagnosticsRev = [],
311 stSkippingDanglingElse = False,
312 stHsBlockCommentDepth = 0,
313 stCBlockCommentDepth = 0,
314 stCurrentFile = filePath,
315 stCurrentLine = 1
316 }
317
318 data CondFrame = CondFrame
319 { frameOuterActive :: !Bool,
320 frameConditionTrue :: !Bool,
321 frameInElse :: !Bool,
322 frameCurrentActive :: !Bool
323 }
324
325 currentActive :: [CondFrame] -> Bool
326 currentActive [] = True
327 currentActive (f : _) = frameCurrentActive f
328
329 type Continuation = EngineState -> Step
330
331 data LineContext = LineContext
332 { lcFilePath :: !FilePath,
333 lcLineNo :: !Int,
334 lcLineSpan :: !Int,
335 lcNextLineNo :: !Int,
336 lcRestLines :: ![(Int, Int, Text)],
337 lcStack :: ![CondFrame],
338 lcContinue :: EngineState -> Step,
339 lcContinueWith :: [CondFrame] -> EngineState -> Step,
340 lcDone :: Continuation
341 }
342
343 processFile :: FilePath -> [(Int, Int, Text)] -> [CondFrame] -> EngineState -> Continuation -> Step
344 processFile _ [] _ st k = k st
345 processFile filePath ((lineNo, lineSpan, line) : restLines) stack st k =
346 let lineScan = scanLine (stHsBlockCommentDepth st) (stCBlockCommentDepth st) line
347 startsInBlockComment = stHsBlockCommentDepth st > 0 || stCBlockCommentDepth st > 0
348 parsedDirective =
349 if startsInBlockComment
350 then Nothing
351 else parseDirective line
352 nextLineNo = case restLines of
353 (n, _, _) : _ -> n
354 [] -> lineNo + lineSpan
355 advanceLineState st' =
356 st'
357 { stCurrentLine = nextLineNo,
358 stHsBlockCommentDepth = lineScanFinalHsDepth lineScan,
359 stCBlockCommentDepth = lineScanFinalCDepth lineScan
360 }
361 continue st' = processFile filePath restLines stack (advanceLineState st') k
362 continueWith stack' st' = processFile filePath restLines stack' (advanceLineState st') k
363 ctx =
364 LineContext
365 { lcFilePath = filePath,
366 lcLineNo = lineNo,
367 lcLineSpan = lineSpan,
368 lcNextLineNo = nextLineNo,
369 lcRestLines = restLines,
370 lcStack = stack,
371 lcContinue = continue,
372 lcContinueWith = continueWith,
373 lcDone = k
374 }
375 in if stSkippingDanglingElse st
376 then recoverDanglingElse ctx parsedDirective st
377 else case parsedDirective of
378 Nothing ->
379 if currentActive stack
380 then continue (emitLine (expandLineBySpan st (lineScanSpans lineScan)) st)
381 else continue (emitBlankLines lineSpan st)
382 Just directive ->
383 handleDirective ctx st directive
384
385 recoverDanglingElse :: LineContext -> Maybe Directive -> EngineState -> Step
386 recoverDanglingElse ctx parsedDirective st =
387 case parsedDirective of
388 Just DirEndIf ->
389 lcContinue
390 ctx
391 ( addDiag
392 Warning
393 "unmatched #endif"
394 (lcFilePath ctx)
395 (lcLineNo ctx)
396 (st {stSkippingDanglingElse = False})
397 )
398 Just _ ->
399 lcContinue ctx st
400 Nothing ->
401 lcContinue ctx (emitDirectiveBlank ctx st)
402
403 handleDirective :: LineContext -> EngineState -> Directive -> Step
404 handleDirective ctx st directive =
405 case directive of
406 DirDefineObject name value ->
407 mutateMacrosWhenActive ctx st (M.insert name (ObjectMacro value))
408 DirDefineFunction name params body ->
409 mutateMacrosWhenActive ctx st (M.insert name (FunctionMacro params body))
410 DirUndef name ->
411 mutateMacrosWhenActive ctx st (M.delete name)
412 DirInclude kind includeTarget ->
413 handleIncludeDirective ctx st kind includeTarget
414 DirIf expr ->
415 pushConditionalFrame ctx st (evalCondition st expr)
416 DirIfDef name ->
417 pushConditionalFrame ctx st (M.member name (stMacros st))
418 DirIfNDef name ->
419 pushConditionalFrame ctx st (not (M.member name (stMacros st)))
420 DirElif expr ->
421 handleElifDirective ctx st expr
422 DirElse ->
423 handleElseDirective ctx st
424 DirEndIf ->
425 case lcStack ctx of
426 [] ->
427 lcContinue
428 ctx
429 (addDiag Warning "unmatched #endif" (lcFilePath ctx) (lcLineNo ctx) st)
430 _ : rest ->
431 continueBlankWithStack ctx rest st
432 DirWarning msg ->
433 addDiagnosticWhenActive ctx Warning msg st
434 DirError msg ->
435 if currentActive (lcStack ctx)
436 then
437 lcDone
438 ctx
439 (emitDirectiveBlank ctx (addDiag Error msg (lcFilePath ctx) (lcLineNo ctx) st))
440 else continueBlank ctx st
441 DirLine n mPath ->
442 handleLineDirective ctx st n mPath
443 DirUnsupported name ->
444 addDiagnosticWhenActive ctx Warning ("unsupported directive: " <> name) st
445
446 emitDirectiveBlank :: LineContext -> EngineState -> EngineState
447 emitDirectiveBlank ctx = emitBlankLines (lcLineSpan ctx)
448
449 continueBlank :: LineContext -> EngineState -> Step
450 continueBlank ctx st = lcContinue ctx (emitDirectiveBlank ctx st)
451
452 continueBlankWithStack :: LineContext -> [CondFrame] -> EngineState -> Step
453 continueBlankWithStack ctx stack st = lcContinueWith ctx stack (emitDirectiveBlank ctx st)
454
455 mutateMacrosWhenActive :: LineContext -> EngineState -> (Map Text MacroDef -> Map Text MacroDef) -> Step
456 mutateMacrosWhenActive ctx st mutate =
457 if currentActive (lcStack ctx)
458 then continueBlank ctx (st {stMacros = mutate (stMacros st)})
459 else continueBlank ctx st
460
461 addDiagnosticWhenActive :: LineContext -> Severity -> Text -> EngineState -> Step
462 addDiagnosticWhenActive ctx severity message st =
463 if currentActive (lcStack ctx)
464 then continueBlank ctx (addDiag severity message (lcFilePath ctx) (lcLineNo ctx) st)
465 else continueBlank ctx st
466
467 pushConditionalFrame :: LineContext -> EngineState -> Bool -> Step
468 pushConditionalFrame ctx st cond =
469 let frame = mkFrame (currentActive (lcStack ctx)) cond
470 in continueBlankWithStack ctx (frame : lcStack ctx) st
471
472 handleElifDirective :: LineContext -> EngineState -> Text -> Step
473 handleElifDirective ctx st expr =
474 case lcStack ctx of
475 [] ->
476 continueBlank
477 ctx
478 (addDiag Error "#elif without matching #if" (lcFilePath ctx) (lcLineNo ctx) st)
479 f : rest ->
480 if frameInElse f
481 then
482 continueBlank
483 ctx
484 (addDiag Error "#elif after #else" (lcFilePath ctx) (lcLineNo ctx) st)
485 else
486 let anyTaken = frameConditionTrue f
487 newCond = not anyTaken && evalCondition st expr
488 f' =
489 f
490 { frameConditionTrue = anyTaken || newCond,
491 frameCurrentActive = frameOuterActive f && newCond
492 }
493 in continueBlankWithStack ctx (f' : rest) st
494
495 handleElseDirective :: LineContext -> EngineState -> Step
496 handleElseDirective ctx st =
497 case lcStack ctx of
498 [] ->
499 lcContinue ctx (st {stSkippingDanglingElse = True})
500 f : rest ->
501 if frameInElse f
502 then
503 continueBlank
504 ctx
505 (addDiag Error "duplicate #else in conditional block" (lcFilePath ctx) (lcLineNo ctx) st)
506 else
507 let newCurrent = frameOuterActive f && not (frameConditionTrue f)
508 f' =
509 f
510 { frameInElse = True,
511 frameCurrentActive = newCurrent
512 }
513 in continueBlankWithStack ctx (f' : rest) st
514
515 handleIncludeDirective :: LineContext -> EngineState -> IncludeKind -> Text -> Step
516 handleIncludeDirective ctx st kind includeTarget
517 | not (currentActive (lcStack ctx)) = continueBlank ctx st
518 | otherwise = NeedInclude includeReq nextStep
519 where
520 includePathText = T.unpack includeTarget
521 includeReq =
522 IncludeRequest
523 { includePath = includePathText,
524 includeKind = kind,
525 includeFrom = lcFilePath ctx,
526 includeLine = lcLineNo ctx
527 }
528 nextStep Nothing =
529 lcContinue
530 ctx
531 (addDiag Error ("missing include: " <> includeTarget) (lcFilePath ctx) (lcLineNo ctx) st)
532 nextStep (Just includeText) =
533 let includeFilePath =
534 case kind of
535 IncludeLocal -> takeDirectory (lcFilePath ctx) </> includePathText
536 IncludeSystem -> includePathText
537 stWithIncludePragma =
538 emitLine (linePragma 1 includeFilePath) (st {stCurrentFile = includeFilePath, stCurrentLine = 1})
539 resumeParent stAfterInclude =
540 processFile
541 (lcFilePath ctx)
542 (lcRestLines ctx)
543 (lcStack ctx)
544 ( emitLine
545 (linePragma (lcNextLineNo ctx) (lcFilePath ctx))
546 (stAfterInclude {stCurrentFile = lcFilePath ctx, stCurrentLine = lcNextLineNo ctx})
547 )
548 (lcDone ctx)
549 in processFile includeFilePath (joinMultiline 1 (splitLines includeText)) [] stWithIncludePragma resumeParent
550
551 handleLineDirective :: LineContext -> EngineState -> Int -> Maybe FilePath -> Step
552 handleLineDirective ctx st lineNumber maybePath
553 | not (currentActive (lcStack ctx)) = continueBlank ctx st
554 | otherwise =
555 let stWithFile =
556 case maybePath of
557 Just path -> st {stCurrentFile = path}
558 Nothing -> st
559 stWithLinePragma =
560 emitLine
561 (linePragma lineNumber (stCurrentFile stWithFile))
562 (stWithFile {stCurrentLine = lineNumber})
563 in processFile
564 (lcFilePath ctx)
565 (lcRestLines ctx)
566 (lcStack ctx)
567 (stWithLinePragma {stCurrentLine = lineNumber})
568 (lcDone ctx)
569
570 mkFrame :: Bool -> Bool -> CondFrame
571 mkFrame outer cond =
572 CondFrame
573 { frameOuterActive = outer,
574 frameConditionTrue = cond,
575 frameInElse = False,
576 frameCurrentActive = outer && cond
577 }
578
579 emitLine :: Text -> EngineState -> EngineState
580 emitLine line st = st {stOutputRev = line : stOutputRev st}
581
582 emitBlankLines :: Int -> EngineState -> EngineState
583 emitBlankLines n st
584 | n <= 0 = st
585 | otherwise = st {stOutputRev = replicate n "" <> stOutputRev st}
586
587 addDiag :: Severity -> Text -> FilePath -> Int -> EngineState -> EngineState
588 addDiag sev msg filePath lineNo st =
589 st
590 { stDiagnosticsRev =
591 Diagnostic
592 { diagSeverity = sev,
593 diagMessage = msg,
594 diagFile = filePath,
595 diagLine = lineNo
596 }
597 : stDiagnosticsRev st
598 }
599
600 linePragma :: Int -> FilePath -> Text
601 linePragma n path = "#line " <> T.pack (show n) <> " \"" <> T.pack path <> "\""
602
603 builderToText :: TB.Builder -> Text
604 builderToText = TL.toStrict . TB.toLazyText
605
606 trimSpacesText :: Text -> Text
607 trimSpacesText = T.dropWhileEnd isSpace . T.dropWhile isSpace
608
609 data LineSpan = LineSpan
610 { lineSpanInBlockComment :: !Bool,
611 lineSpanText :: !Text
612 }
613
614 data LineScan = LineScan
615 { lineScanSpans :: ![LineSpan],
616 lineScanFinalHsDepth :: !Int,
617 lineScanFinalCDepth :: !Int
618 }
619
620 expandLineBySpan :: EngineState -> [LineSpan] -> Text
621 expandLineBySpan st =
622 T.concat . map expandSpan
623 where
624 expandSpan lineChunk
625 | lineSpanInBlockComment lineChunk = lineSpanText lineChunk
626 | otherwise = expandMacros st (lineSpanText lineChunk)
627
628 scanLine :: Int -> Int -> Text -> LineScan
629 scanLine hsDepth0 cDepth0 input =
630 let (spansRev, currentBuilder, hasCurrent, currentInComment, finalHsDepth, finalCDepth) =
631 go hsDepth0 cDepth0 False False False [] mempty False (hsDepth0 > 0 || cDepth0 > 0) input
632 spans = reverse (flushSpan spansRev currentBuilder hasCurrent currentInComment)
633 in LineScan
634 { lineScanSpans = spans,
635 lineScanFinalHsDepth = finalHsDepth,
636 lineScanFinalCDepth = finalCDepth
637 }
638 where
639 flushSpan :: [LineSpan] -> TB.Builder -> Bool -> Bool -> [LineSpan]
640 flushSpan spansRev currentBuilder hasCurrent inComment =
641 if not hasCurrent
642 then spansRev
643 else LineSpan {lineSpanInBlockComment = inComment, lineSpanText = builderToText currentBuilder} : spansRev
644
645 appendWithMode ::
646 [LineSpan] ->
647 TB.Builder ->
648 Bool ->
649 Bool ->
650 Bool ->
651 Text ->
652 ([LineSpan], TB.Builder, Bool, Bool)
653 appendWithMode spansRev currentBuilder hasCurrent currentInComment newInComment chunk
654 | T.null chunk = (spansRev, currentBuilder, hasCurrent, currentInComment)
655 | not hasCurrent = (spansRev, TB.fromText chunk, True, newInComment)
656 | currentInComment == newInComment = (spansRev, currentBuilder <> TB.fromText chunk, True, currentInComment)
657 | otherwise =
658 let spansRev' = flushSpan spansRev currentBuilder hasCurrent currentInComment
659 in (spansRev', TB.fromText chunk, True, newInComment)
660
661 go ::
662 Int ->
663 Int ->
664 Bool ->
665 Bool ->
666 Bool ->
667 [LineSpan] ->
668 TB.Builder ->
669 Bool ->
670 Bool ->
671 Text ->
672 ([LineSpan], TB.Builder, Bool, Bool, Int, Int)
673 go hsDepth cDepth inString inChar escaped spansRev currentBuilder hasCurrent currentInComment remaining =
674 case T.uncons remaining of
675 Nothing -> (spansRev, currentBuilder, hasCurrent, currentInComment, hsDepth, cDepth)
676 Just (c1, rest1) ->
677 case T.uncons rest1 of
678 Nothing ->
679 let outChar = if cDepth > 0 then " " else T.singleton c1
680 inCommentNow = hsDepth > 0 || cDepth > 0
681 (spansRev', currentBuilder', hasCurrent', currentInComment') =
682 appendWithMode spansRev currentBuilder hasCurrent currentInComment inCommentNow outChar
683 in (spansRev', currentBuilder', hasCurrent', currentInComment', hsDepth, cDepth)
684 Just (c2, rest2) ->
685 if cDepth > 0
686 then
687 if c1 == '*' && c2 == '/'
688 then
689 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
690 appendWithMode spansRev currentBuilder hasCurrent currentInComment True " "
691 in go hsDepth 0 False False False spansRev' currentBuilder' hasCurrent' currentInComment' rest2
692 else
693 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
694 appendWithMode spansRev currentBuilder hasCurrent currentInComment True " "
695 in go hsDepth cDepth False False False spansRev' currentBuilder' hasCurrent' currentInComment' (T.cons c2 rest2)
696 else
697 if not inString && not inChar && hsDepth == 0 && c1 == '-' && c2 == '-'
698 then
699 let lineTail = T.cons c1 (T.cons c2 rest2)
700 (spansRev', currentBuilder', hasCurrent', currentInComment') =
701 appendWithMode spansRev currentBuilder hasCurrent currentInComment True lineTail
702 in (spansRev', currentBuilder', hasCurrent', currentInComment', hsDepth, cDepth)
703 else
704 if inString
705 then
706 let escaped' = not escaped && c1 == '\\'
707 inString' = escaped || c1 /= '"'
708 (spansRev', currentBuilder', hasCurrent', currentInComment') =
709 appendWithMode spansRev currentBuilder hasCurrent currentInComment (hsDepth > 0) (T.singleton c1)
710 in go hsDepth cDepth inString' False escaped' spansRev' currentBuilder' hasCurrent' currentInComment' (T.cons c2 rest2)
711 else
712 if inChar
713 then
714 let escaped' = not escaped && c1 == '\\'
715 inChar' = escaped || c1 /= '\''
716 (spansRev', currentBuilder', hasCurrent', currentInComment') =
717 appendWithMode spansRev currentBuilder hasCurrent currentInComment (hsDepth > 0) (T.singleton c1)
718 in go hsDepth cDepth False inChar' escaped' spansRev' currentBuilder' hasCurrent' currentInComment' (T.cons c2 rest2)
719 else
720 if hsDepth == 0 && c1 == '"'
721 then
722 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
723 appendWithMode spansRev currentBuilder hasCurrent currentInComment False (T.singleton c1)
724 in go hsDepth cDepth True False False spansRev' currentBuilder' hasCurrent' currentInComment' (T.cons c2 rest2)
725 else
726 if hsDepth == 0 && c1 == '\''
727 then
728 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
729 appendWithMode spansRev currentBuilder hasCurrent currentInComment False (T.singleton c1)
730 in go hsDepth cDepth False True False spansRev' currentBuilder' hasCurrent' currentInComment' (T.cons c2 rest2)
731 else
732 if hsDepth > 0 && c1 == '-' && c2 == '}'
733 then
734 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
735 appendWithMode spansRev currentBuilder hasCurrent currentInComment True "-}"
736 hsDepth' = hsDepth - 1
737 in go hsDepth' cDepth False False False spansRev' currentBuilder' hasCurrent' currentInComment' rest2
738 else
739 if c1 == '{' && c2 == '-' && not ("#" `T.isPrefixOf` rest2)
740 then
741 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
742 appendWithMode spansRev currentBuilder hasCurrent currentInComment True "{-"
743 in go (hsDepth + 1) cDepth False False False spansRev' currentBuilder' hasCurrent' currentInComment' rest2
744 else
745 if hsDepth == 0 && c1 == '/' && c2 == '*'
746 then
747 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
748 appendWithMode spansRev currentBuilder hasCurrent currentInComment True " "
749 in go hsDepth 1 False False False spansRev' currentBuilder' hasCurrent' currentInComment' rest2
750 else
751 let (spansRev', currentBuilder', hasCurrent', currentInComment') =
752 appendWithMode spansRev currentBuilder hasCurrent currentInComment (hsDepth > 0) (T.singleton c1)
753 in go hsDepth cDepth False False False spansRev' currentBuilder' hasCurrent' currentInComment' (T.cons c2 rest2)
754
755 data Directive
756 = DirDefineObject !Text !Text
757 | DirDefineFunction !Text ![Text] !Text
758 | DirUndef !Text
759 | DirInclude !IncludeKind !Text
760 | DirIf !Text
761 | DirIfDef !Text
762 | DirIfNDef !Text
763 | DirElif !Text
764 | DirElse
765 | DirEndIf
766 | DirLine !Int !(Maybe FilePath)
767 | DirWarning !Text
768 | DirError !Text
769 | DirUnsupported !Text
770
771 parseDirective :: Text -> Maybe Directive
772 parseDirective raw =
773 let trimmed = T.stripStart raw
774 in if "#" `T.isPrefixOf` trimmed
775 then
776 let body = T.stripStart (T.drop 1 trimmed)
777 in case T.uncons body of
778 Just (c, _) | isLetter c || isDigit c -> parseDirectiveBody body
779 _ -> Nothing
780 else Nothing
781
782 parseDirectiveBody :: Text -> Maybe Directive
783 parseDirectiveBody body =
784 let (name, rest0) = T.span isIdentChar body
785 rest = T.stripStart rest0
786 in if T.null name
787 then case T.uncons body of
788 Just (c, _) | isDigit c -> parseLineDirective body
789 _ -> Nothing
790 else case name of
791 "define" -> parseDefine rest
792 "undef" -> DirUndef <$> parseIdentifier rest
793 "include" -> parseInclude rest
794 "if" -> Just (DirIf rest)
795 "ifdef" -> DirIfDef <$> parseIdentifier rest
796 "ifndef" -> DirIfNDef <$> parseIdentifier rest
797 "isndef" -> Just (DirUnsupported "isndef")
798 "elif" -> Just (DirElif rest)
799 "elseif" -> Just (DirElif rest)
800 "else" -> Just DirElse
801 "endif" -> Just DirEndIf
802 "line" -> parseLineDirective rest
803 "warning" -> Just (DirWarning rest)
804 "error" -> Just (DirError rest)
805 _ -> Just (DirUnsupported name)
806
807 parseLineDirective :: Text -> Maybe Directive
808 parseLineDirective body =
809 case TR.decimal body of
810 Left _ -> Nothing
811 Right (lineNumber, rest0) ->
812 let rest = T.stripStart rest0
813 in case parseQuotedText rest of
814 Nothing -> Just (DirLine lineNumber Nothing)
815 Just path -> Just (DirLine lineNumber (Just (T.unpack path)))
816
817 parseDefine :: Text -> Maybe Directive
818 parseDefine rest = do
819 let (name, rest0) = T.span isIdentChar rest
820 if T.null name
821 then Nothing
822 else case T.uncons rest0 of
823 Just ('(', afterOpen) ->
824 let (params, restAfterParams) = parseDefineParams afterOpen
825 in case params of
826 Nothing -> Just (DirUnsupported "define-function-macro")
827 Just names -> Just (DirDefineFunction name names (T.stripStart restAfterParams))
828 _ -> Just (DirDefineObject name (T.stripStart rest0))
829
830 parseDefineParams :: Text -> (Maybe [Text], Text)
831 parseDefineParams input =
832 let (inside, suffix) = T.breakOn ")" input
833 in if T.null suffix
834 then (Nothing, "")
835 else
836 let rawParams = T.splitOn "," inside
837 params = map (T.takeWhile isIdentChar . T.strip) rawParams
838 in if T.null (T.strip inside)
839 then (Just [], T.drop 1 suffix)
840 else
841 if any T.null params
842 then (Nothing, T.drop 1 suffix)
843 else (Just params, T.drop 1 suffix)
844
845 parseIdentifier :: Text -> Maybe Text
846 parseIdentifier txt =
847 let ident = T.takeWhile isIdentChar (T.stripStart txt)
848 in if T.null ident then Nothing else Just ident
849
850 parseInclude :: Text -> Maybe Directive
851 parseInclude txt =
852 case T.uncons (T.stripStart txt) of
853 Just ('"', rest) ->
854 let (path, suffix) = T.breakOn "\"" rest
855 in if T.null suffix then Nothing else Just (DirInclude IncludeLocal path)
856 Just ('<', rest) ->
857 let (path, suffix) = T.breakOn ">" rest
858 in if T.null suffix then Nothing else Just (DirInclude IncludeSystem path)
859 _ -> Nothing
860
861 parseQuotedText :: Text -> Maybe Text
862 parseQuotedText txt = do
863 ('"', rest) <- T.uncons txt
864 let (path, suffix) = T.breakOn "\"" rest
865 if T.null suffix then Nothing else Just path
866
867 expandMacros :: EngineState -> Text -> Text
868 expandMacros st = applyDepth (32 :: Int)
869 where
870 applyDepth 0 t = t
871 applyDepth n t =
872 let next = expandOnce st t
873 in if next == t then t else applyDepth (n - 1) next
874
875 expandOnce :: EngineState -> Text -> Text
876 expandOnce st = go False False False
877 where
878 macros = stMacros st
879 go :: Bool -> Bool -> Bool -> Text -> Text
880 go _ _ _ txt
881 | T.null txt = ""
882 go inString inChar escaped txt =
883 case T.uncons txt of
884 Nothing -> ""
885 Just (c, rest)
886 | inString ->
887 let escaped' = c == '\\' && not escaped
888 inString' = not (c == '"' && not escaped)
889 in T.cons c (go inString' False escaped' rest)
890 | inChar ->
891 let escaped' = c == '\\' && not escaped
892 inChar' = not (c == '\'' && not escaped)
893 in T.cons c (go False inChar' escaped' rest)
894 | c == '"' ->
895 T.cons c (go True False False rest)
896 | c == '\'' ->
897 T.cons c (go False True False rest)
898 | isIdentStart c ->
899 expandIdentifier txt
900 | otherwise ->
901 T.cons c (go False False False rest)
902
903 expandIdentifier :: Text -> Text
904 expandIdentifier input =
905 let (ident, rest) = T.span isIdentChar input
906 in case ident of
907 "__LINE__" -> T.pack (show (stCurrentLine st)) <> go False False False rest
908 "__FILE__" -> T.pack (show (stCurrentFile st)) <> go False False False rest
909 _ ->
910 case M.lookup ident macros of
911 Just (ObjectMacro replacement) ->
912 replacement <> go False False False rest
913 Just (FunctionMacro params body) ->
914 case parseCallArgs rest of
915 Nothing -> ident <> go False False False rest
916 Just (args, restAfter)
917 | length args == length params ->
918 let body' = substituteParams (M.fromList (zip params args)) body
919 in body' <> go False False False restAfter
920 | otherwise -> ident <> go False False False rest
921 Nothing -> ident <> go False False False rest
922
923 parseCallArgs :: Text -> Maybe ([Text], Text)
924 parseCallArgs input = do
925 ('(', rest) <- T.uncons input
926 parseArgs 0 [] mempty rest
927
928 parseArgs :: Int -> [Text] -> TB.Builder -> Text -> Maybe ([Text], Text)
929 parseArgs depth argsRev current remaining =
930 case T.uncons remaining of
931 Nothing -> Nothing
932 Just (ch, rest)
933 | ch == '(' ->
934 parseArgs (depth + 1) argsRev (current <> TB.singleton ch) rest
935 | ch == ')' && depth > 0 ->
936 parseArgs (depth - 1) argsRev (current <> TB.singleton ch) rest
937 | ch == ')' && depth == 0 ->
938 let arg = trimSpacesText (builderToText current)
939 argsRev' =
940 if T.null arg && null argsRev
941 then argsRev
942 else arg : argsRev
943 in Just (reverse argsRev', rest)
944 | ch == ',' && depth == 0 ->
945 let arg = trimSpacesText (builderToText current)
946 in parseArgs depth (arg : argsRev) mempty rest
947 | otherwise ->
948 parseArgs depth argsRev (current <> TB.singleton ch) rest
949
950 substituteParams :: Map Text Text -> Text -> Text
951 substituteParams subs = go False False False
952 where
953 go :: Bool -> Bool -> Bool -> Text -> Text
954 go _ _ _ txt
955 | T.null txt = ""
956 go inDouble inSingle escaped txt =
957 case T.uncons txt of
958 Nothing -> ""
959 Just (c, rest)
960 | inDouble ->
961 T.cons c $
962 case c of
963 '\\' ->
964 if escaped
965 then go True inSingle False rest
966 else go True inSingle True rest
967 '"' ->
968 if escaped
969 then go True inSingle False rest
970 else go False inSingle False rest
971 _ -> go True inSingle False rest
972 | inSingle ->
973 T.cons c $
974 case c of
975 '\\' ->
976 if escaped
977 then go inDouble True False rest
978 else go inDouble True True rest
979 '\'' ->
980 if escaped
981 then go inDouble True False rest
982 else go inDouble False False rest
983 _ -> go inDouble True False rest
984 | c == '"' ->
985 T.cons c (go True False False rest)
986 | c == '\'' ->
987 T.cons c (go False True False rest)
988 | isIdentStart c ->
989 let (ident, rest') = T.span isIdentChar txt
990 in M.findWithDefault ident ident subs <> go False False False rest'
991 | otherwise ->
992 T.cons c (go False False False rest)
993
994 isIdentStart :: Char -> Bool
995 isIdentStart c = c == '_' || isLetter c
996
997 isIdentChar :: Char -> Bool
998 isIdentChar c = c == '_' || isAlphaNum c
999
1000 --------------------------------------------------------------------------------
1001 -- Expression Evaluation
1002 --------------------------------------------------------------------------------
1003
1004 evalCondition :: EngineState -> Text -> Bool
1005 evalCondition st expr = eval expr /= 0
1006 where
1007 macros = stMacros st
1008 eval = evalNumeric . replaceRemainingWithZero . expandMacros st . replaceDefined macros
1009
1010 evalNumeric :: Text -> Integer
1011 evalNumeric input =
1012 let tokens = tokenize input
1013 in case parseExpr tokens of
1014 (val, _) -> val
1015
1016 data Token = TOp Text | TNum Integer | TIdent Text | TOpenParen | TCloseParen deriving (Show)
1017
1018 tokenize :: Text -> [Token]
1019 tokenize input =
1020 case T.uncons input of
1021 Nothing -> []
1022 Just (c, rest)
1023 | isSpace c ->
1024 tokenize (T.dropWhile isSpace rest)
1025 | isDigit c ->
1026 let (num, remaining) = T.span isDigit input
1027 in case TR.decimal num of
1028 Right (value, _) -> TNum value : tokenize remaining
1029 Left _ -> tokenize remaining
1030 | isIdentStart c ->
1031 let (ident, remaining) = T.span isIdentChar input
1032 in TIdent ident : tokenize remaining
1033 | c == '(' ->
1034 TOpenParen : tokenize rest
1035 | c == ')' ->
1036 TCloseParen : tokenize rest
1037 | otherwise ->
1038 let (op, remaining) = T.span isOpChar input
1039 in if T.null op
1040 then tokenize rest
1041 else TOp op : tokenize remaining
1042
1043 isOpChar :: Char -> Bool
1044 isOpChar c =
1045 c == '+'
1046 || c == '-'
1047 || c == '*'
1048 || c == '/'
1049 || c == '%'
1050 || c == '&'
1051 || c == '|'
1052 || c == '!'
1053 || c == '='
1054 || c == '<'
1055 || c == '>'
1056
1057 parseExpr :: [Token] -> (Integer, [Token])
1058 parseExpr = parseOr
1059
1060 binary :: ([Token] -> (Integer, [Token])) -> [Text] -> [Token] -> (Integer, [Token])
1061 binary next ops ts =
1062 let (v1, ts1) = next ts
1063 in go v1 ts1
1064 where
1065 go v1 (TOp op : ts2)
1066 | op `elem` ops =
1067 let (v2, ts3) = next ts2
1068 in go (apply op v1 v2) ts3
1069 go v1 ts2 = (v1, ts2)
1070
1071 apply "||" a b = if a /= 0 || b /= 0 then 1 else 0
1072 apply "&&" a b = if a /= 0 && b /= 0 then 1 else 0
1073 apply "==" a b = if a == b then 1 else 0
1074 apply "!=" a b = if a /= b then 1 else 0
1075 apply "<" a b = if a < b then 1 else 0
1076 apply ">" a b = if a > b then 1 else 0
1077 apply "<=" a b = if a <= b then 1 else 0
1078 apply ">=" a b = if a >= b then 1 else 0
1079 apply "+" a b = a + b
1080 apply "-" a b = a - b
1081 apply "*" a b = a * b
1082 apply "/" a b = if b == 0 then 0 else a `div` b
1083 apply "%" a b = if b == 0 then 0 else a `mod` b
1084 apply _ a _ = a
1085
1086 parseOr, parseAnd, parseEq, parseRel, parseAdd, parseMul :: [Token] -> (Integer, [Token])
1087 parseOr = binary parseAnd ["||"]
1088 parseAnd = binary parseEq ["&&"]
1089 parseEq = binary parseRel ["==", "!="]
1090 parseRel = binary parseAdd ["<", ">", "<=", ">="]
1091 parseAdd = binary parseMul ["+", "-"]
1092 parseMul = binary parseUnary ["*", "/", "%"]
1093
1094 parseUnary :: [Token] -> (Integer, [Token])
1095 parseUnary (TOp "!" : ts) = let (v, ts') = parseUnary ts in (if v == 0 then 1 else 0, ts')
1096 parseUnary (TOp "-" : ts) = let (v, ts') = parseUnary ts in (-v, ts')
1097 parseUnary ts = parseAtom ts
1098
1099 parseAtom :: [Token] -> (Integer, [Token])
1100 parseAtom (TNum n : ts) = (n, ts)
1101 parseAtom (TIdent _ : ts) = (0, ts)
1102 parseAtom (TOpenParen : ts) =
1103 let (v, ts1) = parseExpr ts
1104 in case ts1 of
1105 TCloseParen : ts2 -> (v, ts2)
1106 _ -> (v, ts1)
1107 parseAtom ts = (0, ts)
1108
1109 replaceDefined :: Map Text MacroDef -> Text -> Text
1110 replaceDefined macros = go
1111 where
1112 go txt =
1113 case T.uncons txt of
1114 Nothing -> ""
1115 Just (c, rest)
1116 | "defined" `T.isPrefixOf` txt && not (nextCharIsIdent (T.drop 7 txt)) ->
1117 expandDefined (T.dropWhile isSpace (T.drop 7 txt))
1118 | otherwise ->
1119 T.cons c (go rest)
1120
1121 expandDefined rest =
1122 case T.uncons rest of
1123 Just ('(', restAfterOpen) ->
1124 let rest' = T.dropWhile isSpace restAfterOpen
1125 (name, restAfterName0) = T.span isIdentChar rest'
1126 restAfterName = T.dropWhile isSpace restAfterName0
1127 in case T.uncons restAfterName of
1128 Just (')', restAfterClose) ->
1129 boolLiteral (M.member name macros) <> go restAfterClose
1130 _ ->
1131 boolLiteral False <> go restAfterName
1132 _ ->
1133 let (name, restAfterName) = T.span isIdentChar rest
1134 in if T.null name
1135 then boolLiteral False <> go rest
1136 else boolLiteral (M.member name macros) <> go restAfterName
1137
1138 boolLiteral True = " 1 "
1139 boolLiteral False = " 0 "
1140
1141 nextCharIsIdent remaining =
1142 case T.uncons remaining of
1143 Just (c, _) -> isIdentChar c
1144 Nothing -> False
1145
1146 replaceRemainingWithZero :: Text -> Text
1147 replaceRemainingWithZero = go
1148 where
1149 go txt =
1150 case T.uncons txt of
1151 Nothing -> ""
1152 Just (c, rest)
1153 | isIdentStart c ->
1154 let (_, remaining) = T.span isIdentChar txt
1155 in " 0 " <> go remaining
1156 | otherwise ->
1157 T.cons c (go rest)