{-| This module contains the building blocks used to construct the lexer.
-}
module Agda.Syntax.Parser.LexActions
    ( -- * Main function
      lexToken
      -- * Lex actions
      -- ** General actions
    , token
    , withInterval, withInterval', withInterval_
    , withLayout
    , begin, end, beginWith, endWith
    , begin_, end_
    , lexError
      -- ** Specialized actions
    , keyword, symbol, identifier, literal, literal', integer
      -- * Lex predicates
    , followedBy, eof, inState
    ) where

import Data.Char
import Data.Maybe

import Agda.Syntax.Parser.Lexer
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Position
import Agda.Syntax.Literal

import Agda.Utils.List
import Agda.Utils.Tuple

import Agda.Utils.Impossible

{--------------------------------------------------------------------------
    Scan functions
 --------------------------------------------------------------------------}

-- | Called at the end of a file. Returns 'TokEOF'.
returnEOF :: AlexInput -> Parser Token
returnEOF :: AlexInput -> Parser Token
returnEOF AlexInput{ SrcFile
lexSrcFile :: AlexInput -> SrcFile
lexSrcFile :: SrcFile
lexSrcFile, PositionWithoutFile
lexPos :: AlexInput -> PositionWithoutFile
lexPos :: PositionWithoutFile
lexPos } = do
  -- Andreas, 2018-12-30, issue #3480
  -- The following setLastPos leads to parse error reporting
  -- far away from the interesting position, in particular
  -- if there is a long comment before the EOF.
  -- (Such a long comment is frequent in interactive programming, as
  -- commenting out until the end of the file is a common habit.)
  -- -- setLastPos lexPos
  -- Without it, we get much more useful error locations.
  String -> Parser ()
setPrevToken "<EOF>"
  Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Interval -> Token
TokEOF (Interval -> Token) -> Interval -> Token
forall a b. (a -> b) -> a -> b
$ SrcFile -> PositionWithoutFile -> PositionWithoutFile -> Interval
forall a.
a -> PositionWithoutFile -> PositionWithoutFile -> Interval' a
posToInterval SrcFile
lexSrcFile PositionWithoutFile
lexPos PositionWithoutFile
lexPos

-- | Set the current input and lex a new token (calls 'lexToken').
skipTo :: AlexInput -> Parser Token
skipTo :: AlexInput -> Parser Token
skipTo inp :: AlexInput
inp = do
  AlexInput -> Parser ()
setLexInput AlexInput
inp
  Parser Token
lexToken

{-| Scan the input to find the next token. Calls
'Agda.Syntax.Parser.Lexer.alexScanUser'. This is the main lexing function
where all the work happens. The function 'Agda.Syntax.Parser.Lexer.lexer',
used by the parser is the continuation version of this function.
-}
lexToken :: Parser Token
lexToken :: Parser Token
lexToken =
    do  AlexInput
inp <- Parser AlexInput
getLexInput
        [LexState]
lss <- Parser [LexState]
getLexState
        ParseFlags
flags <- Parser ParseFlags
getParseFlags
        case ([LexState], ParseFlags)
-> AlexInput -> LexState -> AlexReturn (LexAction Token)
alexScanUser ([LexState]
lss, ParseFlags
flags) AlexInput
inp (LexState -> [LexState] -> LexState
forall a. a -> [a] -> a
headWithDefault LexState
forall a. HasCallStack => a
__IMPOSSIBLE__ [LexState]
lss) of
            AlexEOF                     -> AlexInput -> Parser Token
returnEOF AlexInput
inp
            AlexSkip inp' :: AlexInput
inp' len :: LexState
len           -> AlexInput -> Parser Token
skipTo AlexInput
inp'
            AlexToken inp' :: AlexInput
inp' len :: LexState
len action :: LexAction Token
action   -> (Token -> Token) -> Parser Token -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Token
postToken (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ LexAction Token
action AlexInput
inp AlexInput
inp' LexState
len
            AlexError i :: AlexInput
i                 -> String -> Parser Token
forall a. String -> Parser a
parseError (String -> Parser Token) -> String -> Parser Token
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
              [ "Lexical error"
              , case String -> Maybe Char
forall a. [a] -> Maybe a
listToMaybe (String -> Maybe Char) -> String -> Maybe Char
forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
i of
                  Just '\t'                -> " (you may want to replace tabs with spaces)"
                  Just c :: Char
c | Bool -> Bool
not (Char -> Bool
isPrint Char
c) -> " (unprintable character)"
                  _ -> ""
              , ":"
              ]

isSub :: Char -> Bool
isSub :: Char -> Bool
isSub c :: Char
c = '\x2080' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\x2089'

readSubscript :: [Char] -> Integer
readSubscript :: String -> Integer
readSubscript = String -> Integer
forall a. Read a => String -> a
read (String -> Integer) -> (String -> String) -> String -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
map (\c :: Char
c -> LexState -> Char
forall a. Enum a => LexState -> a
toEnum (Char -> LexState
forall a. Enum a => a -> LexState
fromEnum Char
c LexState -> LexState -> LexState
forall a. Num a => a -> a -> a
- 0x2080 LexState -> LexState -> LexState
forall a. Num a => a -> a -> a
+ Char -> LexState
forall a. Enum a => a -> LexState
fromEnum '0'))

postToken :: Token -> Token
postToken :: Token -> Token
postToken (TokId (r :: Interval
r, "\x03bb")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymLambda Interval
r
postToken (TokId (r :: Interval
r, "\x2026")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymEllipsis Interval
r
postToken (TokId (r :: Interval
r, "\x2192")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymArrow Interval
r
postToken (TokId (r :: Interval
r, "\x2983")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymDoubleOpenBrace Interval
r
postToken (TokId (r :: Interval
r, "\x2984")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymDoubleCloseBrace Interval
r
postToken (TokId (r :: Interval
r, "\x2987")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymOpenIdiomBracket Interval
r
postToken (TokId (r :: Interval
r, "\x2988")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymCloseIdiomBracket Interval
r
postToken (TokId (r :: Interval
r, "\x2987\x2988")) = Symbol -> Interval -> Token
TokSymbol Symbol
SymEmptyIdiomBracket Interval
r
postToken (TokId (r :: Interval
r, "\x2200")) = Keyword -> Interval -> Token
TokKeyword Keyword
KwForall Interval
r
postToken (TokId (r :: Interval
r, s :: String
s))
  | String
set String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== "Set" Bool -> Bool -> Bool
&& (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSub String
n = (Interval, Integer) -> Token
TokSetN (Interval
r, String -> Integer
readSubscript String
n)
  where
    (set :: String
set, n :: String
n)      = LexState -> String -> (String, String)
forall a. LexState -> [a] -> ([a], [a])
splitAt 3 String
s
postToken (TokId (r :: Interval
r, s :: String
s))
  | String
prop String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== "Prop" Bool -> Bool -> Bool
&& (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isSub String
n = (Interval, Integer) -> Token
TokPropN (Interval
r, String -> Integer
readSubscript String
n)
  where
    (prop :: String
prop, n :: String
n)     = LexState -> String -> (String, String)
forall a. LexState -> [a] -> ([a], [a])
splitAt 4 String
s
postToken t :: Token
t = Token
t

{--------------------------------------------------------------------------
    Lex actions
 --------------------------------------------------------------------------}

-- | The most general way of parsing a token.
token :: (String -> Parser tok) -> LexAction tok
token :: (String -> Parser tok) -> LexAction tok
token action :: String -> Parser tok
action inp :: AlexInput
inp inp' :: AlexInput
inp' len :: LexState
len =
    do  AlexInput -> Parser ()
setLexInput AlexInput
inp'
        String -> Parser ()
setPrevToken String
t
        PositionWithoutFile -> Parser ()
setLastPos (PositionWithoutFile -> Parser ())
-> PositionWithoutFile -> Parser ()
forall a b. (a -> b) -> a -> b
$ AlexInput -> PositionWithoutFile
lexPos AlexInput
inp
        String -> Parser tok
action String
t
    where
        t :: String
t = LexState -> String -> String
forall a. LexState -> [a] -> [a]
take LexState
len (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
inp

-- | Parse a token from an 'Interval' and the lexed string.
withInterval :: ((Interval, String) -> tok) -> LexAction tok
withInterval :: ((Interval, String) -> tok) -> LexAction tok
withInterval f :: (Interval, String) -> tok
f = (String -> Parser tok) -> LexAction tok
forall tok. (String -> Parser tok) -> LexAction tok
token ((String -> Parser tok) -> LexAction tok)
-> (String -> Parser tok) -> LexAction tok
forall a b. (a -> b) -> a -> b
$ \s :: String
s -> do
                   Interval
r <- Parser Interval
getParseInterval
                   tok -> Parser tok
forall (m :: * -> *) a. Monad m => a -> m a
return (tok -> Parser tok) -> tok -> Parser tok
forall a b. (a -> b) -> a -> b
$ (Interval, String) -> tok
f (Interval
r,String
s)

-- | Like 'withInterval', but applies a function to the string.
withInterval' :: (String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' :: (String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' f :: String -> a
f t :: (Interval, a) -> tok
t = ((Interval, String) -> tok) -> LexAction tok
forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval ((Interval, a) -> tok
t ((Interval, a) -> tok)
-> ((Interval, String) -> (Interval, a))
-> (Interval, String)
-> tok
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Interval -> Interval
forall a. a -> a
id (Interval -> Interval)
-> (String -> a) -> (Interval, String) -> (Interval, a)
forall a c b d. (a -> c) -> (b -> d) -> (a, b) -> (c, d)
-*- String -> a
f))

-- | Return a token without looking at the lexed string.
withInterval_ :: (Interval -> r) -> LexAction r
withInterval_ :: (Interval -> r) -> LexAction r
withInterval_ f :: Interval -> r
f = ((Interval, String) -> r) -> LexAction r
forall tok. ((Interval, String) -> tok) -> LexAction tok
withInterval (Interval -> r
f (Interval -> r)
-> ((Interval, String) -> Interval) -> (Interval, String) -> r
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Interval, String) -> Interval
forall a b. (a, b) -> a
fst)


-- | Executed for layout keywords. Enters the 'Agda.Syntax.Parser.Lexer.layout'
--   state and performs the given action.
withLayout :: LexAction r -> LexAction r
withLayout :: LexAction r -> LexAction r
withLayout a :: LexAction r
a i1 :: AlexInput
i1 i2 :: AlexInput
i2 n :: LexState
n =
    do  LexState -> Parser ()
pushLexState LexState
layout
        LexAction r
a AlexInput
i1 AlexInput
i2 LexState
n


-- | Enter a new state without consuming any input.
begin :: LexState -> LexAction Token
begin :: LexState -> LexAction Token
begin code :: LexState
code _ _ _ =
    do  LexState -> Parser ()
pushLexState LexState
code
        Parser Token
lexToken


-- | Enter a new state throwing away the current lexeme.
begin_ :: LexState -> LexAction Token
begin_ :: LexState -> LexAction Token
begin_ code :: LexState
code _ inp' :: AlexInput
inp' _ =
    do  LexState -> Parser ()
pushLexState LexState
code
        AlexInput -> Parser Token
skipTo AlexInput
inp'

-- | Exit the current state throwing away the current lexeme.
end_ :: LexAction Token
end_ :: LexAction Token
end_ _ inp' :: AlexInput
inp' _ =
    do  Parser ()
popLexState
        AlexInput -> Parser Token
skipTo AlexInput
inp'


-- | Enter a new state and perform the given action.
beginWith :: LexState -> LexAction a -> LexAction a
beginWith :: LexState -> LexAction a -> LexAction a
beginWith code :: LexState
code a :: LexAction a
a inp :: AlexInput
inp inp' :: AlexInput
inp' n :: LexState
n =
    do  LexState -> Parser ()
pushLexState LexState
code
        LexAction a
a AlexInput
inp AlexInput
inp' LexState
n

-- | Exit the current state and perform the given action.
endWith :: LexAction a -> LexAction a
endWith :: LexAction a -> LexAction a
endWith a :: LexAction a
a inp :: AlexInput
inp inp' :: AlexInput
inp' n :: LexState
n =
    do  Parser ()
popLexState
        LexAction a
a AlexInput
inp AlexInput
inp' LexState
n


-- | Exit the current state without consuming any input
end :: LexAction Token
end :: LexAction Token
end _ _ _ =
    do  Parser ()
popLexState
        Parser Token
lexToken

-- | Parse a 'Keyword' token, triggers layout for 'layoutKeywords'.
keyword :: Keyword -> LexAction Token
keyword :: Keyword -> LexAction Token
keyword k :: Keyword
k = LexAction Token -> LexAction Token
forall r. LexAction r -> LexAction r
layout (LexAction Token -> LexAction Token)
-> LexAction Token -> LexAction Token
forall a b. (a -> b) -> a -> b
$ (Interval -> Token) -> LexAction Token
forall r. (Interval -> r) -> LexAction r
withInterval_ (Keyword -> Interval -> Token
TokKeyword Keyword
k)
    where
        layout :: LexAction r -> LexAction r
layout | Keyword
k Keyword -> [Keyword] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Keyword]
layoutKeywords  = LexAction r -> LexAction r
forall r. LexAction r -> LexAction r
withLayout
               | Bool
otherwise              = LexAction r -> LexAction r
forall a. a -> a
id


-- | Parse a 'Symbol' token.
symbol :: Symbol -> LexAction Token
symbol :: Symbol -> LexAction Token
symbol s :: Symbol
s = (Interval -> Token) -> LexAction Token
forall r. (Interval -> r) -> LexAction r
withInterval_ (Symbol -> Interval -> Token
TokSymbol Symbol
s)


-- | Parse a number.

number :: String -> Integer
number :: String -> Integer
number str :: String
str = String -> Integer
forall a. Read a => String -> a
read (String -> Integer) -> String -> Integer
forall a b. (a -> b) -> a -> b
$ case String
str of
  '0' : 'x' : num :: String
num -> String
str
  _               -> [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> [String]
forall a. (a -> Bool) -> [a] -> [[a]]
wordsBy ('_' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) String
str

integer :: String -> Integer
integer :: String -> Integer
integer = \case
  '-' : str :: String
str -> - (String -> Integer
number String
str)
  str :: String
str       -> String -> Integer
number String
str

-- | Parse a literal.
literal' :: (String -> a) -> (Range -> a -> Literal) -> LexAction Token
literal' :: (String -> a) -> (Range -> a -> Literal) -> LexAction Token
literal' read :: String -> a
read lit :: Range -> a -> Literal
lit =
  (String -> a) -> ((Interval, a) -> Token) -> LexAction Token
forall a tok.
(String -> a) -> ((Interval, a) -> tok) -> LexAction tok
withInterval' String -> a
read (Literal -> Token
TokLiteral (Literal -> Token)
-> ((Interval, a) -> Literal) -> (Interval, a) -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range -> a -> Literal) -> (Range, a) -> Literal
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Range -> a -> Literal
lit ((Range, a) -> Literal)
-> ((Interval, a) -> (Range, a)) -> (Interval, a) -> Literal
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Interval -> Range) -> (Interval, a) -> (Range, a)
forall a c b. (a -> c) -> (a, b) -> (c, b)
mapFst Interval -> Range
forall t. HasRange t => t -> Range
getRange)

literal :: Read a => (Range -> a -> Literal) -> LexAction Token
literal :: (Range -> a -> Literal) -> LexAction Token
literal = (String -> a) -> (Range -> a -> Literal) -> LexAction Token
forall a.
(String -> a) -> (Range -> a -> Literal) -> LexAction Token
literal' String -> a
forall a. Read a => String -> a
read

-- | Parse an identifier. Identifiers can be qualified (see 'Name').
--   Example: @Foo.Bar.f@
identifier :: LexAction Token
identifier :: LexAction Token
identifier = (Either (Interval, String) [(Interval, String)] -> Token)
-> LexAction Token
forall a.
(Either (Interval, String) [(Interval, String)] -> a)
-> LexAction a
qualified (((Interval, String) -> Token)
-> ([(Interval, String)] -> Token)
-> Either (Interval, String) [(Interval, String)]
-> Token
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Interval, String) -> Token
TokId [(Interval, String)] -> Token
TokQId)


-- | Parse a possibly qualified name.
qualified :: (Either (Interval, String) [(Interval, String)] -> a) -> LexAction a
qualified :: (Either (Interval, String) [(Interval, String)] -> a)
-> LexAction a
qualified tok :: Either (Interval, String) [(Interval, String)] -> a
tok =
    (String -> Parser a) -> LexAction a
forall tok. (String -> Parser tok) -> LexAction tok
token ((String -> Parser a) -> LexAction a)
-> (String -> Parser a) -> LexAction a
forall a b. (a -> b) -> a -> b
$ \s :: String
s ->
    do  Interval
i <- Parser Interval
getParseInterval
        case Interval -> [String] -> [(Interval, String)]
mkName Interval
i ([String] -> [(Interval, String)])
-> [String] -> [(Interval, String)]
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> [String]
forall a. (a -> Bool) -> [a] -> [[a]]
wordsBy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
=='.') String
s of
            []  -> String -> Parser a
forall a. String -> Parser a
lexError "lex error on .."
            [x :: (Interval, String)
x] -> a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> Parser a) -> a -> Parser a
forall a b. (a -> b) -> a -> b
$ Either (Interval, String) [(Interval, String)] -> a
tok (Either (Interval, String) [(Interval, String)] -> a)
-> Either (Interval, String) [(Interval, String)] -> a
forall a b. (a -> b) -> a -> b
$ (Interval, String)
-> Either (Interval, String) [(Interval, String)]
forall a b. a -> Either a b
Left  (Interval, String)
x
            xs :: [(Interval, String)]
xs  -> a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> Parser a) -> a -> Parser a
forall a b. (a -> b) -> a -> b
$ Either (Interval, String) [(Interval, String)] -> a
tok (Either (Interval, String) [(Interval, String)] -> a)
-> Either (Interval, String) [(Interval, String)] -> a
forall a b. (a -> b) -> a -> b
$ [(Interval, String)]
-> Either (Interval, String) [(Interval, String)]
forall a b. b -> Either a b
Right [(Interval, String)]
xs
    where
        -- Compute the ranges for the substrings (separated by '.') of
        -- a name. Dots are included: the intervals generated for
        -- "A.B.x" correspond to "A.", "B." and "x".
        mkName :: Interval -> [String] -> [(Interval, String)]
        mkName :: Interval -> [String] -> [(Interval, String)]
mkName _ []     = []
        mkName i :: Interval
i [x :: String
x]    = [(Interval
i, String
x)]
        mkName i :: Interval
i (x :: String
x:xs :: [String]
xs) = (Interval
i0, String
x) (Interval, String) -> [(Interval, String)] -> [(Interval, String)]
forall a. a -> [a] -> [a]
: Interval -> [String] -> [(Interval, String)]
mkName Interval
i1 [String]
xs
            where
                p0 :: Position' SrcFile
p0 = Interval -> Position' SrcFile
forall a. Interval' a -> Position' a
iStart Interval
i
                p1 :: Position' SrcFile
p1 = Interval -> Position' SrcFile
forall a. Interval' a -> Position' a
iEnd Interval
i
                p' :: Position' SrcFile
p' = Position' SrcFile -> Char -> Position' SrcFile
forall a. Position' a -> Char -> Position' a
movePos (Position' SrcFile -> String -> Position' SrcFile
forall a. Position' a -> String -> Position' a
movePosByString Position' SrcFile
p0 String
x) '.'
                i0 :: Interval
i0 = Position' SrcFile -> Position' SrcFile -> Interval
forall a. Position' a -> Position' a -> Interval' a
Interval Position' SrcFile
p0 Position' SrcFile
p'
                i1 :: Interval
i1 = Position' SrcFile -> Position' SrcFile -> Interval
forall a. Position' a -> Position' a -> Interval' a
Interval Position' SrcFile
p' Position' SrcFile
p1


{--------------------------------------------------------------------------
    Predicates
 --------------------------------------------------------------------------}

-- | True when the given character is the next character of the input string.
followedBy :: Char -> LexPredicate
followedBy :: Char -> LexPredicate
followedBy c' :: Char
c' _ _ _ inp :: AlexInput
inp =
    case AlexInput -> String
lexInput AlexInput
inp of
        []  -> Bool
False
        c :: Char
c:_ -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c'

-- | True if we are at the end of the file.
eof :: LexPredicate
eof :: LexPredicate
eof _ _ _ inp :: AlexInput
inp = String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (String -> Bool) -> String -> Bool
forall a b. (a -> b) -> a -> b
$ AlexInput -> String
lexInput AlexInput
inp

-- | True if the given state appears somewhere on the state stack
inState :: LexState -> LexPredicate
inState :: LexState -> LexPredicate
inState s :: LexState
s (ls :: [LexState]
ls, _) _ _ _ = LexState
s LexState -> [LexState] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [LexState]
ls