module UU.Scanner.Scanner where
import Data.Char(isLower, isUpper, isSpace, isAlphaNum, isDigit, chr, ord)
import Data.List(sort)
import Data.Maybe(isJust)
import UU.Util.BinaryTrees(tab2tree,btLocateIn)
import UU.Scanner.Token(Token, EnumValToken(..), valueToken, reserved, errToken)
import UU.Scanner.Position(Pos, initPos, advc, adv)
scanFile :: [String] -> [String] -> String -> String -> FilePath -> IO [Token]
scanFile :: [String] -> [String] -> String -> String -> String -> IO [Token]
scanFile keywordstxt :: [String]
keywordstxt keywordsops :: [String]
keywordsops specchars :: String
specchars opchars :: String
opchars fn :: String
fn =
do String
txt <- String -> IO String
readFile String
fn
[Token] -> IO [Token]
forall (m :: * -> *) a. Monad m => a -> m a
return ([String]
-> [String] -> String -> String -> Pos -> String -> [Token]
scan [String]
keywordstxt [String]
keywordsops String
specchars String
opchars (String -> Pos
initPos String
fn) String
txt)
scan :: [String] -> [String] -> String -> String -> Pos -> String -> [Token]
scan :: [String]
-> [String] -> String -> String -> Pos -> String -> [Token]
scan keywordstxt :: [String]
keywordstxt keywordsops :: [String]
keywordsops specchars :: String
specchars opchars :: String
opchars pos :: Pos
pos input :: String
input
= Pos -> String -> [Token]
doScan Pos
pos String
input
where
locatein :: Ord a => [a] -> a -> Bool
locatein :: [a] -> a -> Bool
locatein es :: [a]
es = Maybe a -> Bool
forall a. Maybe a -> Bool
isJust (Maybe a -> Bool) -> (a -> Maybe a) -> a -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> Ordering) -> BinSearchTree a -> a -> Maybe a
forall a b. (a -> b -> Ordering) -> BinSearchTree a -> b -> Maybe a
btLocateIn a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare ([a] -> BinSearchTree a
forall av. [av] -> BinSearchTree av
tab2tree ([a] -> [a]
forall a. Ord a => [a] -> [a]
sort [a]
es))
iskw :: String -> Bool
iskw = [String] -> String -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein [String]
keywordstxt
isop :: String -> Bool
isop = [String] -> String -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein [String]
keywordsops
isSymbol :: Char -> Bool
isSymbol = String -> Char -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein String
specchars
isOpsym :: Char -> Bool
isOpsym = String -> Char -> Bool
forall a. Ord a => [a] -> a -> Bool
locatein String
opchars
isIdStart :: Char -> Bool
isIdStart c :: Char
c = Char -> Bool
isLower Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '_'
isIdChar :: Char -> Bool
isIdChar c :: Char
c = Char -> Bool
isAlphaNum Char
c
Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\''
Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '_'
scanIdent :: Pos -> String -> (String, Pos, String)
scanIdent p :: Pos
p s :: String
s = let (name :: String
name,rest :: String
rest) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isIdChar String
s
in (String
name,Column -> Pos -> Pos
advc (String -> Column
forall (t :: * -> *) a. Foldable t => t a -> Column
length String
name) Pos
p,String
rest)
doScan :: Pos -> String -> [Token]
doScan p :: Pos
p [] = []
doScan p :: Pos
p (c :: Char
c:s :: String
s) | Char -> Bool
isSpace Char
c = let (sp :: String
sp,next :: String
next) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSpace String
s
in Pos -> String -> [Token]
doScan ((Pos -> Char -> Pos) -> Pos -> String -> Pos
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Pos -> Char -> Pos
adv Pos
p (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
sp)) String
next
doScan p :: Pos
p ('-':'-':s :: String
s) = Pos -> String -> [Token]
doScan Pos
p ((Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\n') String
s)
doScan p :: Pos
p ('{':'-':s :: String
s) = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc 2 Pos
p) String
s
doScan p :: Pos
p ('"':ss :: String
ss)
= let (s :: String
s,swidth :: Column
swidth,rest :: String
rest) = String -> (String, Column, String)
scanString String
ss
in if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest Bool -> Bool -> Bool
|| String -> Char
forall a. [a] -> a
head String
rest Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '"'
then String -> Pos -> Token
errToken "Unterminated string literal" Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc Column
swidth Pos
p) String
rest
else EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkString String
s Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc (Column
swidthColumn -> Column -> Column
forall a. Num a => a -> a -> a
+2) Pos
p) (String -> String
forall a. [a] -> [a]
tail String
rest)
doScan p :: Pos
p ('\'':ss :: String
ss)
= let (mc :: Maybe Char
mc,cwidth :: Column
cwidth,rest :: String
rest) = String -> (Maybe Char, Column, String)
scanChar String
ss
in case Maybe Char
mc of
Nothing -> String -> Pos -> Token
errToken "Error in character literal" Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc Column
cwidth Pos
p) String
rest
Just c :: Char
c -> if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
rest Bool -> Bool -> Bool
|| String -> Char
forall a. [a] -> a
head String
rest Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '\''
then String -> Pos -> Token
errToken "Unterminated character literal" Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc (Column
cwidthColumn -> Column -> Column
forall a. Num a => a -> a -> a
+1) Pos
p) String
rest
else EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkChar [Char
c] Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc (Column
cwidthColumn -> Column -> Column
forall a. Num a => a -> a -> a
+2) Pos
p) (String -> String
forall a. [a] -> [a]
tail String
rest)
doScan p :: Pos
p cs :: String
cs@(c :: Char
c:s :: String
s)
| Char -> Bool
isSymbol Char
c = String -> Pos -> Token
reserved [Char
c] Pos
p
Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan(Column -> Pos -> Pos
advc 1 Pos
p) String
s
| Char -> Bool
isIdStart Char
c Bool -> Bool -> Bool
|| Char -> Bool
isUpper Char
c
= let (name' :: String
name', p' :: Pos
p', s' :: String
s') = Pos -> String -> (String, Pos, String)
scanIdent (Column -> Pos -> Pos
advc 1 Pos
p) String
s
name :: String
name = Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
name'
tok :: Token
tok = if String -> Bool
iskw String
name
then String -> Pos -> Token
reserved String
name Pos
p
else if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
name' Bool -> Bool -> Bool
&& Char -> Bool
isSymbol Char
c
then String -> Pos -> Token
reserved [Char
c] Pos
p
else EnumValToken -> String -> Pos -> Token
valueToken (if Char -> Bool
isIdStart Char
c then EnumValToken
TkVarid else EnumValToken
TkConid) String
name Pos
p
in Token
tok Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan Pos
p' String
s'
| Char -> Bool
isOpsym Char
c = let (name :: String
name, s' :: String
s') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isOpsym String
cs
tok :: Token
tok | String -> Bool
isop String
name = String -> Pos -> Token
reserved String
name Pos
p
| Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==':' = EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkConOp String
name Pos
p
| Bool
otherwise = EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
TkOp String
name Pos
p
in Token
tok Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan ((Pos -> Char -> Pos) -> Pos -> String -> Pos
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl Pos -> Char -> Pos
adv Pos
p String
name) String
s'
| Char -> Bool
isDigit Char
c = let (tktype :: EnumValToken
tktype,number :: String
number,width :: Column
width,s' :: String
s') = String -> (EnumValToken, String, Column, String)
getNumber String
cs
in EnumValToken -> String -> Pos -> Token
valueToken EnumValToken
tktype String
number Pos
p Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Column -> Pos -> Pos
advc Column
width Pos
p) String
s'
| Bool
otherwise = String -> Pos -> Token
errToken ("Unexpected character " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c) Pos
p
Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: Pos -> String -> [Token]
doScan (Pos -> Char -> Pos
adv Pos
p Char
c) String
s
lexNest :: (Pos -> String -> [Token])
-> Pos
-> String
-> [Token]
lexNest :: (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest cont :: Pos -> String -> [Token]
cont pos :: Pos
pos inp :: String
inp = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
cont Pos
pos String
inp
where lexNest' :: (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' c :: Pos -> String -> [Token]
c p :: Pos
p ('-':'}':s :: String
s) = Pos -> String -> [Token]
c (Column -> Pos -> Pos
advc 2 Pos
p) String
s
lexNest' c :: Pos -> String -> [Token]
c p :: Pos
p ('{':'-':s :: String
s) = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' ((Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
c) (Column -> Pos -> Pos
advc 2 Pos
p) String
s
lexNest' c :: Pos -> String -> [Token]
c p :: Pos
p (x :: Char
x:s :: String
s) = (Pos -> String -> [Token]) -> Pos -> String -> [Token]
lexNest' Pos -> String -> [Token]
c (Pos -> Char -> Pos
adv Pos
p Char
x) String
s
lexNest' _ _ [] = [ String -> Pos -> Token
errToken "Unterminated nested comment" Pos
pos]
scanString :: String -> (String,Int,String)
scanString :: String -> (String, Column, String)
scanString [] = ("",0,[])
scanString ('\\':'&':xs :: String
xs) = let (str :: String
str,w :: Column
w,r :: String
r) = String -> (String, Column, String)
scanString String
xs
in (String
str,Column
wColumn -> Column -> Column
forall a. Num a => a -> a -> a
+2,String
r)
scanString ('\'':xs :: String
xs) = let (str :: String
str,w :: Column
w,r :: String
r) = String -> (String, Column, String)
scanString String
xs
in ('\''Char -> String -> String
forall a. a -> [a] -> [a]
: String
str,Column
wColumn -> Column -> Column
forall a. Num a => a -> a -> a
+1,String
r)
scanString xs :: String
xs = let (ch :: Maybe Char
ch,cw :: Column
cw,cr :: String
cr) = String -> (Maybe Char, Column, String)
getchar String
xs
(str :: String
str,w :: Column
w,r :: String
r) = String -> (String, Column, String)
scanString String
cr
str' :: String
str' = String -> (Char -> String) -> Maybe Char -> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe "" (Char -> String -> String
forall a. a -> [a] -> [a]
:String
str) Maybe Char
ch
in (String, Column, String)
-> (Char -> (String, Column, String))
-> Maybe Char
-> (String, Column, String)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ("",0,String
xs) (\c :: Char
c -> (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
str,Column
cwColumn -> Column -> Column
forall a. Num a => a -> a -> a
+Column
w,String
r)) Maybe Char
ch
scanChar :: [Char] -> (Maybe Char,Int,[Char])
scanChar :: String -> (Maybe Char, Column, String)
scanChar ('"' :xs :: String
xs) = (Char -> Maybe Char
forall a. a -> Maybe a
Just '"',1,String
xs)
scanChar xs :: String
xs = String -> (Maybe Char, Column, String)
getchar String
xs
getchar :: [Char] -> (Maybe Char,Int,[Char])
getchar :: String -> (Maybe Char, Column, String)
getchar [] = (Maybe Char
forall a. Maybe a
Nothing,0,[])
getchar s :: String
s@('\n':_ ) = (Maybe Char
forall a. Maybe a
Nothing,0,String
s )
getchar s :: String
s@('\t':_ ) = (Maybe Char
forall a. Maybe a
Nothing,0,String
s)
getchar s :: String
s@('\'':_ ) = (Maybe Char
forall a. Maybe a
Nothing,0,String
s)
getchar s :: String
s@('\"' :_ ) = (Maybe Char
forall a. Maybe a
Nothing,0,String
s)
getchar ('\\':xs :: String
xs) = let (c :: Maybe Char
c,l :: Column
l,r :: String
r) = String -> (Maybe Char, Column, String)
getEscChar String
xs
in (Maybe Char
c,Column
lColumn -> Column -> Column
forall a. Num a => a -> a -> a
+1,String
r)
getchar (x :: Char
x:xs :: String
xs) = (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x,1,String
xs)
getEscChar :: [Char] -> (Maybe Char,Int,[Char])
getEscChar :: String -> (Maybe Char, Column, String)
getEscChar [] = (Maybe Char
forall a. Maybe a
Nothing,0,[])
getEscChar s :: String
s@(x :: Char
x:xs :: String
xs) | Char -> Bool
isDigit Char
x = let (tp :: EnumValToken
tp,n :: String
n,len :: Column
len,rest :: String
rest) = String -> (EnumValToken, String, Column, String)
getNumber String
s
val :: Column
val = case EnumValToken
tp of
TkInteger8 -> Column -> String -> Column
readn 8 String
n
TkInteger16 -> Column -> String -> Column
readn 16 String
n
TkInteger10 -> Column -> String -> Column
readn 10 String
n
in if Column
val Column -> Column -> Bool
forall a. Ord a => a -> a -> Bool
>= 0 Bool -> Bool -> Bool
&& Column
val Column -> Column -> Bool
forall a. Ord a => a -> a -> Bool
<= 255
then (Char -> Maybe Char
forall a. a -> Maybe a
Just (Column -> Char
chr Column
val),Column
len, String
rest)
else (Maybe Char
forall a. Maybe a
Nothing,1,String
rest)
| Bool
otherwise = case Char
x Char -> [(Char, Char)] -> Maybe Char
forall a b. Eq a => a -> [(a, b)] -> Maybe b
`lookup` [(Char, Char)]
cntrChars of
Nothing -> (Maybe Char
forall a. Maybe a
Nothing,0,String
s)
Just c :: Char
c -> (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c,1,String
xs)
where cntrChars :: [(Char, Char)]
cntrChars = [('a','\a'),('b','\b'),('f','\f'),('n','\n'),('r','\r'),('t','\t')
,('v','\v'),('\\','\\'),('\"','\"'),('\'','\'')]
readn :: Int -> [Char] -> Int
readn :: Column -> String -> Column
readn base :: Column
base n :: String
n = (Column -> Char -> Column) -> Column -> String -> Column
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\r :: Column
r x :: Char
x -> Char -> Column
value Char
x Column -> Column -> Column
forall a. Num a => a -> a -> a
+ Column
base Column -> Column -> Column
forall a. Num a => a -> a -> a
* Column
r) 0 String
n
getNumber :: [Char] -> (EnumValToken,[Char],Int,[Char])
getNumber :: String -> (EnumValToken, String, Column, String)
getNumber cs :: String
cs@(c :: Char
c:s :: String
s)
| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '0' = (EnumValToken, String, Column, String)
num10
| String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
s = (EnumValToken, String, Column, String)
const0
| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== 'x' Bool -> Bool -> Bool
|| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== 'X' = (EnumValToken, String, Column, String)
num16
| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== 'o' Bool -> Bool -> Bool
|| Char
hs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== 'O' = (EnumValToken, String, Column, String)
num8
| Bool
otherwise = (EnumValToken, String, Column, String)
num10
where (hs :: Char
hs:ts :: String
ts) = String
s
const0 :: (EnumValToken, String, Column, String)
const0 = (EnumValToken
TkInteger10, "0",1,String
s)
num10 :: (EnumValToken, String, Column, String)
num10 = let (n :: String
n,r :: String
r) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
cs
in (EnumValToken
TkInteger10,String
n,String -> Column
forall (t :: * -> *) a. Foldable t => t a -> Column
length String
n,String
r)
num16 :: (EnumValToken, String, Column, String)
num16 = (Char -> Bool)
-> String -> EnumValToken -> (EnumValToken, String, Column, String)
readNum Char -> Bool
isHexaDigit String
ts EnumValToken
TkInteger16
num8 :: (EnumValToken, String, Column, String)
num8 = (Char -> Bool)
-> String -> EnumValToken -> (EnumValToken, String, Column, String)
readNum Char -> Bool
isOctalDigit String
ts EnumValToken
TkInteger8
readNum :: (Char -> Bool)
-> String -> EnumValToken -> (EnumValToken, String, Column, String)
readNum p :: Char -> Bool
p ts :: String
ts tk :: EnumValToken
tk
= let nrs :: (String, String)
nrs@(n :: String
n,rs :: String
rs) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
p String
ts
in if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
n then (EnumValToken, String, Column, String)
const0
else (EnumValToken
tk , String
n, 2Column -> Column -> Column
forall a. Num a => a -> a -> a
+String -> Column
forall (t :: * -> *) a. Foldable t => t a -> Column
length String
n,String
rs)
isHexaDigit :: Char -> Bool
isHexaDigit :: Char -> Bool
isHexaDigit d :: Char
d = Char -> Bool
isDigit Char
d Bool -> Bool -> Bool
|| (Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'A' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'F') Bool -> Bool -> Bool
|| (Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= 'a' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= 'f')
isOctalDigit :: Char -> Bool
isOctalDigit :: Char -> Bool
isOctalDigit d :: Char
d = Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= '0' Bool -> Bool -> Bool
&& Char
d Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '7'
value :: Char -> Int
value :: Char -> Column
value c :: Char
c | Char -> Bool
isDigit Char
c = Char -> Column
ord Char
c Column -> Column -> Column
forall a. Num a => a -> a -> a
- Char -> Column
ord '0'
| Char -> Bool
isUpper Char
c = Char -> Column
ord Char
c Column -> Column -> Column
forall a. Num a => a -> a -> a
- Char -> Column
ord 'A' Column -> Column -> Column
forall a. Num a => a -> a -> a
+ 10
| Char -> Bool
isLower Char
c = Char -> Column
ord Char
c Column -> Column -> Column
forall a. Num a => a -> a -> a
- Char -> Column
ord 'a' Column -> Column -> Column
forall a. Num a => a -> a -> a
+ 10