{-# LANGUAGE ForeignFunctionInterface, EmptyDataDecls #-}
{-# OPTIONS_GHC -fno-cse -fno-full-laziness #-}


-- | Low-level interface to Expat. Unless speed is paramount, this should
-- normally be avoided in favour of the interfaces provided by
-- 'Text.XML.Expat.SAX' and 'Text.XML.Expat.Tree', etc.
module Text.XML.Expat.Internal.IO (
  HParser,
  hexpatNewParser,
  encodingToString,
  Encoding(..),
  XMLParseError(..),
  XMLParseLocation(..)
  ) where

import Control.Applicative
import Control.DeepSeq
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as I
import Data.Int
import Data.Word
import Foreign
import Foreign.C


data Parser_struct
type ParserPtr = Ptr Parser_struct

data Encoding = ASCII | UTF8 | UTF16 | ISO88591
encodingToString :: Encoding -> String
encodingToString :: Encoding -> String
encodingToString Encoding
ASCII    = String
"US-ASCII"
encodingToString Encoding
UTF8     = String
"UTF-8"
encodingToString Encoding
UTF16    = String
"UTF-16"
encodingToString Encoding
ISO88591 = String
"ISO-8859-1"

withOptEncoding :: Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding :: forall a. Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Maybe Encoding
Nothing    CString -> IO a
f = CString -> IO a
f CString
forall a. Ptr a
nullPtr
withOptEncoding (Just Encoding
enc) CString -> IO a
f = String -> (CString -> IO a) -> IO a
forall a. String -> (CString -> IO a) -> IO a
withCString (Encoding -> String
encodingToString Encoding
enc) CString -> IO a
f

-- ByteString.useAsCStringLen is almost what we need, but C2HS wants a CInt
-- instead of an Int.
withBStringLen :: B.ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen :: forall a. ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen ByteString
bs (CString, CInt) -> IO a
f = do
  ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
B.useAsCStringLen ByteString
bs ((CStringLen -> IO a) -> IO a) -> (CStringLen -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \(CString
str, Int
len) -> (CString, CInt) -> IO a
f (CString
str, Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)

unStatus :: CInt -> Bool
unStatus :: CInt -> Bool
unStatus CInt
0 = Bool
False
unStatus CInt
_ = Bool
True

getError :: ParserPtr -> IO XMLParseError
getError :: ParserPtr -> IO XMLParseError
getError ParserPtr
pp = do
    code <- ParserPtr -> IO CInt
xmlGetErrorCode ParserPtr
pp
    cerr <- xmlErrorString code
    err <- peekCString cerr
    loc <- getParseLocation pp
    return $ XMLParseError err loc

-- |Obtain C value from Haskell 'Bool'.
--
cFromBool :: Num a => Bool -> a
cFromBool :: forall a. Num a => Bool -> a
cFromBool = Bool -> a
forall a. Num a => Bool -> a
fromBool

-- | Parse error, consisting of message text and error location
data XMLParseError = XMLParseError String XMLParseLocation deriving (XMLParseError -> XMLParseError -> Bool
(XMLParseError -> XMLParseError -> Bool)
-> (XMLParseError -> XMLParseError -> Bool) -> Eq XMLParseError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: XMLParseError -> XMLParseError -> Bool
== :: XMLParseError -> XMLParseError -> Bool
$c/= :: XMLParseError -> XMLParseError -> Bool
/= :: XMLParseError -> XMLParseError -> Bool
Eq, Int -> XMLParseError -> ShowS
[XMLParseError] -> ShowS
XMLParseError -> String
(Int -> XMLParseError -> ShowS)
-> (XMLParseError -> String)
-> ([XMLParseError] -> ShowS)
-> Show XMLParseError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> XMLParseError -> ShowS
showsPrec :: Int -> XMLParseError -> ShowS
$cshow :: XMLParseError -> String
show :: XMLParseError -> String
$cshowList :: [XMLParseError] -> ShowS
showList :: [XMLParseError] -> ShowS
Show)

instance NFData XMLParseError where
    rnf :: XMLParseError -> ()
rnf (XMLParseError String
msg XMLParseLocation
loc) = (String, XMLParseLocation) -> ()
forall a. NFData a => a -> ()
rnf (String
msg, XMLParseLocation
loc)

-- | Specifies a location of an event within the input text
data XMLParseLocation = XMLParseLocation {
        XMLParseLocation -> Int64
xmlLineNumber   :: Int64,  -- ^ Line number of the event
        XMLParseLocation -> Int64
xmlColumnNumber :: Int64,  -- ^ Column number of the event
        XMLParseLocation -> Int64
xmlByteIndex    :: Int64,  -- ^ Byte index of event from start of document
        XMLParseLocation -> Int64
xmlByteCount    :: Int64   -- ^ The number of bytes in the event
    }
    deriving (XMLParseLocation -> XMLParseLocation -> Bool
(XMLParseLocation -> XMLParseLocation -> Bool)
-> (XMLParseLocation -> XMLParseLocation -> Bool)
-> Eq XMLParseLocation
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: XMLParseLocation -> XMLParseLocation -> Bool
== :: XMLParseLocation -> XMLParseLocation -> Bool
$c/= :: XMLParseLocation -> XMLParseLocation -> Bool
/= :: XMLParseLocation -> XMLParseLocation -> Bool
Eq, Int -> XMLParseLocation -> ShowS
[XMLParseLocation] -> ShowS
XMLParseLocation -> String
(Int -> XMLParseLocation -> ShowS)
-> (XMLParseLocation -> String)
-> ([XMLParseLocation] -> ShowS)
-> Show XMLParseLocation
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> XMLParseLocation -> ShowS
showsPrec :: Int -> XMLParseLocation -> ShowS
$cshow :: XMLParseLocation -> String
show :: XMLParseLocation -> String
$cshowList :: [XMLParseLocation] -> ShowS
showList :: [XMLParseLocation] -> ShowS
Show)

instance NFData XMLParseLocation where
    rnf :: XMLParseLocation -> ()
rnf (XMLParseLocation Int64
lin Int64
col Int64
ind Int64
cou) = (Int64, Int64, Int64, Int64) -> ()
forall a. NFData a => a -> ()
rnf (Int64
lin, Int64
col, Int64
ind, Int64
cou)

getParseLocation :: ParserPtr -> IO XMLParseLocation
getParseLocation :: ParserPtr -> IO XMLParseLocation
getParseLocation ParserPtr
pp = do
    line <- ParserPtr -> IO CULong
xmlGetCurrentLineNumber ParserPtr
pp
    col <- xmlGetCurrentColumnNumber pp
    index <- xmlGetCurrentByteIndex pp
    count <- xmlGetCurrentByteCount pp
    return $ XMLParseLocation {
            xmlLineNumber = fromIntegral line,
            xmlColumnNumber = fromIntegral col,
            xmlByteIndex = fromIntegral index,
            xmlByteCount = fromIntegral count
        }

-- Note on word sizes:
--
-- on expat 2.0:
-- XML_GetCurrentLineNumber returns XML_Size
-- XML_GetCurrentColumnNumber returns XML_Size
-- XML_GetCurrentByteIndex returns XML_Index
-- These are defined in expat_external.h
--
-- debian-i386 says XML_Size and XML_Index are 4 bytes.
-- ubuntu-amd64 says XML_Size and XML_Index are 8 bytes.
-- These two systems do NOT define XML_LARGE_SIZE, which would force these types
-- to be 64-bit.
--
-- If we guess the word size too small, it shouldn't matter: We will just discard
-- the most significant part.  If we get the word size too large, we will get
-- garbage (very bad).
--
-- So - what I will do is use CLong and CULong, which correspond to what expat
-- is using when XML_LARGE_SIZE is disabled, and give the correct sizes on the
-- two machines mentioned above.  At the absolute worst the word size will be too
-- short.

foreign import ccall unsafe "expat.h XML_GetErrorCode" xmlGetErrorCode
    :: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_GetCurrentLineNumber" xmlGetCurrentLineNumber
    :: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentColumnNumber" xmlGetCurrentColumnNumber
    :: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentByteIndex" xmlGetCurrentByteIndex
    :: ParserPtr -> IO CLong
foreign import ccall unsafe "expat.h XML_GetCurrentByteCount" xmlGetCurrentByteCount
    :: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_ErrorString" xmlErrorString
    :: CInt -> IO CString

type HParser = B.ByteString -> Bool -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)

foreign import ccall unsafe "hexpatNewParser"
  _hexpatNewParser :: Ptr CChar -> CInt -> IO MyParserPtr

foreign import ccall unsafe "hexpatGetParser"
  _hexpatGetParser :: MyParserPtr -> ParserPtr

data MyParser_struct
type MyParserPtr = Ptr MyParser_struct

foreign import ccall "&hexpatFreeParser" hexpatFreeParser :: FunPtr (MyParserPtr -> IO ())

hexpatNewParser :: Maybe Encoding
                -> Maybe (B.ByteString -> Maybe B.ByteString)  -- ^ Entity decoder
                -> Bool        -- ^ Whether to include input locations
                -> IO (HParser, IO XMLParseLocation)
hexpatNewParser :: Maybe Encoding
-> Maybe (ByteString -> Maybe ByteString)
-> Bool
-> IO (HParser, IO XMLParseLocation)
hexpatNewParser Maybe Encoding
enc Maybe (ByteString -> Maybe ByteString)
mDecoder Bool
locations =
    Maybe Encoding
-> (CString -> IO (HParser, IO XMLParseLocation))
-> IO (HParser, IO XMLParseLocation)
forall a. Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Maybe Encoding
enc ((CString -> IO (HParser, IO XMLParseLocation))
 -> IO (HParser, IO XMLParseLocation))
-> (CString -> IO (HParser, IO XMLParseLocation))
-> IO (HParser, IO XMLParseLocation)
forall a b. (a -> b) -> a -> b
$ \CString
cEnc -> do
        parser <- FinalizerPtr MyParser_struct
-> Ptr MyParser_struct -> IO (ForeignPtr MyParser_struct)
forall a. FinalizerPtr a -> Ptr a -> IO (ForeignPtr a)
newForeignPtr FinalizerPtr MyParser_struct
hexpatFreeParser (Ptr MyParser_struct -> IO (ForeignPtr MyParser_struct))
-> IO (Ptr MyParser_struct) -> IO (ForeignPtr MyParser_struct)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< CString -> CInt -> IO (Ptr MyParser_struct)
_hexpatNewParser CString
cEnc (Bool -> CInt
forall a. Num a => Bool -> a
cFromBool Bool
locations)
        return (parse parser, withForeignPtr parser $ \Ptr MyParser_struct
mp -> ParserPtr -> IO XMLParseLocation
getParseLocation (ParserPtr -> IO XMLParseLocation)
-> ParserPtr -> IO XMLParseLocation
forall a b. (a -> b) -> a -> b
$ Ptr MyParser_struct -> ParserPtr
_hexpatGetParser Ptr MyParser_struct
mp)
  where
    parse :: ForeignPtr MyParser_struct -> HParser
parse ForeignPtr MyParser_struct
parser = case Maybe (ByteString -> Maybe ByteString)
mDecoder of
        Maybe (ByteString -> Maybe ByteString)
Nothing -> \ByteString
text Bool
final ->
            (Ptr (Ptr Word8)
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr (Ptr Word8)
  -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> (Ptr (Ptr Word8)
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr Word8)
ppData ->
            (Ptr CInt -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr CInt -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> (Ptr CInt -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \Ptr CInt
pLen ->
            ByteString
-> ((CString, CInt)
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a. ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen ByteString
text (((CString, CInt)
  -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> ((CString, CInt)
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \(CString
textBuf, CInt
textLen) ->
            ForeignPtr MyParser_struct
-> (Ptr MyParser_struct
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr MyParser_struct
parser ((Ptr MyParser_struct
  -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> (Ptr MyParser_struct
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \Ptr MyParser_struct
pp -> do
                ok <- CInt -> Bool
unStatus (CInt -> Bool) -> IO CInt -> IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr MyParser_struct
-> CString
-> CInt
-> CInt
-> Ptr (Ptr Word8)
-> Ptr CInt
-> IO CInt
_hexpatParseUnsafe Ptr MyParser_struct
pp CString
textBuf CInt
textLen (Bool -> CInt
forall a. Num a => Bool -> a
cFromBool Bool
final) Ptr (Ptr Word8)
ppData Ptr CInt
pLen
                pData <- peek ppData
                len <- peek pLen
                err <- if ok
                    then return Nothing
                    else Just <$> getError (_hexpatGetParser pp)
                fpData <- newForeignPtr funPtrFree pData
                return (fpData, len, err)
        Just ByteString -> Maybe ByteString
decoder -> \ByteString
text Bool
final ->
            (Ptr (Ptr Word8)
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr (Ptr Word8)
  -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> (Ptr (Ptr Word8)
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \Ptr (Ptr Word8)
ppData ->
            (Ptr CInt -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. Storable a => (Ptr a -> IO b) -> IO b
alloca ((Ptr CInt -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> (Ptr CInt -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \Ptr CInt
pLen ->
            ByteString
-> ((CString, CInt)
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a. ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen ByteString
text (((CString, CInt)
  -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> ((CString, CInt)
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \(CString
textBuf, CInt
textLen) ->
            ForeignPtr MyParser_struct
-> (Ptr MyParser_struct
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr MyParser_struct
parser ((Ptr MyParser_struct
  -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
 -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> (Ptr MyParser_struct
    -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError))
-> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
forall a b. (a -> b) -> a -> b
$ \Ptr MyParser_struct
pp -> do
                eh <- CEntityHandler -> IO (FunPtr CEntityHandler)
mkCEntityHandler (CEntityHandler -> IO (FunPtr CEntityHandler))
-> ((ByteString -> Maybe ByteString) -> CEntityHandler)
-> (ByteString -> Maybe ByteString)
-> IO (FunPtr CEntityHandler)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Maybe ByteString) -> CEntityHandler
wrapCEntityHandler ((ByteString -> Maybe ByteString) -> IO (FunPtr CEntityHandler))
-> (ByteString -> Maybe ByteString) -> IO (FunPtr CEntityHandler)
forall a b. (a -> b) -> a -> b
$ ByteString -> Maybe ByteString
decoder
                _hexpatSetEntityHandler pp eh
                ok <- unStatus <$> _hexpatParseSafe pp textBuf textLen (cFromBool final) ppData pLen
                freeHaskellFunPtr eh
                pData <- peek ppData
                len <- peek pLen
                err <- if ok
                    then return Nothing
                    else Just <$> getError (_hexpatGetParser pp)
                fpData <- newForeignPtr funPtrFree pData
                return (fpData, len, err)

foreign import ccall unsafe "hexpatParse"
  _hexpatParseUnsafe :: MyParserPtr -> Ptr CChar -> CInt -> CInt -> Ptr (Ptr Word8) -> Ptr CInt -> IO CInt

foreign import ccall safe "hexpatParse"
  _hexpatParseSafe :: MyParserPtr -> Ptr CChar -> CInt -> CInt -> Ptr (Ptr Word8) -> Ptr CInt -> IO CInt

type CEntityHandler = Ptr CChar -> IO (Ptr CChar)

foreign import ccall safe "wrapper"
  mkCEntityHandler :: CEntityHandler
                   -> IO (FunPtr CEntityHandler)

peekByteStringLen :: CStringLen -> IO B.ByteString
{-# INLINE peekByteStringLen #-}
peekByteStringLen :: CStringLen -> IO ByteString
peekByteStringLen (CString
cstr, Int
len) =
    Int -> (Ptr Word8 -> IO ()) -> IO ByteString
I.create (Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) ((Ptr Word8 -> IO ()) -> IO ByteString)
-> (Ptr Word8 -> IO ()) -> IO ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
        Ptr Word8 -> Ptr Word8 -> Int -> IO ()
I.memcpy Ptr Word8
ptr (CString -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr CString
cstr) (Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)

wrapCEntityHandler :: (B.ByteString -> Maybe B.ByteString) -> CEntityHandler
wrapCEntityHandler :: (ByteString -> Maybe ByteString) -> CEntityHandler
wrapCEntityHandler ByteString -> Maybe ByteString
handler = CEntityHandler
h
  where
    h :: CEntityHandler
h CString
cname = do
        sz <- CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSize -> Int) -> IO CSize -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CString -> IO CSize
I.c_strlen CString
cname
        name <- peekByteStringLen (cname, sz)
        case handler name of
            Just ByteString
text -> do
                let (ForeignPtr Word8
fp, Int
offset, Int
len) = ByteString -> (ForeignPtr Word8, Int, Int)
I.toForeignPtr ByteString
text
                ForeignPtr Word8 -> (Ptr Word8 -> IO CString) -> IO CString
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
fp ((Ptr Word8 -> IO CString) -> IO CString)
-> (Ptr Word8 -> IO CString) -> IO CString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ctextBS -> do
                    ctext <- Int -> IO CString
forall a. Int -> IO (Ptr a)
mallocBytes (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) :: IO CString
                    I.memcpy (castPtr ctext) (ctextBS `plusPtr` offset) (fromIntegral len)
                    poke (ctext `plusPtr` len) (0 :: CChar)
                    return ctext
            Maybe ByteString
Nothing -> CEntityHandler
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return CString
forall a. Ptr a
nullPtr

foreign import ccall unsafe "hexpatSetEntityHandler"
  _hexpatSetEntityHandler :: MyParserPtr -> FunPtr CEntityHandler -> IO ()

foreign import ccall "&free" funPtrFree :: FunPtr (Ptr Word8 -> IO ())