implementation module Text.XML import StdArray, StdBool, StdInt, StdList, StdTuple, StdGeneric, StdFunc import Data.Error, Data.Void, Data.Either, Data.Maybe, Text, Text.ParserCombinators, GenEq uname :: !String -> XMLQName uname name = XMLQName Nothing name qname :: !XMLNamespacePrefix !String -> XMLQName qname namespace name = XMLQName (Just namespace) name addNamespaces :: !(Maybe XMLURI) [(!XMLNamespacePrefix,!String)] !XMLNode -> XMLNode addNamespaces mbDefaultNamespace namespaces (XMLElem qname attrs children) # ns = map (\(prefix,uri) -> XMLAttr (XMLQName (Just "xmlns") prefix) uri) namespaces # ns = case mbDefaultNamespace of Nothing = ns Just defaultNamespace = [XMLAttr (XMLQName Nothing "xmlns") defaultNamespace:ns] = (XMLElem qname (ns ++ attrs) children) docSize :: !XMLDoc -> Int docSize (XMLDoc defaultNamespace namespaces documentElement) # documentElement = addNamespaces defaultNamespace namespaces documentElement = 37 + nodeSize documentElement nodeSize :: !XMLNode -> Int nodeSize (XMLText text) = escapedSize text nodeSize (XMLElem qname attrs children) # attrsSize = sum (map attrSize attrs) + length attrs = if (isEmpty children) (3 + qnameSize qname + attrsSize) (5 + 2 * qnameSize qname + attrsSize + sum (map nodeSize children)) attrSize :: !XMLAttr -> Int attrSize (XMLAttr qname value) = 3 + qnameSize qname + escapedSize value qnameSize :: !XMLQName -> Int qnameSize (XMLQName Nothing name) = size name qnameSize (XMLQName (Just ns) name) = 1 + size ns + size name //Calculates the number of chars in a string when xml special characters are escaped escapedSize :: !{#Char} -> Int escapedSize s = escapedSize` s (size s) 0 where escapedSize` s n i | i == n = 0 | s.[i] == '<' = 4 + escapedSize` s n (i + 1) | s.[i] == '>' = 4 + escapedSize` s n (i + 1) | s.[i] == '&' = 5 + escapedSize` s n (i + 1) | otherwise = 1 + escapedSize` s n (i + 1) serializeDoc :: !XMLDoc !*{#Char} !Int -> (!*{#Char}, !Int) serializeDoc (XMLDoc defaultNamespace namespaces documentElement) dest dest_i # documentElement = addNamespaces defaultNamespace namespaces documentElement # (dest,dest_i) = copyChars "" 0 False dest dest_i = serializeNode documentElement dest dest_i serializeNode :: !XMLNode !*{#Char} !Int -> (!*{#Char}, !Int) serializeNode (XMLText text) dest dest_i = copyChars text 0 True dest dest_i serializeNode (XMLElem qname attrs []) dest dest_i # dest = {dest & [dest_i] = '<'} # dest_i = dest_i + 1 # (dest,dest_i) = serializeQName qname dest dest_i # (dest,dest_i) = serializeMap serializeAttr attrs dest dest_i # dest = {dest & [dest_i] = '/'} # dest_i = dest_i + 1 # dest = {dest & [dest_i] = '>'} = (dest,dest_i + 1) serializeNode (XMLElem qname attrs children) dest dest_i # dest = {dest & [dest_i] = '<'} # dest_i = dest_i + 1 # (dest,dest_i) = serializeQName qname dest dest_i # (dest,dest_i) = serializeMap serializeAttr attrs dest dest_i # dest = {dest & [dest_i] = '>'} # dest_i = dest_i + 1 # (dest,dest_i) = serializeMap serializeNode children dest dest_i # dest = {dest & [dest_i] = '<'} # dest_i = dest_i + 1 # dest = {dest & [dest_i] = '/'} # dest_i = dest_i + 1 # (dest,dest_i) = serializeQName qname dest dest_i # dest = {dest & [dest_i] = '>'} = (dest,dest_i + 1) serializeMap f [] dest dest_i = (dest, dest_i) serializeMap f [x:xs] dest dest_i # (dest, dest_i) = f x dest dest_i = serializeMap f xs dest dest_i serializeAttr :: !XMLAttr !*{#Char} !Int -> (!*{#Char}, !Int) serializeAttr (XMLAttr qname value) dest dest_i # dest = {dest & [dest_i] = ' '} # dest_i = dest_i + 1 # (dest,dest_i) = serializeQName qname dest dest_i # dest = {dest & [dest_i] = '='} # dest_i = dest_i + 1 # dest = {dest & [dest_i] = '"'} # dest_i = dest_i + 1 # (dest,dest_i) = copyChars value 0 True dest dest_i # dest = {dest & [dest_i] = '"'} # dest_i = dest_i + 1 = (dest,dest_i) serializeQName :: !XMLQName !*{#Char} !Int -> (!*{#Char}, !Int) serializeQName (XMLQName Nothing name) dest dest_i = copyChars name 0 False dest dest_i serializeQName (XMLQName (Just ns) name) dest dest_i # (dest, dest_i) = copyChars ns 0 False dest dest_i # dest = {dest & [dest_i] = ':'} # dest_i = dest_i + 1 = copyChars name 0 False dest dest_i copyChars :: !{#Char} !Int !Bool !*{#Char} !Int -> (!*{#Char},!Int) copyChars src src_i escape dest dest_i | src_i == (size src) = (dest, dest_i) | otherwise | escape && (src.[src_i] == '<') # dest = {dest & [dest_i] = '&', [dest_i + 1] = 'l', [dest_i + 2] = 't', [dest_i + 3] = ';'} = copyChars src (src_i + 1) escape dest (dest_i + 4) | escape && (src.[src_i] == '>') # dest = {dest & [dest_i] = '&', [dest_i + 1] = 'g', [dest_i + 2] = 't', [dest_i + 3] = ';'} = copyChars src (src_i + 1) escape dest (dest_i + 4) | escape && (src.[src_i] == '&') # dest = {dest & [dest_i] = '&', [dest_i + 1] = 'a', [dest_i + 2] = 'm', [dest_i + 3] = 'p', [dest_i + 4] = ';'} = copyChars src (src_i + 1) escape dest (dest_i + 5) | otherwise # dest = {dest & [dest_i] = src.[src_i]} = copyChars src (src_i + 1) escape dest (dest_i + 1) instance toString XMLDoc where toString doc # docsize = docSize doc # docstring = createArray docsize '\0' # (docstring,_) = serializeDoc doc docstring 0 = docstring instance fromString (MaybeErrorString XMLDoc) where fromString xmlStr # tokens = lex xmlStr 0 [] | isError tokens = liftError tokens # xmlDoc = pXMLDoc (fromOk tokens) | isEmpty xmlDoc = Error "parse error" = Ok (snd (hd xmlDoc)) //Token type which is the intermediary representation during XML parsing :: Token = TokenAttrValue !String | TokenCharData !String | TokenName !String | TokenStartTagOpen | TokenTagClose | TokenEmptyTagClose | TokenEndTagOpen | TokenDeclarationStart | TokenDeclarationEnd | TokenEqual derive gEq Token instance == Token where (==) a b = a === b isName (TokenName _) = True isName _ = False isCharData (TokenCharData _) = True isCharData _ = False isAttrValue (TokenAttrValue _) = True isAttrValue _ = False :: LexFunctionResult = Token !Int !Token | NoToken !Int | Fail !String :: LexFunction :== String Int -> Maybe LexFunctionResult lex :: !String !Int ![Token] -> MaybeErrorString [Token] lex input offset tokens | offset >= size input = Ok (reverse tokens) //Done | dataMode tokens && isJust charDataResult = processResult (fromJust charDataResult) | otherwise = processResult (lexAny input offset lexFunctions) where lexFunctions = [ lexWhitespace , lexDeclarationStart , lexDeclarationEnd , lexEmptyTagClose , lexTagClose , lexEndTagOpen , lexStartTagOpen , lexEqual , lexAttrValue , lexName ] dataMode [TokenTagClose:_] = True dataMode [TokenEmptyTagClose:_] = True dataMode _ = False charDataResult = lexCharData input offset processResult r = case r of Token offset token = lex input offset [token:tokens] //Lex another token and do recursive call NoToken offset = lex input offset tokens Fail err = Error err //Try any of the lexers in the list until one succeeds lexAny :: !String !Int ![LexFunction] -> LexFunctionResult lexAny input offset [] = Fail ("invalid input character: '" +++ toString input.[offset] +++ "'") lexAny input offset [f:fs] = case f input offset of Just result = result Nothing = lexAny input offset fs lexEqual = lexFixed "=" TokenEqual lexDeclarationEnd = lexFixed "?>" TokenDeclarationEnd lexEndTagOpen = lexFixed "" TokenEmptyTagClose lexTagClose = lexFixed ">" TokenTagClose lexDeclarationStart input offset = case lexFixed " "" = Just (Token end (TokenCharData data)) | otherwise = Nothing | otherwise = Nothing where end = findEnd isTextChar input (offset + 1) isTextChar c = c <> '<' && c <> '&' //Names lexName input offset | isNameStartChar input.[offset] = Just (Token end (TokenName (input % (offset, end - 1)))) | otherwise = Nothing where end = findEnd isNameChar input (offset + 1) isNameStartChar c | c == ':' || c == '_' = True | c >= 'a' && c <= 'z' = True | c >= 'A' && c <= 'Z' = True | otherwise = False isNameChar c | isNameStartChar c = True | c == '-' || c == '.' = True | c >= '0' && c <= '9' = True | otherwise = False //AttrValue lexAttrValue input offset | input.[offset] <> '"' = Nothing = Just (Token end (TokenAttrValue (input % (offset + 1, end - 2)))) where end = findAttrValueEnd input (offset + 1) findAttrValueEnd input offset | offset >= size input = offset | input.[offset] == '"' = offset + 1 | otherwise = findAttrValueEnd input (offset + 1) lexWhitespace input offset | last == offset = Nothing = Just (NoToken last) where last = findEnd isWhitespace input offset isWhitespace '\x20' = True isWhitespace '\x9' = True isWhitespace '\xD' = True isWhitespace '\xA' = True isWhitespace _ = False //Lex token of fixed size lexFixed chars token input offset | input % (offset,offset + (size chars) - 1) == chars = Just (Token (offset + size chars) token) = Nothing //Find the first offset where the predicate no longer holds findEnd pred input offset | offset >= size input = offset | pred input.[offset] = findEnd pred input (offset + 1) = offset pXMLDoc :: Parser Token XMLDoc pXMLDoc = begin1 pXMLDoc` where pXMLDoc` = mkXMLDoc @> (pDocDeclaration <|> yield [] )-&+ pElem mkXMLDoc (XMLElem name attributes elements) = XMLDoc mbURI namespaces (XMLElem name attrs elements) where (mbURI,namespaces,attrs) = filterNamespaces attributes (Nothing,[],[]) filterNamespaces [] acc = acc filterNamespaces [attr=:(XMLAttr name val):rest] (mbURI,namespaces,attrs) # acc = case name of XMLQName Nothing "xmlns" = (Just val,namespaces,attrs) XMLQName (Just "xmlns") ns = (mbURI,[(ns,val):namespaces],attrs) _ = (mbURI,namespaces,[attr:attrs]) = filterNamespaces rest acc pDocDeclaration = symbol TokenDeclarationStart &> (<+?> pAttr) <& symbol TokenDeclarationEnd pNode = pCharData <@ (\d -> XMLText d) pElem pElem = pElemCont pElemEmpty pElemCont = pElemStart <&> (\(name,attributes) -> symbol TokenTagClose &> (<*?> pNode) <& pElemContEnd >?< ((==) name) <@ (\nodes -> XMLElem (toQName name) attributes nodes)) pElemEmpty = pElemStart <& symbol TokenEmptyTagClose <@ (\(name,attributes) -> XMLElem (toQName name) attributes []) pElemStart = (\name attributes -> (name,attributes)) @> symbol TokenStartTagOpen -&+ pName +&+ (<*?> pAttr) pElemContEnd = symbol TokenEndTagOpen &> pName <& symbol TokenTagClose pAttr = (\name v -> XMLAttr (toQName name) v) @> pName +&- symbol TokenEqual +&+ pAttrValue pName = satisfy isName <@ (\(TokenName n) -> n) pAttrValue = satisfy isAttrValue <@ (\(TokenAttrValue v) -> v) pCharData = satisfy isCharData <@ (\(TokenCharData d) -> d) toQName :: !String -> XMLQName toQName name | colonIdx > 0 = qname (subString 0 colonIdx name) (subString (colonIdx + 1) (textSize name - colonIdx) name) | otherwise = uname name where colonIdx = indexOf ":" name // generic printer toXML :: !a -> XMLDoc | XMLEncode{|*|} a toXML a = XMLDoc Nothing [] (wrapToElem (XMLEncode{|*|} a)) toXMLString :: !a -> String | XMLEncode{|*|} a toXMLString a = toString (toXML a) :: XMLEncodeResult = XMLEncElem !(!XMLQName,![XMLAttr],![XMLNode]) | XMLEncText !(!String,!XMLQName) | XMLEncNodes ![XMLNode] !XMLQName | XMLEncNothing generic XMLEncode a :: !a -> XMLEncodeResult XMLEncode{|OBJECT|} fx (OBJECT o) = fx o XMLEncode{|CONS of d|} fx (CONS c) # nodes = getNodes (fx c) # name = uname (formatConsName d.gcd_name) | d.gcd_type_def.gtd_num_conses > 1 = XMLEncElem (name,[],nodes) | otherwise = XMLEncNodes nodes name where nonEmpty (XMLElem _ _ []) = False nonEmpty _ = True formatConsName name | startsWith "_" name = subString 1 (textSize name - 1) name | otherwise = name XMLEncode{|RECORD of d|} fx (RECORD c) # nodes = getNodes (fx c) # name = uname (formatConsName d.grd_name) | not (isEmpty d.grd_fields) = XMLEncNodes (filter nonEmpty nodes) name | otherwise = XMLEncNodes nodes name where nonEmpty (XMLElem _ _ []) = False nonEmpty _ = True formatConsName name | startsWith "_" name = subString 1 (textSize name - 1) name | otherwise = name XMLEncode{|FIELD of d|} fx (FIELD f) = XMLEncElem (uname d.gfd_name,[],getNodes (fx f)) XMLEncode{|EITHER|} fx fy either = case either of LEFT x = fx x RIGHT y = fy y XMLEncode{|PAIR|} fx fy (PAIR x y) = XMLEncNodes (getNodes` (fx x) ++ getNodes` (fy y)) (uname "PAIR") where getNodes` (XMLEncNodes nodes _) = nodes getNodes` res = [wrapToElem res] XMLEncode{|UNIT|} _ = XMLEncNodes [] (uname "UNIT") XMLEncode{|Int|} i = basicXML "integer" i XMLEncode{|Char|} c = basicXML "character" c XMLEncode{|Real|} r = basicXML "float" r XMLEncode{|String|} s = basicXML "string" s XMLEncode{|Bool|} b = basicXML "boolean" b basicXML name v = XMLEncText (toString v,uname name) XMLEncode{|[]|} fx list = XMLEncNodes (map (wrapToElem o fx) list) (uname "list") XMLEncode{|Maybe|} fx (Just x) = fx x XMLEncode{|Maybe|} _ Nothing = XMLEncNothing XMLEncode{|XMLIntAttribute|} fx (XMLIntAttribute name v x) = encodeAttr name v (fx x) XMLEncode{|XMLCharAttribute|} fx (XMLCharAttribute name v x) = encodeAttr name v (fx x) XMLEncode{|XMLRealAttribute|} fx (XMLRealAttribute name v x) = encodeAttr name v (fx x) XMLEncode{|XMLStringAttribute|} fx (XMLStringAttribute name v x) = encodeAttr name v (fx x) XMLEncode{|XMLBoolAttribute|} fx (XMLBoolAttribute name v x) = encodeAttr name v (fx x) encodeAttr name a x = XMLEncElem (fromElem (wrapToElemAttr x [XMLAttr name (toString a)])) derive XMLEncode Void, Either, (,), (,,), (,,,) // auxiliary functions wrapToElem :: !XMLEncodeResult -> XMLNode wrapToElem x = wrapToElemAttr x [] wrapToElemAttr :: !XMLEncodeResult ![XMLAttr] -> XMLNode wrapToElemAttr (XMLEncElem (name,attr,nodes)) attr` = XMLElem name (attr ++ attr`) nodes wrapToElemAttr (XMLEncText t=:(txt,name)) attr` = XMLElem name attr` [toText t] wrapToElemAttr (XMLEncNodes nodes wname) attr` = XMLElem wname attr` nodes wrapToElemAttr XMLEncNothing attr` = XMLElem (uname "nothing") attr` [] toElem :: !(!XMLQName,![XMLAttr],![XMLNode]) -> XMLNode toElem (name,attr,nodes) = XMLElem name attr nodes fromElem :: !XMLNode -> (!XMLQName,![XMLAttr],![XMLNode]) fromElem (XMLElem name attr nodes) = (name,attr,nodes) toText :: !(!String,!XMLQName) -> XMLNode toText (txt,_) = XMLText txt getNodes :: !XMLEncodeResult -> [XMLNode] getNodes (XMLEncElem elem) = [toElem elem] getNodes (XMLEncText txt) = [toText txt] getNodes (XMLEncNodes nodes _) = nodes getNodes XMLEncNothing = []