--- Modified: added parametrized containers for string mapping (reading) --- --- Implements the ReadWrite class, which is used to read and write data to files. Also contains predefined instances for some types. --- --- @author Lasse Züngel module MyFlatCurry.ReadWriteBaseContainer where import Data.Maybe import Data.List import Data.Map import System.IO import Text.Show import Prelude hiding (ShowS, showString, showChar, shows) import RW.Trie import Debug.Trace import Control.Search.Unsafe (oneValue) class ReadWrite a where readRW :: Container c => c -> String -> (a,String) showRW :: RWParameters -> RW.Trie.Trie String -> a -> (RW.Trie.Trie String, Text.Show.ShowS) writeRW :: RWParameters -> System.IO.Handle -> a -> RW.Trie.Trie String -> IO (RW.Trie.Trie String) -- Returns the type of the value. typeOf :: a -> RWType readListRW :: Container c => c -> String -> ([a],String) readListRW strs ('0' : cs) = ([],cs) readListRW strs ('1' : cs) = (x : xs,r2) where (x,r1) = readRW strs cs (xs,r2) = readListRW strs r1 showListRW :: RWParameters -> RW.Trie.Trie String -> [a] -> (RW.Trie.Trie String,ShowS) showListRW _ strs [] = (strs,showString "0") showListRW params strs (x : xs) = (strs'',showString "1" . x' . xs') where (strs',x') = showRW params strs x (strs'',xs') = showListRW params strs' xs writeListRW :: RWParameters -> System.IO.Handle -> [a] -> RW.Trie.Trie String -> IO (RW.Trie.Trie String) writeListRW _ h [] strs = System.IO.hPutStr h "0" >> return strs writeListRW params h (x : xs) strs = System.IO.hPutStr h "1" >> writeRW params h x strs >>= writeListRW params h xs instance ReadWrite Int where readRW _ cs = (readInt n,r) where (n,_ : r) = span (flip (/=) ';') cs showRW _ strs n = (strs,shows n . showString ";") writeRW _ h n strs = do System.IO.hPutStr h (show n ++ ";") >> return strs typeOf _ = monoRWType "Int" instance ReadWrite Float where readRW _ cs = (read n,r) where (n,_ : r) = span (flip (/=) ';') cs showRW _ strs n = (strs,shows n . showString ";") writeRW _ h n strs = do System.IO.hPutStr h (show n ++ ";") >> return strs typeOf _ = monoRWType "Float" instance ReadWrite a => ReadWrite [a] where readRW = readListRW showRW = showListRW writeRW = writeListRW typeOf n = RWType "[]" [typeOf $ get_a' n] where get_a' :: [a'] -> a' get_a' _ = failed instance ReadWrite Char where readRW _ (c : cs) | c /= '\\' = (c,cs) | otherwise = case cs of '"' : cs1 -> ('"',cs1) 'a' : cs1 -> ('\a',cs1) 'b' : cs1 -> ('\b',cs1) 't' : cs1 -> ('\t',cs1) 'n' : cs1 -> ('\n',cs1) 'v' : cs1 -> ('\v',cs1) 'f' : cs1 -> ('\f',cs1) 'r' : cs1 -> ('\r',cs1) '\\' : cs1 -> ('\\',cs1) _ -> error "Invalid escape sequence" showRW _ strs c = (strs,showString $ escapeChar c) writeRW _ h c strs = do System.IO.hPutStr h (escapeChar c) >> return strs readListRW strs cs = case index of [] -> readStubString r _ -> (Data.Maybe.fromJust $ contLookup index strs,r) where (index,r) = readStringId strs cs :: (String,String) readStubString cs = (str,cs') where (str,_ : cs') = span (flip (/=) '"') cs showListRW params strs str = (strs',index) where (strs',index) = writeString params strs str writeListRW params h str strs = System.IO.hPutStr h (index "") >> return strs' where (strs',index) = writeString params strs str typeOf _ = monoRWType "Char" instance ReadWrite () where readRW _ cs = ((),cs) showRW _ strs () = (strs,showString "") writeRW _ h () strs = return strs typeOf _ = monoRWType "()" instance (ReadWrite a,ReadWrite b) => ReadWrite (a,b) where readRW strs cs = ((x,y),r2) where (x,r1) = readRW strs cs (y,r2) = readRW strs r1 showRW params strs (x,y) = (strs'',x' . y') where (strs',x') = showRW params strs x (strs'',y') = showRW params strs' y writeRW params h (x,y) strs = writeRW params h x strs >>= writeRW params h y typeOf n = RWType "()" [typeOf $ get_a' n,typeOf $ get_b' n] where get_a' :: (a',b') -> a' get_a' _ = failed get_b' :: (a',b') -> b' get_b' _ = failed instance (ReadWrite a,ReadWrite b,ReadWrite c) => ReadWrite (a,b,c) where readRW strs cs = ((x,y,z),r3) where (x,r1) = readRW strs cs (y,r2) = readRW strs r1 (z,r3) = readRW strs r2 showRW params strs (x,y,z) = (strs''',x' . (y' . z')) where (strs',x') = showRW params strs x (strs'',y') = showRW params strs' y (strs''',z') = showRW params strs'' z writeRW params h (x,y,z) strs = (writeRW params h x strs >>= writeRW params h y) >>= writeRW params h z typeOf n = RWType "()" [typeOf $ get_a' n,typeOf $ get_b' n,typeOf $ get_c' n] where get_a' :: (a',b',c') -> a' get_a' _ = failed get_b' :: (a',b',c') -> b' get_b' _ = failed get_c' :: (a',b',c') -> c' get_c' _ = failed instance (ReadWrite a,ReadWrite b,ReadWrite c,ReadWrite d) => ReadWrite (a, b, c, d) where readRW strs cs = ((x,y,z,w),r4) where (x,r1) = readRW strs cs (y,r2) = readRW strs r1 (z,r3) = readRW strs r2 (w,r4) = readRW strs r3 showRW params strs (x,y,z,w) = (strs'''',x' . (y' . (z' . w'))) where (strs',x') = showRW params strs x (strs'',y') = showRW params strs' y (strs''',z') = showRW params strs'' z (strs'''',w') = showRW params strs''' w writeRW params h (x,y,z,w) strs = writeRW params h x strs >>= writeRW params h y >>= writeRW params h z >>= writeRW params h w typeOf n = RWType "()" [typeOf $ get_a' n,typeOf $ get_b' n,typeOf $ get_c' n,typeOf $ get_d' n] where get_a' :: (a',b',c',d') -> a' get_a' _ = failed get_b' :: (a',b',c',d') -> b' get_b' _ = failed get_c' :: (a',b',c',d') -> c' get_c' _ = failed get_d' :: (a',b',c',d') -> d' get_d' _ = failed ---------------------------------------------------------------------------------------------------- --- abstract string container -- Wrappers for different string containers (for reading). -- -- Wrapping the containers and using the Container class as an abstraction yields a 3% runtime slowdown, -- but it allows us to easily switch between different containers which is useful for testing and bench- -- marking. In production, one should usually use 'Trie String' directly instead of resorting to this -- abstraction. newtype ListString = ListString [(String,String)] idListString = 0 newtype MapString = MapString (Map String String) idMapString = 1 newtype TrieString = TrieString (Trie String) idTrieString = 2 --- Abstract string container class Container a where contEmpty :: a contInsert :: String -> String -> a -> a contLookup :: String -> a -> Maybe String contSize :: a -> Int contFromList :: [(String,String)] -> a contToList :: a -> [(String,String)] instance Container ListString where contEmpty = ListString [] contInsert k v (ListString m) = ListString ((k,v) : m) contLookup k (ListString m) = Prelude.lookup k m contSize (ListString m) = Prelude.length m contFromList xs = ListString xs contToList (ListString m) = m instance Container MapString where contEmpty = MapString Data.Map.empty contInsert k v (MapString m) = MapString (Data.Map.insert k v m) contLookup k (MapString m) = Data.Map.lookup k m contSize (MapString m) = Data.Map.size m contFromList xs = MapString (Data.Map.fromList xs) contToList (MapString m) = Data.Map.toList m instance Container TrieString where contEmpty = TrieString RW.Trie.empty contInsert k v (TrieString m) = TrieString (RW.Trie.insert k v m) contLookup k (TrieString m) = RW.Trie.lookup k m contSize (TrieString m) = RW.Trie.size m contFromList xs = TrieString (RW.Trie.fromList xs) contToList (TrieString m) = RW.Trie.toList m -- Reads an integer. readInt :: String -> Int readInt (c:cs) | c == '-' = -readInt' cs | otherwise = readInt' (c:cs) readInt' cs = foldl (\n c -> n * 10 + (ord c - ord '0')) 0 cs data RWParameters = RWParameters { minStrLen :: Int -- Minimum string length for a string to be considered a stub (-> not extracted) , alphabetLen :: Int -- Length of the alphabet , container :: Int -- id of the container to use. 0 = ListString, 1 = MapString, 2 = TrieString } -- Default RWParameters defaultParams :: RWParameters defaultParams = RWParameters 6 26 idTrieString -- Represents a type data RWType = RWType String [RWType] deriving Eq -- Creates a monomorphic type monoRWType :: String -> RWType monoRWType name = RWType name [] -- Pretty-prints a type ppType :: RWType -> String ppType (RWType name args) | args == [] = name | name == "[]" && length args == 1 = "[" ++ (ppType (head args) ++ "]") | name == "()" = "(" ++ intercalate ", " (map ppType' args) ++ ")" | otherwise = name ++ " " ++ unwords (map ppType' args) where ppType' x | isMonomorphic x = ppType x | bracketed x = ppType x | otherwise = "(" ++ (ppType x ++ ")") isMonomorphic x = case x of RWType _ [] -> True _ -> False bracketed t = case t of RWType "[]" [_] -> True RWType "()" _ -> True _ -> False -- Returns a type at a given position of a type. typeAt :: [Int] -> RWType -> RWType typeAt [] x = x typeAt (i : is) (RWType _ args) = typeAt is (args !! i) -- The coding lookupCoding :: Int -> Char lookupCoding x = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ !\"#$%&'()*+,-./:<=>?@[\\]^_`{|}~" !! x -- Converts an integer (index) to a string (coding). intToASCII :: Int -> Int -> String intToASCII lc n | n < lc = [lookupCoding n] | otherwise = intToASCII lc (div n lc) ++ [lookupCoding (mod n lc)] -- Shows a string. -- -- If the string is long, it is extracted and represented only once. Otherwise, it is inlined. writeString :: RWParameters -> RW.Trie.Trie String -> String -> (RW.Trie.Trie String, String -> String) writeString (RWParameters sLen aLen _) strs s | isStub s = (strs,showChar ';' . (showString s . showChar '"')) | otherwise = case RW.Trie.lookup s strs of Just i -> (strs,showString i . showChar ';') Nothing -> let coding = intToASCII aLen (RW.Trie.size strs) in (RW.Trie.insert s coding strs,showString coding . showChar ';') where isStub str = lengthBelow str sLen && ((not $ elem '"' str) && (not $ containsNewline str)) where lengthBelow [] n = n > 0 lengthBelow (c : cs) n = lengthBelow cs (n - 1) -- Char escaping escapeChar :: Char -> String escapeChar c = case c of '"' -> "\\\"" '\a' -> "\\a" '\b' -> "\\b" '\t' -> "\\t" '\n' -> "\\n" '\v' -> "\\v" '\f' -> "\\f" '\r' -> "\\r" '\\' -> "\\\\" _ -> [c] -- Special case: Carriage return escapeCarriageReturn :: String -> String escapeCarriageReturn [] = [] escapeCarriageReturn (c : cs) | c == '\r' = '\\' : ('r' : escapeCarriageReturn cs) | otherwise = c : escapeCarriageReturn cs -- Special case: Carriage return unescapeCarriageReturn :: String -> String unescapeCarriageReturn [] = [] unescapeCarriageReturn (c : cs) = case c of '\\' -> case cs of 'r' : cs' -> '\r' : unescapeCarriageReturn cs' _ -> c : unescapeCarriageReturn cs _ -> c : unescapeCarriageReturn cs -- Parses some input and returns the value. If the parse failed (e.g. due to a type mismatch), Nothing is returned. readData :: ReadWrite a => String -> Maybe a readData ls = let n@(sLen,_,t,_,_) = parseInput ls result = calc n in if ppType (typeOf (fst result)) == t then Just $ fst result else Nothing where calc (sLen,containerId,_,encoding,strings) = case containerId of 0 -> readRW (contFromList $ zip (map (intToASCII sLen) (enumFrom 0)) strings :: ListString) encoding 1 -> readRW (contFromList $ zip (map (intToASCII sLen) (enumFrom 0)) strings :: MapString) encoding 2 -> readRW (contFromList $ zip (map (intToASCII sLen) (enumFrom 0)) strings :: TrieString) encoding -- Writes some data to a file. writeDataFile :: ReadWrite a => String -> a -> IO () writeDataFile = writeDataFileP defaultParams writeDataFileP :: ReadWrite a => RWParameters -> String -> a -> IO () writeDataFileP params file x = do h <- System.IO.openFile file System.IO.WriteMode System.IO.hPutStr h (show (alphabetLen params) ++ " " ++ (show (container params)) ++ "\n") System.IO.hPutStr h (ppType (typeOf x) ++ "\n") written <- RW.Trie.toList <$> writeRW params h x (RW.Trie.empty) System.IO.hPutStr h "\n" let strs = keysOrdByVal written mapM_ (System.IO.hPutStr h) (map outputStr strs) System.IO.hClose h -- Takes the input and chops it into a parametrized layout "version" (alphabet length), the reading container, type, an encoding and a list of strings. parseInput :: String -> (Int, Int, String, String, [String]) parseInput s = (readInt sLen, readInt container, t, encoding, parseStrings strings) where (lenAndContainer, _ : sx) = break (flip (==) '\n') s sLen = head $ words lenAndContainer container = head $ tail $ words lenAndContainer (t, _ : s') = break (flip (==) '\n') sx (encoding,_ : strings) = break (flip (==) '\n') s' parseStrings xs = case xs of [] -> [] _ -> let (len,_ : xs') = break (flip (==) ';') xs in ifThenElse (Prelude.null len) (let (str,xs'') = span (flip (/=) '\n') xs' in str : parseStrings (drop 1 xs'')) (let (str,xs'') = splitAt (read len :: Int) xs' in unescapeCarriageReturn str : parseStrings xs'') -- Converts data to a string. -- -- This is rarely what you want. Use showData if you want to write the data to a file. showData :: ReadWrite a => a -> String showData = showDataP defaultParams showDataP :: ReadWrite a => RWParameters -> a -> String showDataP params x = (show (alphabetLen params) ++ " " ++ show (container params) ++ "\n") ++ (ppType (typeOf x) ++ "\n") ++ (l ++ "\n") ++ concatMap outputStr (keysOrdByVal ls) where (ls,l) = let (ls', l') = showRW params (RW.Trie.empty) x in (RW.Trie.toList ls', l' "") outputStr :: String -> String outputStr s | containsNewline s = let s' = escapeCarriageReturn s in show (length s') ++ (";" ++ s') | otherwise = ";" ++ (s ++ "\n") -- Checks if the string contains some kind of return character. containsNewline :: String -> Bool containsNewline s = elem '\n' s || elem '\r' s ordHex :: Ord b => (a,[b]) -> (c,[b]) -> Bool ordHex (_,a) (_,b) | length a < length b = True | length a > length b = False | otherwise = a < b keysOrdByVal :: [(String, String)] -> [String] keysOrdByVal m = map fst (Data.List.sortBy ordHex m) readStringId :: Container c => c -> String -> (String,String) readStringId strs (c : cs) | c == ';' = ([],cs) | otherwise = let (xs,r1) = readStringId strs cs in (c : xs,r1)