Portability | untested |
---|---|
Stability | experimental |
Maintainer | twanvl@gmail.com |
Safe Haskell | None |
Data.CompactString
Contents
- The
CompactString
type - Introducing and eliminating
CompactString
s - Basic interface
- Transforming
CompactString
s - Reducing
CompactString
s (folds) - Building CompactStrings
- Substrings
- Predicates
- Searching CompactStrings
- Indexing CompactStrings
- Zipping and unzipping CompactStrings
- Ordered CompactStrings
- Encoding
- I/O with
CompactString
s
Description
A time and space-efficient implementation of strings using packed Word8 arrays, suitable for high performance use, both in terms of large data quantities and high speed requirements.
This module is intended to be imported qualified, to avoid name
clashes with Prelude functions, e.g.
import qualified Data.CompactString as C
Internally, CompactStrings are encoded ByteStrings.
- class Encoding a
- data CompactString a
- empty :: CompactString a
- singleton :: Encoding a => Char -> CompactString a
- pack :: Encoding a => String -> CompactString a
- unpack :: Encoding a => CompactString a -> String
- cons :: Encoding a => Char -> CompactString a -> CompactString a
- snoc :: Encoding a => CompactString a -> Char -> CompactString a
- append :: Encoding a => CompactString a -> CompactString a -> CompactString a
- head :: Encoding a => CompactString a -> Char
- last :: Encoding a => CompactString a -> Char
- tail :: Encoding a => CompactString a -> CompactString a
- init :: Encoding a => CompactString a -> CompactString a
- headView :: Encoding a => CompactString a -> Maybe (Char, CompactString a)
- lastView :: Encoding a => CompactString a -> Maybe (CompactString a, Char)
- null :: Encoding a => CompactString a -> Bool
- length :: Encoding a => CompactString a -> Int
- map :: Encoding a => (Char -> Char) -> CompactString a -> CompactString a
- reverse :: Encoding a => CompactString a -> CompactString a
- intersperse :: Encoding a => Char -> CompactString a -> CompactString a
- intercalate :: Encoding a => CompactString a -> [CompactString a] -> CompactString a
- transpose :: Encoding a => [CompactString a] -> [CompactString a]
- foldl :: Encoding a => (acc -> Char -> acc) -> acc -> CompactString a -> acc
- foldl' :: Encoding a => (acc -> Char -> acc) -> acc -> CompactString a -> acc
- foldl1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
- foldl1' :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
- foldr :: Encoding a => (Char -> acc -> acc) -> acc -> CompactString a -> acc
- foldr' :: Encoding a => (Char -> acc -> acc) -> acc -> CompactString a -> acc
- foldr1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
- foldr1' :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
- concat :: Encoding a => [CompactString a] -> CompactString a
- concatMap :: Encoding a => (Char -> CompactString a) -> CompactString a -> CompactString a
- any :: Encoding a => (Char -> Bool) -> CompactString a -> Bool
- all :: Encoding a => (Char -> Bool) -> CompactString a -> Bool
- maximum :: Encoding a => CompactString a -> Char
- minimum :: Encoding a => CompactString a -> Char
- scanl :: Encoding a => (Char -> Char -> Char) -> Char -> CompactString a -> CompactString a
- scanl1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> CompactString a
- scanr :: Encoding a => (Char -> Char -> Char) -> Char -> CompactString a -> CompactString a
- scanr1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> CompactString a
- mapAccumL :: Encoding a => (acc -> Char -> (acc, Char)) -> acc -> CompactString a -> (acc, CompactString a)
- mapAccumR :: Encoding a => (acc -> Char -> (acc, Char)) -> acc -> CompactString a -> (acc, CompactString a)
- mapIndexed :: Encoding a => (Int -> Char -> Char) -> CompactString a -> CompactString a
- replicate :: Encoding a => Int -> Char -> CompactString a
- unfoldr :: Encoding a => (acc -> Maybe (Char, acc)) -> acc -> CompactString a
- unfoldrN :: Encoding a => Int -> (acc -> Maybe (Char, acc)) -> acc -> (CompactString a, Maybe acc)
- take :: Encoding a => Int -> CompactString a -> CompactString a
- drop :: Encoding a => Int -> CompactString a -> CompactString a
- splitAt :: Encoding a => Int -> CompactString a -> (CompactString a, CompactString a)
- takeWhile :: Encoding a => (Char -> Bool) -> CompactString a -> CompactString a
- dropWhile :: Encoding a => (Char -> Bool) -> CompactString a -> CompactString a
- span :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
- spanEnd :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
- break :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
- breakEnd :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
- group :: Encoding a => CompactString a -> [CompactString a]
- groupBy :: Encoding a => (Char -> Char -> Bool) -> CompactString a -> [CompactString a]
- inits :: Encoding a => CompactString a -> [CompactString a]
- tails :: Encoding a => CompactString a -> [CompactString a]
- split :: Encoding a => Char -> CompactString a -> [CompactString a]
- splitWith :: Encoding a => (Char -> Bool) -> CompactString a -> [CompactString a]
- lines :: Encoding a => CompactString a -> [CompactString a]
- words :: Encoding a => CompactString a -> [CompactString a]
- unlines :: Encoding a => [CompactString a] -> CompactString a
- unwords :: Encoding a => [CompactString a] -> CompactString a
- isPrefixOf :: CompactString a -> CompactString a -> Bool
- isSuffixOf :: Encoding a => CompactString a -> CompactString a -> Bool
- isInfixOf :: Encoding a => CompactString a -> CompactString a -> Bool
- findSubstring :: Encoding a => CompactString a -> CompactString a -> Maybe Int
- findSubstrings :: Encoding a => CompactString a -> CompactString a -> [Int]
- elem :: Encoding a => Char -> CompactString a -> Bool
- notElem :: Encoding a => Char -> CompactString a -> Bool
- find :: Encoding a => (Char -> Bool) -> CompactString a -> Maybe Char
- filter :: Encoding a => (Char -> Bool) -> CompactString a -> CompactString a
- partition :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
- index :: Encoding a => CompactString a -> Int -> Char
- elemIndex :: Encoding a => Char -> CompactString a -> Maybe Int
- elemIndices :: Encoding a => Char -> CompactString a -> [Int]
- elemIndexEnd :: Encoding a => Char -> CompactString a -> Maybe Int
- findIndex :: Encoding a => (Char -> Bool) -> CompactString a -> Maybe Int
- findIndexEnd :: Encoding a => (Char -> Bool) -> CompactString a -> Maybe Int
- findIndices :: Encoding a => (Char -> Bool) -> CompactString a -> [Int]
- count :: Encoding a => Char -> CompactString a -> Int
- zip :: Encoding a => CompactString a -> CompactString a -> [(Char, Char)]
- zipWith :: Encoding a => (Char -> Char -> b) -> CompactString a -> CompactString a -> [b]
- zipWith' :: Encoding a => (Char -> Char -> Char) -> CompactString a -> CompactString a -> CompactString a
- unzip :: Encoding a => [(Char, Char)] -> (CompactString a, CompactString a)
- sort :: Encoding a => CompactString a -> CompactString a
- compare' :: (Encoding a, Encoding b) => CompactString a -> CompactString b -> Ordering
- toByteString :: Encoding a => CompactString a -> ByteString
- fromByteString :: (Encoding a, MonadPlus m) => ByteString -> m (CompactString a)
- fromByteString_ :: Encoding a => ByteString -> CompactString a
- validate :: (Encoding a, MonadPlus m) => CompactString a -> m (CompactString a)
- validate_ :: Encoding a => CompactString a -> CompactString a
- module Data.CompactString.Encodings
- recode :: (Encoding a, Encoding b, MonadPlus m) => CompactString a -> m (CompactString b)
- recode_ :: (Encoding a, Encoding b) => CompactString a -> CompactString b
- encode :: (Encoding a, Encoding e, MonadPlus m) => e -> CompactString a -> m ByteString
- encode_ :: (Encoding a, Encoding e) => e -> CompactString a -> ByteString
- decode :: (Encoding a, Encoding e, MonadPlus m) => e -> ByteString -> m (CompactString a)
- decode_ :: (Encoding a, Encoding e) => e -> ByteString -> CompactString a
- encodeBOM :: (Encoding a, Encoding e, MonadPlus m) => e -> CompactString a -> m ByteString
- encodeBOM_ :: (Encoding a, Encoding e) => e -> CompactString a -> ByteString
- decodeBOM :: (Encoding a, MonadPlus m) => ByteString -> m (CompactString a)
- decodeBOM_ :: Encoding a => ByteString -> CompactString a
- getLine :: Encoding a => IO (CompactString a)
- getContents :: Encoding a => IO (CompactString a)
- putStr :: Encoding a => CompactString a -> IO ()
- putStrLn :: Encoding a => CompactString a -> IO ()
- interact :: Encoding a => (CompactString a -> CompactString a) -> IO ()
- readFile :: Encoding a => FilePath -> IO (CompactString a)
- readFile' :: Encoding a => FilePath -> IO (CompactString a)
- writeFile :: Encoding a => FilePath -> CompactString a -> IO ()
- writeFile' :: Encoding a => FilePath -> CompactString a -> IO ()
- appendFile :: Encoding a => FilePath -> CompactString a -> IO ()
- appendFile' :: Encoding a => FilePath -> CompactString a -> IO ()
- hGetLine :: Encoding a => Handle -> IO (CompactString a)
- hGetContents :: Encoding a => Handle -> IO (CompactString a)
- hGetContents' :: Encoding a => Handle -> IO (CompactString a)
- hGet :: Encoding a => Handle -> Int -> IO (CompactString a)
- hGetNonBlocking :: Encoding a => Handle -> Int -> IO (CompactString a)
- hPut :: Encoding a => Handle -> CompactString a -> IO ()
- hPutStr :: Encoding a => Handle -> CompactString a -> IO ()
- hPutStrLn :: Encoding a => Handle -> CompactString a -> IO ()
The CompactString
type
class Encoding a
A way to encode characters into bytes
data CompactString a
A String using a compact, strict representation.
A CompactString a is encoded using encoding a, for example CompactString UTF8.
Instances
Encoding a => Eq (CompactString a) | |
(Eq (CompactString a), Encoding a) => Ord (CompactString a) | |
Encoding a => Show (CompactString a) | |
Encoding a => IsString (CompactString a) | |
Encoding a => Monoid (CompactString a) |
Introducing and eliminating CompactString
s
empty :: CompactString a
O(1) The empty CompactString
singleton :: Encoding a => Char -> CompactString a
O(1) Convert a Char
into a CompactString
pack :: Encoding a => String -> CompactString a
O(n) Convert a String
into a CompactString
.
unpack :: Encoding a => CompactString a -> String
O(n) Converts a CompactString
to a String
.
Basic interface
cons :: Encoding a => Char -> CompactString a -> CompactString a
O(n) cons
is analogous to (:) for lists, but of different
complexity, as it requires a memcpy.
snoc :: Encoding a => CompactString a -> Char -> CompactString a
O(n) Append a character to the end of a CompactString
append :: Encoding a => CompactString a -> CompactString a -> CompactString a
O(n) Append two CompactStrings
head :: Encoding a => CompactString a -> Char
O(1) Extract the first element of a CompactString, which must be non-empty. An exception will be thrown in the case of an empty CompactString.
last :: Encoding a => CompactString a -> Char
O(1) Extract the last element of a CompactString, which must be non-empty. An exception will be thrown in the case of an empty CompactString.
tail :: Encoding a => CompactString a -> CompactString a
O(1) Extract the elements after the head of a CompactString, which must be non-empty. An exception will be thrown in the case of an empty CompactString.
init :: Encoding a => CompactString a -> CompactString a
O(1) Return all the elements of a CompactString
except the last one.
An exception will be thrown in the case of an empty CompactString.
headView :: Encoding a => CompactString a -> Maybe (Char, CompactString a)
O(1) A view of the front of a CompactString
.
headView s = if null s then Nothing else Just (head s, tail s)
lastView :: Encoding a => CompactString a -> Maybe (CompactString a, Char)
O(1) A view of the back of a CompactString
.
lastView s = if null s then Nothing else Just (init s, last s)
null :: Encoding a => CompactString a -> Bool
O(1) Test whether a CompactString is empty.
length :: Encoding a => CompactString a -> Int
Transforming CompactString
s
map :: Encoding a => (Char -> Char) -> CompactString a -> CompactString a
O(n) map
f xs
is the CompactString obtained by applying f
to each
element of xs
. This function is subject to array fusion.
reverse :: Encoding a => CompactString a -> CompactString a
Reverse a CompactString
intersperse :: Encoding a => Char -> CompactString a -> CompactString a
O(n) The intersperse
function takes a Char
and a
CompactString
and `intersperses' that character between the elements of
the CompactString
. It is analogous to the intersperse function on
Lists.
intercalate :: Encoding a => CompactString a -> [CompactString a] -> CompactString a
O(n) The intercalate
function takes a CompactString
and a list of
CompactString
s and concatenates the list after interspersing the first
argument between each element of the list.
transpose :: Encoding a => [CompactString a] -> [CompactString a]
The transpose
function transposes the rows and columns of its
CompactString
argument.
Reducing CompactString
s (folds)
foldl :: Encoding a => (acc -> Char -> acc) -> acc -> CompactString a -> acc
foldl
, applied to a binary operator, a starting value (typically
the left-identity of the operator), and a CompactString, reduces the
CompactString using the binary operator, from left to right.
This function is subject to array fusion.
foldl' :: Encoding a => (acc -> Char -> acc) -> acc -> CompactString a -> acc
foldl' is like foldl, but strict in the accumulator.
Though actually foldl is also strict in the accumulator.
foldl1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
foldl1
is a variant of foldl
that has no starting value
argument, and thus must be applied to a non-empty CompactString.
This function is subject to array fusion.
An exception will be thrown in the case of an empty CompactString.
foldl1' :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
foldl1' is like foldl1, but strict in the accumulator.
An exception will be thrown in the case of an empty CompactString.
foldr :: Encoding a => (Char -> acc -> acc) -> acc -> CompactString a -> acc
foldr
, applied to a binary operator, a starting value
(typically the right-identity of the operator), and a CompactString,
reduces the CompactString using the binary operator, from right to left.
foldr' :: Encoding a => (Char -> acc -> acc) -> acc -> CompactString a -> acc
foldr' is like foldr, but strict in the accumulator. Applied to a
binary operator, a starting value (typically the right-identity of the
operator), and a CompactString, it reduces the CompactString using the
binary operator, from right to left.
foldr1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
foldr1
is a variant of foldr
that has no starting value argument,
and thus must be applied to non-empty CompactStrings.
An exception will be thrown in the case of an empty CompactString.
foldr1' :: Encoding a => (Char -> Char -> Char) -> CompactString a -> Char
foldr1' is a variant of foldr1, but is strict in the
accumulator.
An exception will be thrown in the case of an empty CompactString.
Special folds
concat :: Encoding a => [CompactString a] -> CompactString a
O(n) Concatenate a list of CompactString
s.
concatMap :: Encoding a => (Char -> CompactString a) -> CompactString a -> CompactString a
Map a function over a CompactString
and concatenate the results
any :: Encoding a => (Char -> Bool) -> CompactString a -> Bool
O(n) Applied to a predicate and a CompactString, any
determines if
any element of the CompactString
satisfies the predicate.
all :: Encoding a => (Char -> Bool) -> CompactString a -> Bool
O(n) Applied to a predicate and a CompactString, all
determines if
all elements of the CompactString
satisfy the predicate.
maximum :: Encoding a => CompactString a -> Char
O(n) maximum
returns the maximum value from a CompactString
An exception will be thrown in the case of an empty CompactString.
minimum :: Encoding a => CompactString a -> Char
O(n) minimum
returns the minimum value from a CompactString
An exception will be thrown in the case of an empty CompactString.
Building CompactStrings
Scans
scanl :: Encoding a => (Char -> Char -> Char) -> Char -> CompactString a -> CompactString a
scanl1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> CompactString a
scanr :: Encoding a => (Char -> Char -> Char) -> Char -> CompactString a -> CompactString a
scanr is the right-to-left dual of scanl.
scanr1 :: Encoding a => (Char -> Char -> Char) -> CompactString a -> CompactString a
Accumulating maps
mapAccumL :: Encoding a => (acc -> Char -> (acc, Char)) -> acc -> CompactString a -> (acc, CompactString a)
mapAccumR :: Encoding a => (acc -> Char -> (acc, Char)) -> acc -> CompactString a -> (acc, CompactString a)
mapIndexed :: Encoding a => (Int -> Char -> Char) -> CompactString a -> CompactString a
O(n) map Char functions, provided with the index at each position.
Unfolding CompactStrings
replicate :: Encoding a => Int -> Char -> CompactString a
O(n) replicate
n x
is a CompactString of length n
with x
the value of every element. The following holds:
replicate w c = fst (unfoldrN w (\u -> Just (u,u)) c)
unfoldr :: Encoding a => (acc -> Maybe (Char, acc)) -> acc -> CompactString a
O(n), where n is the length of the result. The unfoldr function is
analogous to the List 'unfoldr'. unfoldr builds a CompactString from a
seed value. The function takes the seed and returns Nothing if it is
done producing the CompactString, or returns Just (a,b), in which case
a is the next character in the string and b is the seed value for
further production.
Examples:
unfoldr (\x -> if x <= '5' then Just (x, succ x) else Nothing) '0' == pack "012345"
unfoldrN :: Encoding a => Int -> (acc -> Maybe (Char, acc)) -> acc -> (CompactString a, Maybe acc)
O(n) Like unfoldr
, unfoldrN
builds a CompactString from a seed
value. However, the length of the result is limited by the first
argument to unfoldrN
. This function is more efficient than unfoldr
when the maximum length of the result is known.
The following equation relates unfoldrN
and unfoldr
:
fst (unfoldrN n f s) == take n (unfoldr f s)
Substrings
Breaking strings
take :: Encoding a => Int -> CompactString a -> CompactString a
drop :: Encoding a => Int -> CompactString a -> CompactString a
splitAt :: Encoding a => Int -> CompactString a -> (CompactString a, CompactString a)
takeWhile :: Encoding a => (Char -> Bool) -> CompactString a -> CompactString a
takeWhile
, applied to a predicate p
and a CompactString xs
,
returns the longest prefix (possibly empty) of xs
of elements that
satisfy p
.
dropWhile :: Encoding a => (Char -> Bool) -> CompactString a -> CompactString a
span :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
spanEnd :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
spanEnd
behaves like span
but from the end of the CompactString
We have
spanEnd (not.isSpace) "x y z" == ("x y ","z")
and
spanEnd (not . isSpace) cs == let (x,y) = span (not.isSpace) (reverse cs) in (reverse y, reverse x)
break :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
breakEnd :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
breakEnd
behaves like break
but from the end of the CompactString
breakEnd p == spanEnd (not.p)
group :: Encoding a => CompactString a -> [CompactString a]
The group
function takes a CompactString
and returns a list of
CompactStrings such that the concatenation of the result is equal to the
argument. Moreover, each CompactString in the result contains only equal
elements. For example,
group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
It is a special case of groupBy
, which allows the programmer to
supply their own equality test.
groupBy :: Encoding a => (Char -> Char -> Bool) -> CompactString a -> [CompactString a]
inits :: Encoding a => CompactString a -> [CompactString a]
O(n) Return all initial segments of the given CompactString
, shortest first.
tails :: Encoding a => CompactString a -> [CompactString a]
O(n) Return all final segments of the given CompactString
, longest first.
Breaking into many substrings
split :: Encoding a => Char -> CompactString a -> [CompactString a]
O(n) Break a CompactString
into pieces separated by the character
argument, consuming the delimiter. I.e.
split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
split 'a' "aXaXaXa" == ["","X","X","X",""]
split 'x' "x" == ["",""]
and
intercalate (singleton c) . split c == id
split == splitWith . (==)
As for all splitting functions in this library, this function does
not copy the substrings, it just constructs new CompactStrings
that
are slices of the original.
splitWith :: Encoding a => (Char -> Bool) -> CompactString a -> [CompactString a]
O(n) Splits a CompactString
into components delimited by
separators, where the predicate returns True for a separator element.
The resulting components do not contain the separators. Two adjacent
separators result in an empty component in the output. eg.
splitWith (=='a') "aabbaca" == ["","","bb","c",""]
splitWith (=='a') [] == []
Breaking into lines and words
lines :: Encoding a => CompactString a -> [CompactString a]
lines
breaks a CompactString
up into a list of CompactStrings at
newline Chars. The resulting strings do not contain newlines.
words :: Encoding a => CompactString a -> [CompactString a]
words
breaks a CompactString up into a list of words, which
were delimited by Chars representing white space. And
words = filter (not . null) . splitWith isSpace
unlines :: Encoding a => [CompactString a] -> CompactString a
unwords :: Encoding a => [CompactString a] -> CompactString a
Predicates
isPrefixOf :: CompactString a -> CompactString a -> Bool
O(n) The isPrefixOf
function takes two CompactStrings and returns True
iff the first is a prefix of the second.
isSuffixOf :: Encoding a => CompactString a -> CompactString a -> Bool
O(n) The isSuffixOf
function takes two CompactStrings and returns True
iff the first is a suffix of the second.
The following holds:
isSuffixOf x y == reverse x `isPrefixOf` reverse y
isInfixOf
Arguments
:: Encoding a | |
=> CompactString a | String to search for. |
-> CompactString a | String to search in. |
-> Bool |
Check whether one string is a substring of another. isInfixOf
p s
is equivalent to not (null (findSubstrings p s))
.
Search for arbitrary substrings
findSubstring
Arguments
:: Encoding a | |
=> CompactString a | String to search for. |
-> CompactString a | String to search in. |
-> Maybe Int |
Get the first index of a substring in another string,
or Nothing
if the string is not found.
findSubstring p s
is equivalent to listToMaybe (findSubstrings p s)
.
findSubstrings
Arguments
:: Encoding a | |
=> CompactString a | String to search for. |
-> CompactString a | String to search in. |
-> [Int] |
Find the indexes of all (possibly overlapping) occurrences of a substring in a string. This function uses the Knuth-Morris-Pratt string matching algorithm.
Searching CompactStrings
Searching by equality
elem :: Encoding a => Char -> CompactString a -> Bool
O(n) elem
is the CompactString
membership predicate.
Searching with a predicate
find :: Encoding a => (Char -> Bool) -> CompactString a -> Maybe Char
O(n) The find
function takes a predicate and a CompactString
,
and returns the first element matching the predicate, or Nothing
if there is no such element.
find f p = case findIndex f p of Just n -> Just (p `index` n) ; _ -> Nothing
filter :: Encoding a => (Char -> Bool) -> CompactString a -> CompactString a
O(n) filter
, applied to a predicate and a CompactString
,
returns a CompactString containing those characters that satisfy the
predicate. This function is subject to array fusion.
partition :: Encoding a => (Char -> Bool) -> CompactString a -> (CompactString a, CompactString a)
O(n) partition
, applied to a predicate and a CompactString
,
returns a pair of CompactStrings.
The first containing those characters that satisfy the predicate,
the second containing those that don't.
Indexing CompactStrings
index :: Encoding a => CompactString a -> Int -> Char
O(n) CompactString
index (subscript) operator, starting from 0.
elemIndex :: Encoding a => Char -> CompactString a -> Maybe Int
O(n) The elemIndex
function returns the index of the first
element in the given CompactString
which is equal to the query
element, or Nothing
if there is no such element.
elemIndices :: Encoding a => Char -> CompactString a -> [Int]
O(n) The elemIndices
function extends elemIndex
, by returning
the indices of all elements equal to the query element, in ascending order.
elemIndexEnd :: Encoding a => Char -> CompactString a -> Maybe Int
O(n) The elemIndexEnd
function returns the last index of the
element in the given CompactString
which is equal to the query
element, or Nothing
if there is no such element. The following
holds:
elemIndexEnd c xs == (-) (length xs - 1) `fmap` elemIndex c (reverse xs)
findIndex :: Encoding a => (Char -> Bool) -> CompactString a -> Maybe Int
The findIndex
function takes a predicate and a CompactString
and
returns the index of the first element in the CompactString
satisfying the predicate.
findIndexEnd :: Encoding a => (Char -> Bool) -> CompactString a -> Maybe Int
O(n) The findIndexEnd
function returns the last index of the
element in the given CompactString
which satisfies the predicate,
or Nothing
if there is no such element. The following holds:
findIndexEnd c xs == (-) (length xs - 1) `fmap` findIndex c (reverse xs)
findIndices :: Encoding a => (Char -> Bool) -> CompactString a -> [Int]
The findIndices
function extends findIndex
, by returning the
indices of all elements satisfying the predicate, in ascending order.
count :: Encoding a => Char -> CompactString a -> Int
count returns the number of times its argument appears in the CompactString
count c = length . elemIndices c
Zipping and unzipping CompactStrings
zip :: Encoding a => CompactString a -> CompactString a -> [(Char, Char)]
zipWith :: Encoding a => (Char -> Char -> b) -> CompactString a -> CompactString a -> [b]
zipWith' :: Encoding a => (Char -> Char -> Char) -> CompactString a -> CompactString a -> CompactString a
A specialised version of zipWith
for the common case of a
simultaneous map over two CompactString
s, to build a 3rd. Rewrite rules
are used to automatically convert zipWith into zipWith' when a pack is
performed on the result of zipWith, but we also export it for
convenience.
unzip :: Encoding a => [(Char, Char)] -> (CompactString a, CompactString a)
Ordered CompactStrings
sort :: Encoding a => CompactString a -> CompactString a
O(n log n) Sort a CompactString
compare' :: (Encoding a, Encoding b) => CompactString a -> CompactString b -> Ordering
Compare two CompactStrings, possibly with a different encoding.
Encoding
toByteString :: Encoding a => CompactString a -> ByteString
Convert a CompactString to a ByteString
fromByteString :: (Encoding a, MonadPlus m) => ByteString -> m (CompactString a)
Convert a ByteString to a CompactString. Fails if the ByteString is not a valid encoded string.
fromByteString_ :: Encoding a => ByteString -> CompactString a
Convert a ByteString to a CompactString. Raises an error if the ByteString is not a valid encoded string.
validate :: (Encoding a, MonadPlus m) => CompactString a -> m (CompactString a)
Validates a CompactString. If the string is invalid, fails, otherwise returns the input.
validate_ :: Encoding a => CompactString a -> CompactString a
Validates a CompactString. If the string is invalid, throws an error, otherwise returns the input.
Encoding conversion
module Data.CompactString.Encodings
recode :: (Encoding a, Encoding b, MonadPlus m) => CompactString a -> m (CompactString b)
Convert between two different encodings, fails if conversion is not possible.
recode_ :: (Encoding a, Encoding b) => CompactString a -> CompactString b
Convert between two different encodings, raises an error if conversion is not possible.
encode :: (Encoding a, Encoding e, MonadPlus m) => e -> CompactString a -> m ByteString
Encode a CompactString to a ByteString using the given encoding.
encode e = liftM toByteString . recode
But it might be faster for some combinations of encodings.
Fails if the string cannot be encoded in the target encoding.
encode_ :: (Encoding a, Encoding e) => e -> CompactString a -> ByteString
Encode a CompactString to a ByteString using the given encoding.
encode_ e = toByteString . recode
But it might be faster for some combinations of encodings.
Raises an error if the string cannot be encoded in the target encoding.
decode :: (Encoding a, Encoding e, MonadPlus m) => e -> ByteString -> m (CompactString a)
Decode a ByteString to a CompactString using the given encoding.
decode e = recode =<< fromByteString
but it might be faster for some combinations of encodings.
Fails if the ByteString is not a valid encoded string
or if the string can not be represented in the encoding a
.
decode_ :: (Encoding a, Encoding e) => e -> ByteString -> CompactString a
Decode a ByteString to a CompactString using the given encoding.
decode_ e = recode_ . fromByteString_
but it might be faster for some combinations of encodings.
Raises an error if the ByteString is not a valid encoded string
or if the string can not be represented in the encoding a
.
encodeBOM :: (Encoding a, Encoding e, MonadPlus m) => e -> CompactString a -> m ByteString
Encode a CompactString
using the given encoding, and add a Byte Order Mark.
Byte Order Marks are common on Windows, but not on other platforms.
Fails if the string cannot be encoded in the target encoding.
encodeBOM_ :: (Encoding a, Encoding e) => e -> CompactString a -> ByteString
Encode a CompactString
using the given encoding, and add a Byte Order Mark.
Byte Order Marks are common on Windows, but not on other platforms.
Raises an error if the string cannot be encoded in the target encoding.
decodeBOM :: (Encoding a, MonadPlus m) => ByteString -> m (CompactString a)
Decode a ByteString
into a CompactString
, by investigating the Byte Order Mark.
If there is no BOM assumes UTF-8.
Fails if the input is not a valid encoded string
or if the string can not be represented in the encoding a
.
For portability, this function should be preferred over decode UTF8
when reading files.
decodeBOM_ :: Encoding a => ByteString -> CompactString a
Decode a ByteString
into a CompactString
, by investigating the Byte Order Mark.
If there is no BOM assumes UTF-8.
Raises an error if the input is not a valid encoded string
or if the string can not be represented in the encoding a
.
For portability, this function should be preferred over decode UTF8
when reading files.
I/O with CompactString
s
Standard input and output
getLine :: Encoding a => IO (CompactString a)
Read a line from stdin.
getContents :: Encoding a => IO (CompactString a)
getContents. Equivalent to hGetContents stdin
Input is assumed to be in the encoding a
, this may not be appropriate.
putStr :: Encoding a => CompactString a -> IO ()
Write a CompactString
to stdout.
Output is written in the encoding a
, this may not be appropriate.
putStrLn :: Encoding a => CompactString a -> IO ()
Write a CompactString
to stdout, appending a newline character.
Output is written in the encoding a
, this may not be appropriate.
interact :: Encoding a => (CompactString a -> CompactString a) -> IO ()
The interact function takes a function of type CompactString -> CompactString
as its argument. The entire input from the standard input device is passed
to this function as its argument, and the resulting string is output on the
standard output device. It's great for writing one line programs!
Files
readFile :: Encoding a => FilePath -> IO (CompactString a)
Read an entire file strictly into a CompactString
. This is far more
efficient than reading the characters into a String
and then using
pack
. Files are read using 'text mode' on Windows.
Files are assumed to be in the encoding a
.
readFile' :: Encoding a => FilePath -> IO (CompactString a)
Read an entire file strictly into a CompactString
. This is far more
efficient than reading the characters into a String
and then using
pack
. Files are read using 'text mode' on Windows.
The encoding of the file is determined based on a Byte Order Mark, see decodeBOM
.
writeFile :: Encoding a => FilePath -> CompactString a -> IO ()
Write a CompactString
to a file.
Files are written using the encoding a
.
writeFile' :: Encoding a => FilePath -> CompactString a -> IO ()
Write a CompactString
to a file.
Files are written using the encoding a
.
A Byte Order Mark is also written.
appendFile :: Encoding a => FilePath -> CompactString a -> IO ()
Append a CompactString
to a file.
Files are written using the encoding a
.
appendFile' :: Encoding a => FilePath -> CompactString a -> IO ()
Append a CompactString
to a file.
The encoding of the file is determined based on a Byte Order Mark.
If the file is empty, it is written using the encoding a
with a Byte Order Mark.
If the encoding can not be determined the file is assumed to be UTF-8.
I/O with Handles
hGetLine :: Encoding a => Handle -> IO (CompactString a)
Read a line from a handle
hGetContents :: Encoding a => Handle -> IO (CompactString a)
Read entire handle contents into a CompactString
.
The handle is interpreted as the encoding a
.
hGetContents' :: Encoding a => Handle -> IO (CompactString a)
Read entire handle contents into a CompactString
.
The encoding is determined based on a Byte Order Mark, see decodeBOM
.
hGet :: Encoding a => Handle -> Int -> IO (CompactString a)
Read a CompactString
directly from the specified Handle
.
The handle is interpreted as the encoding a
.
hGetNonBlocking :: Encoding a => Handle -> Int -> IO (CompactString a)
hGetNonBlocking is identical to hGet
, except that it will never block
waiting for data to become available, instead it returns only whatever data
is available.
The handle is interpreted as the encoding a
.
hPut :: Encoding a => Handle -> CompactString a -> IO ()
Outputs a CompactString
to the specified Handle
.
Output is written in the encoding a
.
hPutStr :: Encoding a => Handle -> CompactString a -> IO ()
A synonym for hPut
, for compatibility
hPutStrLn :: Encoding a => Handle -> CompactString a -> IO ()
Write a CompactString
to a handle, appending a newline character.
Output is written in the encoding a
.