From 871e6f2d2d851d364bae846e9b3a846628be0e93 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Tue, 14 Mar 2023 10:20:03 +0100 Subject: [PATCH] Improve name aliases Encode the explicit length of the aliases. --- ucd2haskell/exe/Parser/Text.hs | 80 +- unicode-data-names/bench/Main.hs | 124 ++- .../lib/Unicode/Char/General/Names.hs | 88 +- .../Unicode/Char/General/Names/ByteString.hs | 86 +- .../lib/Unicode/Char/General/Names/Text.hs | 121 +-- .../lib/Unicode/Internal/Bits/Names.hs | 11 +- .../Internal/Char/UnicodeData/NameAliases.hs | 792 +++++++++--------- .../test/Unicode/Char/General/NamesSpec.hs | 45 +- unicode-data-names/unicode-data-names.cabal | 9 +- 9 files changed, 769 insertions(+), 587 deletions(-) diff --git a/ucd2haskell/exe/Parser/Text.hs b/ucd2haskell/exe/Parser/Text.hs index e5ebfad3..d294b0b7 100644 --- a/ucd2haskell/exe/Parser/Text.hs +++ b/ucd2haskell/exe/Parser/Text.hs @@ -1732,8 +1732,7 @@ genNamesModule moduleName = where shows' = \case '\0' -> \s -> '\\' : '0' : s - -- Note: names are ASCII - c -> (c :) + c -> (c :) -- Note: names are ASCII genAliasesModule :: Monad m @@ -1750,12 +1749,14 @@ genAliasesModule moduleName = , showHexCodepoint char , "'# -> \"" , mkCharAliasesLiteral char aliases - , "\"#\n" + , "\"#" ] mkCharAliasesLiteral :: Char -> Aliases -> String mkCharAliasesLiteral char aliasesList = - enumMapToAddrLiteral 0 0xfff (reverse index) (mconcat (reverse aliases)) + enumMapToAddrLiteral 0 0xfff + (reverse index) + (mconcat (reverse ("\\0":aliases))) where (index, aliases, _) = Map.foldlWithKey' (addAliasType char) @@ -1779,40 +1780,39 @@ genAliasesModule moduleName = ( 0 : index , aliasesAcc , lastAliasIndex ) - aliases -> -- traceShow (char, ty, fromIntegral lastAliasIndex :: Word8) - ( fromIntegral lastAliasIndex : index - , encodedAliases - , lastAliasIndex' ) + aliases -> if lastAliasIndex < 0xff + then + ( fromIntegral lastAliasIndex : index + , encodedAliases + , lastAliasIndex' ) + else error . mconcat $ + [ "Cannot encode char ", show char + , "aliases. Offset: ", show lastAliasIndex, " >= 0xff" ] where (encodedAliases, lastAliasIndex') = addEncodedAliases (aliasesAcc, lastAliasIndex) aliases - addEncodedAliases acc@(as, offset) = \case - Alias alias : rest -> if offset' < 0xff - then addEncodedAliases - -- next offset : null-terminated string - ( mconcat ["\\", show nextAliasOffset, alias, "\\0"]:as - , offset' ) - rest - else error . mconcat $ - [ "Cannot encode alias “", alias, "” offset for char : " - , show char - , " . Offset: ", show offset', " >= 0xff" ] + addEncodedAliases (as, offset) = \case + Alias alias : rest -> addEncodedAliases + ( mconcat ["\\", show len, alias] : as + , offset' ) + rest where - -- offset + length + null - offset' = offset + length alias + 2 - nextAliasOffset = if null rest then 0 else offset' - [] -> acc + len = length alias + offset' = offset + len + 1 + [] -> ("\\0" : as, offset + 1) done names = unlines [ apacheLicense 2022 moduleName + , "{-# LANGUAGE DeriveGeneric, PatternSynonyms #-}" , "{-# OPTIONS_HADDOCK hide #-}" , "" , "module " <> moduleName - , "(NameAliasType(..), maxNameAliasType, nameAliases)" + , "(NameAliasType(..), pattern MaxNameAliasType, nameAliases)" , "where" , "" , "import Data.Ix (Ix)" - , "import GHC.Exts (Addr#, Char#)" + , "import GHC.Exts (Addr#, Char#, Int#)" + , "import GHC.Generics (Generic)" , "" , "-- | Type of name alias. See Unicode Standard 15.0.0, section 4.8." , "--" @@ -1831,21 +1831,39 @@ genAliasesModule moduleName = , " | Abbreviation" , " -- ^ Commonly occurring abbreviations (or acronyms) for control codes," , " -- format characters, spaces, and variation selectors." - , " deriving (Enum, Bounded, Eq, Ord, Ix, Show)" + , " deriving (Generic, Enum, Bounded, Eq, Ord, Ix, Show)" + , "" + , "-- $setup" + , "-- >>> import GHC.Exts (Int(..))" , "" - , "-- | >>> maxNameAliasType == fromEnum (maxBound :: NameAliasType)" - , "maxNameAliasType :: Int" - , "maxNameAliasType = 4" + , "-- |" + , "-- >>> I# MaxNameAliasType == fromEnum (maxBound :: NameAliasType)" + , "-- True" + , "pattern MaxNameAliasType :: Int#" + , "pattern MaxNameAliasType = " + <> show (fromEnum (maxBound :: AliasType)) <> "#" , "" , "-- | Detailed character names aliases." , "-- The names are listed in the original order of the UCD." , "--" - , "-- See 'nameAliases' if the alias type is not required." + , "-- Encoding:" + , "--" + , "-- * If there is no alias, return @\"\\\\xff\"#@." + , "-- * For each type of alias, the aliases are encoded as list of (length, alias)." + , "-- The list terminates with @\\\\0@." + , "-- * The list are then concatenated in order of type of alias and" + , "-- terminates with @\\\\0@." + , "-- * The first " + <> show (fromEnum (maxBound :: AliasType) + 1) + <> " bytes represent each one the index of the first element of the" + , "-- corresponding list of aliases. When the list is empty, then the index is 0." + , "-- * Example: @\\\"\\\\5\\\\0\\\\13\\\\0\\\\0\\\\3XXX\\\\2YY\\\\0\\\\4ZZZZ\\\\0\\\\0\\\"#@" + , "-- represents: @[('Correction',[\\\"XXX\\\", \\\"YY\\\"]),('Alternate', [\\\"ZZZZ\\\"])]@." , "--" , "-- @since 0.1.0" , "nameAliases :: Char# -> Addr#" , "nameAliases = \\case" - , mconcat names + , mconcat (intersperse "\n" names) , " _ -> \"\\xff\"#" ] diff --git a/unicode-data-names/bench/Main.hs b/unicode-data-names/bench/Main.hs index 1b8d8438..5525c87e 100644 --- a/unicode-data-names/bench/Main.hs +++ b/unicode-data-names/bench/Main.hs @@ -1,10 +1,22 @@ {-# LANGUAGE CPP, ExistentialQuantification #-} +{-# OPTIONS_GHC -Wno-orphans #-} -import Control.DeepSeq (NFData, deepseq) +import Control.DeepSeq (NFData, deepseq, force) +import Control.Exception (evaluate) import Data.Ix (Ix(..)) -import Test.Tasty.Bench (Benchmark, bgroup, bcompare, bench, nf, defaultMain) +import Data.Proxy (Proxy(..)) +import GHC.Exts (Char(..), indexCharOffAddr#) +import Test.Tasty (askOption, includingOptions) +import Test.Tasty.Bench (Benchmark, bgroup, bcompare, bench, benchIngredients, nf, env) +import Test.Tasty.Options + ( IsOption(defaultValue, optionHelp, optionName, parseValue) + , OptionDescription(..) ) +import Test.Tasty.Runners (TestTree, defaultMainWithIngredients) +import qualified Unicode.Char as UChar import qualified Unicode.Char.General.Names as String +import qualified Unicode.Internal.Char.UnicodeData.DerivedName as DerivedName +import qualified Unicode.Internal.Char.UnicodeData.NameAliases as NameAliases #ifdef HAS_BYTESTRING import qualified Unicode.Char.General.Names.ByteString as ByteString import Data.ByteString () @@ -20,6 +32,44 @@ import qualified ICU.Names.Text as ICUText #endif #endif +-------------------------------------------------------------------------------- +-- CLI options +-------------------------------------------------------------------------------- + +data CharRange = CharRange !Char !Char + +instance IsOption CharRange where + defaultValue = CharRange minBound maxBound + parseValue = \case + "ascii" -> Just (CharRange minBound '\x7f') + "bmp" -> Just (CharRange minBound '\xffff') + "planes0To3" -> Just (CharRange minBound '\x3FFFF') + -- [TODO] handle errors + s -> + let (l, u) = drop 1 <$> break (== '-') s + in Just (CharRange (UChar.chr (read l)) (UChar.chr (read u))) + optionName = pure "chars" + optionHelp = pure "Range of chars to test" + +data Filter = NoFilter | WithName | WithNameAlias + +instance IsOption Filter where + defaultValue = WithName + parseValue = \case + "name" -> Just WithName + "alias" -> Just WithNameAlias + "none" -> Just NoFilter + _ -> Nothing + optionName = pure "chars-filter" + optionHelp = pure "Filter the chars to test" + +-------------------------------------------------------------------------------- +-- Benchmark utils +-------------------------------------------------------------------------------- + +-- Orphan instance +instance NFData String.NameAliasType + -- | A unit benchmark data Bench = forall a. (NFData a) => Bench { -- | Name @@ -27,8 +77,32 @@ data Bench = forall a. (NFData a) => Bench -- | Function to benchmark , _func :: Char -> a } +hasName :: Char -> Bool +hasName (C# c#) = case DerivedName.name c# of + (# _, 0# #) -> False + _ -> True + +hasNameAlias :: Char -> Bool +hasNameAlias (C# c#) = + let addr# = NameAliases.nameAliases c# + in case indexCharOffAddr# addr# 0# of + '\xff'# -> False + _ -> True + +-------------------------------------------------------------------------------- +-- Benchmark +-------------------------------------------------------------------------------- + main :: IO () -main = defaultMain +main = do + let customOpts = [ Option (Proxy :: Proxy CharRange) + , Option (Proxy :: Proxy Filter)] + ingredients = includingOptions customOpts : benchIngredients + defaultMainWithIngredients ingredients + (askOption (askOption . benchmarks)) + +benchmarks :: CharRange -> Filter -> TestTree +benchmarks charRange charFilter = bgroup "All" [ bgroup "Unicode.Char.General.Names" [ bgroup "name" [ bgroup' "name" "String" @@ -96,33 +170,33 @@ main = defaultMain , bgroup "nameAliasesByType" [ bgroup' "nameAliasesByType" "String" [ Bench "unicode-data" - (\c -> (`String.nameAliasesByType` c) <$> [minBound..maxBound]) + (\c -> fold_ (`String.nameAliasesByType` c)) ] #ifdef HAS_BYTESTRING , bgroup' "nameAliasesByType" "ByteString" [ Bench "unicode-data" - (\c -> (`ByteString.nameAliasesByType` c) <$> [minBound..maxBound]) + (\c -> fold_ (`String.nameAliasesByType` c)) ] #endif #ifdef HAS_TEXT , bgroup' "nameAliasesByType" "Text" [ Bench "unicode-data" - (\c -> (`Text.nameAliasesByType` c) <$> [minBound..maxBound]) + (\c -> fold_ (`String.nameAliasesByType` c)) ] #endif ] , bgroup "nameAliasesWithTypes" [ bgroup' "nameAliasesWithTypes" "String" - [ Bench "unicode-data" (show . String.nameAliasesWithTypes) + [ Bench "unicode-data" String.nameAliasesWithTypes ] #ifdef HAS_BYTESTRING , bgroup' "nameAliasesWithTypes" "ByteString" - [ Bench "unicode-data" (show . ByteString.nameAliasesWithTypes) + [ Bench "unicode-data" ByteString.nameAliasesWithTypes ] #endif #ifdef HAS_TEXT , bgroup' "nameAliasesWithTypes" "Text" - [ Bench "unicode-data" (show . Text.nameAliasesWithTypes) + [ Bench "unicode-data" Text.nameAliasesWithTypes ] #endif ] @@ -151,7 +225,7 @@ main = defaultMain -- [NOTE] Works if groupTitle uniquely identifies the benchmark group. benchNF' superGroupTitle groupTitle title = case title of - "unicode-data" -> benchNF title + "unicode-data" -> benchCharsNF title _ -> bcompare ( mconcat [ "$NF == \"unicode-data\" && $(NF-1) == \"" @@ -159,10 +233,30 @@ main = defaultMain , "\" && $(NF-2) == \"" , superGroupTitle , "\"" ] ) - . benchNF title + . benchCharsNF title - benchNF :: forall a. (NFData a) => String -> (Char -> a) -> Benchmark - benchNF t f = bench t $ nf (fold_ f) (minBound, maxBound) + {-# INLINE benchCharsNF #-} + benchCharsNF + :: forall a. (NFData a) + => String + -> (Char -> a) + -> Benchmark + benchCharsNF t f = + -- Avoid side-effects with garbage collection (see tasty-bench doc) + env + (evaluate (force chars')) -- initialize + (bench t . nf (foldr (deepseq . f) ())) -- benchmark + where + CharRange l u = charRange + extraFilter = case charFilter of + NoFilter -> const True + WithName -> hasName + WithNameAlias -> hasNameAlias + chars = filter isValid [l..u] + -- Ensure to have sufficiently chars + n = 0x10FFFF `div` length chars + chars' = mconcat (replicate n chars) + isValid c = UChar.generalCategory c < UChar.Surrogate && extraFilter c - fold_ :: forall a. (NFData a) => (Char -> a) -> (Char, Char) -> () - fold_ f = foldr (deepseq . f) () . range + fold_ :: forall a. (NFData a) => (String.NameAliasType -> a) -> () + fold_ f = foldr (deepseq . f) () (range (minBound, maxBound)) diff --git a/unicode-data-names/lib/Unicode/Char/General/Names.hs b/unicode-data-names/lib/Unicode/Char/General/Names.hs index c3e54afe..a7856838 100644 --- a/unicode-data-names/lib/Unicode/Char/General/Names.hs +++ b/unicode-data-names/lib/Unicode/Char/General/Names.hs @@ -29,11 +29,11 @@ module Unicode.Char.General.Names import Control.Applicative ((<|>)) import GHC.Exts - ( Addr#, Char(..), Int#, Int(..) + ( Addr#, Char(..), Char#, Int# , indexCharOffAddr#, plusAddr#, (+#), (-#), (<#), isTrue#, quotRemInt# - , dataToTag#, ord#, Char# ) + , dataToTag#, ord# ) -import Unicode.Internal.Bits.Names (SPEC(..), unpackCString#) +import Unicode.Internal.Bits.Names (unpackNBytes#) import qualified Unicode.Internal.Char.UnicodeData.DerivedName as DerivedName import qualified Unicode.Internal.Char.UnicodeData.NameAliases as NameAliases @@ -82,23 +82,7 @@ showHex !c# = showIt [] (quotRemInt# (ord# c#) 16#) 0# -> acc' _ -> showIt acc' (quotRemInt# q 16#) where - !c = case r of - 0# -> '0' - 1# -> '1' - 2# -> '2' - 3# -> '3' - 4# -> '4' - 5# -> '5' - 6# -> '6' - 7# -> '7' - 8# -> '8' - 9# -> '9' - 10# -> 'A' - 11# -> 'B' - 12# -> 'C' - 13# -> 'D' - 14# -> 'E' - _ -> 'F' + !c = C# (indexCharOffAddr# "0123456789ABCDEF"# r) !acc' = c : acc -- | Returns /corrected/ name of a character (see 'NameAliases.Correction'), @@ -114,7 +98,7 @@ correctedName c@(C# c#) = corrected <|> name c '\xff'# -> Nothing -- no aliases '\x00'# -> Nothing -- no correction i# -> - let !n = unpackCString# (addr# `plusAddr#` (ord# i# +# 1#)) + let !n = unpackNBytes'# (addr# `plusAddr#` ord# i#) in Just n !addr# = NameAliases.nameAliases c# @@ -125,16 +109,14 @@ correctedName c@(C# c#) = corrected <|> name c nameOrAlias :: Char -> Maybe String nameOrAlias c@(C# c#) = name c <|> case indexCharOffAddr# addr# 0# of '\xff'# -> Nothing -- no aliases - '\x00'# -> let !n = unpackCString# (go 1#) in Just n - _ -> let !n = unpackCString# (go 0#) in Just n + '\x00'# -> let !n = go 1# in Just n + _ -> let !n = go 0# in Just n where !addr# = NameAliases.nameAliases c# - !(I# maxNameAliasType#) = NameAliases.maxNameAliasType - go t# = case indexCharOffAddr# (addr# `plusAddr#` t#) 0# of - '\0'# -> if isTrue# (t# <# maxNameAliasType#) - then go (t# +# 1#) - else "\0"# -- impossible: there is at least one alias - i# -> addr# `plusAddr#` (ord# i# +# 1#) + go t# = case ord# (indexCharOffAddr# (addr# `plusAddr#` t#) 0#) of + -- No bound check for t#: there is at least one alias + 0# -> go (t# +# 1#) + i# -> unpackNBytes'# (addr# `plusAddr#` i#) -- | All name aliases of a character, if defined. -- The names are listed in the original order of the UCD. @@ -144,11 +126,23 @@ nameOrAlias c@(C# c#) = name c <|> case indexCharOffAddr# addr# 0# of -- @since 0.1.0 {-# INLINE nameAliases #-} nameAliases :: Char -> [String] -nameAliases (C# c#) = case indexCharOffAddr# addr# 0# of +nameAliases (C# c#) = case indexCharOffAddr# addr0# 0# of '\xff'# -> [] -- no aliases - _ -> foldMap (nameAliasesByType# addr#) [minBound..maxBound] + _ -> go (addr0# `plusAddr#` (NameAliases.MaxNameAliasType +# 1#)) + where + go addr# = case ord# (indexCharOffAddr# addr# 0#) of + 0# -> case ord# (indexCharOffAddr# (addr# `plusAddr#` 1#) 0#) of + -- End of list + 0# -> [] + -- skip empty entry + l# -> + let !s = unpackNBytes# (addr# `plusAddr#` 2#) l# + in s : go (addr# `plusAddr#` (l# +# 2#)) + l# -> + let !s = unpackNBytes# (addr# `plusAddr#` 1#) l# + in s : go (addr# `plusAddr#` (l# +# 1#)) where - addr# = NameAliases.nameAliases c# + addr0# = NameAliases.nameAliases c# -- | Name aliases of a character for a specific name alias type. -- @@ -176,24 +170,28 @@ nameAliasesWithTypes (C# c#) = case indexCharOffAddr# addr# 0# of where addr# = NameAliases.nameAliases c# mk t acc = case nameAliasesByType# addr# t of - [] -> acc - as -> (t, as) : acc + [] -> acc + !as -> (t, as) : acc + +{-# INLINE unpackNBytes'# #-} +unpackNBytes'# :: Addr# -> String +unpackNBytes'# addr# = unpackNBytes# + (addr# `plusAddr#` 1#) + (ord# (indexCharOffAddr# addr# 0#)) {-# INLINE nameAliasesByType# #-} nameAliasesByType# :: Addr# -> NameAliases.NameAliasType -> [String] nameAliasesByType# addr# t = case indexCharOffAddr# (addr# `plusAddr#` t#) 0# of '\0'# -> [] -- no aliases for this type - i# -> unpackCStrings addr# (ord# i#) + i# -> unpackCStrings# (addr# `plusAddr#` ord# i#) where t# = dataToTag# t -{-# INLINE unpackCStrings #-} -unpackCStrings :: Addr# -> Int# -> [String] -unpackCStrings addr# = go SPEC +{-# INLINE unpackCStrings# #-} +unpackCStrings# :: Addr# -> [String] +unpackCStrings# = go where - go !_ i# = - let !s = unpackCString# (addr# `plusAddr#` (i# +# 1#)) - in s : case indexCharOffAddr# (addr# `plusAddr#` i#) 0# of - '\0'# -> [] - j# -> go SPEC (ord# j#) - - + go addr# = case ord# (indexCharOffAddr# addr# 0#) of + 0# -> [] + l# -> + let !s = unpackNBytes# (addr# `plusAddr#` 1#) l# + in s : go (addr# `plusAddr#` (l# +# 1#)) diff --git a/unicode-data-names/lib/Unicode/Char/General/Names/ByteString.hs b/unicode-data-names/lib/Unicode/Char/General/Names/ByteString.hs index fc00f944..9d7f2c38 100644 --- a/unicode-data-names/lib/Unicode/Char/General/Names/ByteString.hs +++ b/unicode-data-names/lib/Unicode/Char/General/Names/ByteString.hs @@ -31,11 +31,11 @@ import Data.Word (Word8) import Foreign.Storable (Storable(..)) import Foreign.Ptr (plusPtr) import GHC.Exts - ( Addr#, Ptr(..), indexCharOffAddr#, plusAddr# + ( Addr#, Ptr(..), indexCharOffAddr#, indexWord8OffAddr#, plusAddr# , Char(..), ord# , Int#, Int(..), (+#), (-#), (<#), isTrue#, quotRemInt#, dataToTag# ) +import GHC.Word (Word8(..)) -import Unicode.Internal.Bits.Names (SPEC(..)) import qualified Unicode.Internal.Char.UnicodeData.DerivedName as DerivedName import qualified Unicode.Internal.Char.UnicodeData.NameAliases as NameAliases @@ -60,8 +60,9 @@ name (C# c#) = case DerivedName.name c# of let !n = unpackAddr# name# len# in Just n | otherwise -> - let !len = I# (len# -# DerivedName.HangulSyllable +# 16#) - !n = BS.unsafeCreate len (\ptr -> + let !n = BS.unsafeCreate + (I# (len# -# DerivedName.HangulSyllable +# 16#)) + (\ptr -> BS.memcpy ptr (Ptr "HANGUL SYLLABLE "#) 16 *> BS.memcpy (ptr `plusPtr` 16) (Ptr name#) @@ -82,17 +83,16 @@ mkNameFromTemplate template# len# cp# = ) -- [NOTE] We assume c# >= '\x1000' to avoid to check for padding +{-# INLINE writeHex #-} writeHex :: Int# -> Int# -> Ptr Word8 -> IO () writeHex cp# offset0# !ptr = showIt offset0# (quotRemInt# cp# 16#) where - showIt offset# (# q, r #) = pokeByteOff ptr (I# offset#) c *> case q of - 0# -> pure () - _ -> showIt (offset# -# 1#) (quotRemInt# q 16#) - where - c :: Word8 - !c = if isTrue# (r <# 10#) - then fromIntegral (I# (0x30# +# r)) - else fromIntegral (I# (0x37# +# r)) + showIt offset# (# q, r #) + = pokeByteOff ptr (I# offset#) + (W8# (indexWord8OffAddr# "0123456789ABCDEF"# r)) + *> case q of + 0# -> pure () + _ -> showIt (offset# -# 1#) (quotRemInt# q 16#) -- | Returns /corrected/ name of a character (see 'NameAliases.Correction'), -- if defined, otherwise returns its original 'name' if defined. @@ -107,7 +107,9 @@ correctedName c@(C# c#) = corrected <|> name c '\xff'# -> Nothing -- no aliases '\x00'# -> Nothing -- no correction i# -> - let !n = BS.unsafePackLiteral (addr# `plusAddr#` (ord# i# +# 1#)) + let !n = unpackAddr# + (addr# `plusAddr#` (ord# i# +# 1#)) + (ord# (indexCharOffAddr# (addr# `plusAddr#` ord# i#) 0#)) in Just n !addr# = NameAliases.nameAliases c# @@ -118,16 +120,16 @@ correctedName c@(C# c#) = corrected <|> name c nameOrAlias :: Char -> Maybe BS.ByteString nameOrAlias c@(C# c#) = name c <|> case indexCharOffAddr# addr# 0# of '\xff'# -> Nothing -- no aliases - '\x00'# -> let !n = BS.unsafePackLiteral (go 1#) in Just n - _ -> let !n = BS.unsafePackLiteral (go 0#) in Just n + '\x00'# -> let !n = go 1# in Just n + _ -> let !n = go 0# in Just n where !addr# = NameAliases.nameAliases c# - !(I# maxNameAliasType#) = NameAliases.maxNameAliasType - go t# = case indexCharOffAddr# (addr# `plusAddr#` t#) 0# of - '\0'# -> if isTrue# (t# <# maxNameAliasType#) - then go (t# +# 1#) - else "\0"# -- impossible: there is at least one alias - i# -> addr# `plusAddr#` (ord# i# +# 1#) + go t# = case ord# (indexCharOffAddr# (addr# `plusAddr#` t#) 0#) of + -- No bound check for t#: there is at least one alias + 0# -> go (t# +# 1#) + i# -> unpackAddr# + (addr# `plusAddr#` (i# +# 1#)) + (ord# (indexCharOffAddr# (addr# `plusAddr#` i#) 0#)) -- | All name aliases of a character, if defined. -- The names are listed in the original order of the UCD. @@ -137,11 +139,23 @@ nameOrAlias c@(C# c#) = name c <|> case indexCharOffAddr# addr# 0# of -- @since 0.3.0 {-# INLINE nameAliases #-} nameAliases :: Char -> [BS.ByteString] -nameAliases (C# c#) = case indexCharOffAddr# addr# 0# of +nameAliases (C# c#) = case indexCharOffAddr# addr0# 0# of '\xff'# -> [] -- no aliases - _ -> foldMap (nameAliasesByType# addr#) [minBound..maxBound] + _ -> go (addr0# `plusAddr#` (NameAliases.MaxNameAliasType +# 1#)) + where + go addr# = case ord# (indexCharOffAddr# addr# 0#) of + 0# -> case ord# (indexCharOffAddr# (addr# `plusAddr#` 1#) 0#) of + -- End of list + 0# -> [] + -- Skip empty entry + l# -> + let !s = unpackAddr# (addr# `plusAddr#` 2#) l# + in s : go (addr# `plusAddr#` (l# +# 2#)) + l# -> + let !s = unpackAddr# (addr# `plusAddr#` 1#) l# + in s : go (addr# `plusAddr#` (l# +# 1#)) where - addr# = NameAliases.nameAliases c# + addr0# = NameAliases.nameAliases c# -- | Name aliases of a character for a specific name alias type. -- @@ -174,17 +188,13 @@ nameAliasesWithTypes (C# c#) = case indexCharOffAddr# addr# 0# of {-# INLINE nameAliasesByType# #-} nameAliasesByType# :: Addr# -> NameAliases.NameAliasType -> [BS.ByteString] -nameAliasesByType# addr# t = case indexCharOffAddr# (addr# `plusAddr#` t#) 0# of - '\0'# -> [] -- no aliases for this type - i# -> unpackCStrings addr# (ord# i#) - where t# = dataToTag# t - -{-# INLINE unpackCStrings #-} -unpackCStrings :: Addr# -> Int# -> [BS.ByteString] -unpackCStrings addr# = go SPEC - where - go !_ i# = - let !s = BS.unsafePackLiteral (addr# `plusAddr#` (i# +# 1#)) - in s : case indexCharOffAddr# (addr# `plusAddr#` i#) 0# of - '\0'# -> [] - j# -> go SPEC (ord# j#) +nameAliasesByType# addr0# t = + case ord# (indexCharOffAddr# (addr0# `plusAddr#` dataToTag# t) 0#) of + 0# -> [] -- no aliases for this type + i# -> go (addr0# `plusAddr#` i#) + where + go addr# = case ord# (indexCharOffAddr# addr# 0#) of + 0# -> [] + l# -> + let !s = unpackAddr# (addr# `plusAddr#` 1#) l# + in s : go (addr# `plusAddr#` (l# +# 1#)) diff --git a/unicode-data-names/lib/Unicode/Char/General/Names/Text.hs b/unicode-data-names/lib/Unicode/Char/General/Names/Text.hs index 83c7087f..277acc79 100644 --- a/unicode-data-names/lib/Unicode/Char/General/Names/Text.hs +++ b/unicode-data-names/lib/Unicode/Char/General/Names/Text.hs @@ -30,11 +30,11 @@ import qualified Data.Text as T import qualified Data.Text.Internal as T import qualified Data.Text.Array as A import GHC.Exts - ( Addr#, Ptr(..), indexCharOffAddr#, plusAddr# + ( Addr#, Ptr(..), indexCharOffAddr#, indexWord8OffAddr#, plusAddr# , Char(..), ord# , Int#, Int(..), (+#), (-#), (<#), isTrue#, quotRemInt#, dataToTag# ) +import GHC.Word (Word8(..)) -import Unicode.Internal.Bits.Names (SPEC(..)) import qualified Unicode.Internal.Char.UnicodeData.DerivedName as DerivedName import qualified Unicode.Internal.Char.UnicodeData.NameAliases as NameAliases @@ -60,44 +60,46 @@ name (C# c#) = case DerivedName.name c# of in Just n | otherwise -> let !len = I# (len# -# DerivedName.HangulSyllable +# 16#) - !n = ST.runST (do + ba = ST.runST (do marr <- A.new len A.copyFromPointer marr 0 (Ptr "HANGUL SYLLABLE "#) 16 A.copyFromPointer marr 16 (Ptr name#) (I# (len# -# DerivedName.HangulSyllable)) - arr <- A.unsafeFreeze marr - pure (T.Text arr 0 len)) + A.unsafeFreeze marr) + !n = T.Text ba 0 len in Just n -- See: unpackCStringAscii#. Here we know the length. unpackAddr# :: Addr# -> Int# -> T.Text -unpackAddr# addr# len# = ST.runST (do - marr <- A.new (I# len#) - A.copyFromPointer marr 0 (Ptr addr#) (I# len#) - arr <- A.unsafeFreeze marr - pure (T.Text arr 0 (I# len#))) +unpackAddr# addr# len# = T.Text ba 0 (I# len#) + where + ba = ST.runST (do + marr <- A.new (I# len#) + A.copyFromPointer marr 0 (Ptr addr#) (I# len#) + A.unsafeFreeze marr) mkNameFromTemplate :: Addr# -> Int# -> Int# -> T.Text -mkNameFromTemplate template# len# cp# = ST.runST (do - let len'# = len# +# if isTrue# (cp# <# 0x10000#) then 4# else 5# - marr <- A.new (I# len'#) - A.copyFromPointer marr 0 (Ptr template#) (I# len#) - writeHex cp# (len'# -# 1#) marr - arr <- A.unsafeFreeze marr - pure (T.Text arr 0 (I# len'#))) +mkNameFromTemplate template# len# cp# = T.Text ba 0 (I# len'#) + where + len'# = len# +# if isTrue# (cp# <# 0x10000#) then 4# else 5# + ba = ST.runST (do + marr <- A.new (I# len'#) + A.copyFromPointer marr 0 (Ptr template#) (I# len#) + writeHex cp# (len'# -# 1#) marr + A.unsafeFreeze marr) -- [NOTE] We assume c# >= '\x1000' to avoid to check for padding +{-# INLINE writeHex #-} writeHex :: Int# -> Int# -> A.MArray s -> ST.ST s () writeHex cp# offset0# !marr = showIt offset0# (quotRemInt# cp# 16#) where - showIt offset# (# q, r #) = A.unsafeWrite marr (I# offset#) c *> case q of - 0# -> pure () - _ -> showIt (offset# -# 1#) (quotRemInt# q 16#) - where - !c = if isTrue# (r <# 10#) - then fromIntegral (I# (0x30# +# r)) - else fromIntegral (I# (0x37# +# r)) + showIt offset# (# q, r #) + = A.unsafeWrite marr (I# offset#) + (W8# (indexWord8OffAddr# "0123456789ABCDEF"# r)) + *> case q of + 0# -> pure () + _ -> showIt (offset# -# 1#) (quotRemInt# q 16#) -- | Returns /corrected/ name of a character (see 'NameAliases.Correction'), -- if defined, otherwise returns its original 'name' if defined. @@ -108,11 +110,12 @@ correctedName :: Char -> Maybe T.Text correctedName c@(C# c#) = corrected <|> name c where -- Assumption: fromEnum NameAliases.Correction == 0 - !corrected = case indexCharOffAddr# addr# 0# of - '\xff'# -> Nothing -- no aliases - '\x00'# -> Nothing -- no correction - i# -> - let !n = T.unpackCStringAscii# (addr# `plusAddr#` (ord# i# +# 1#)) + !corrected = case ord# (indexCharOffAddr# addr# 0#) of + 0xff# -> Nothing -- no aliases + 0x00# -> Nothing -- no correction + i# -> + let l# = ord# (indexCharOffAddr# (addr# `plusAddr#` i#) 0#) + !n = unpackAddr# (addr# `plusAddr#` (i# +# 1#)) l# in Just n !addr# = NameAliases.nameAliases c# @@ -123,16 +126,16 @@ correctedName c@(C# c#) = corrected <|> name c nameOrAlias :: Char -> Maybe T.Text nameOrAlias c@(C# c#) = name c <|> case indexCharOffAddr# addr# 0# of '\xff'# -> Nothing -- no aliases - '\x00'# -> let !n = T.unpackCStringAscii# (go 1#) in Just n - _ -> let !n = T.unpackCStringAscii# (go 0#) in Just n + '\x00'# -> let !n = go 1# in Just n + _ -> let !n = go 0# in Just n where !addr# = NameAliases.nameAliases c# - !(I# maxNameAliasType#) = NameAliases.maxNameAliasType - go t# = case indexCharOffAddr# (addr# `plusAddr#` t#) 0# of - '\0'# -> if isTrue# (t# <# maxNameAliasType#) - then go (t# +# 1#) - else "\0"# -- impossible: there is at least one alias - i# -> addr# `plusAddr#` (ord# i# +# 1#) + go t# = case ord# (indexCharOffAddr# (addr# `plusAddr#` t#) 0#) of + -- No bound check for t#: there is at least one alias + 0# -> go (t# +# 1#) + i# -> + let l# = ord# (indexCharOffAddr# (addr# `plusAddr#` i#) 0#) + in unpackAddr# (addr# `plusAddr#` (i# +# 1#)) l# -- | All name aliases of a character, if defined. -- The names are listed in the original order of the UCD. @@ -142,11 +145,23 @@ nameOrAlias c@(C# c#) = name c <|> case indexCharOffAddr# addr# 0# of -- @since 0.3.0 {-# INLINE nameAliases #-} nameAliases :: Char -> [T.Text] -nameAliases (C# c#) = case indexCharOffAddr# addr# 0# of +nameAliases (C# c#) = case indexCharOffAddr# addr0# 0# of '\xff'# -> [] -- no aliases - _ -> foldMap (nameAliasesByType# addr#) [minBound..maxBound] + _ -> go (addr0# `plusAddr#` (NameAliases.MaxNameAliasType +# 1#)) + where + go addr# = case ord# (indexCharOffAddr# addr# 0#) of + 0# -> case ord# (indexCharOffAddr# (addr# `plusAddr#` 1#) 0#) of + -- End of list + 0# -> [] + -- Skip empty entry + l# -> + let !s = unpackAddr# (addr# `plusAddr#` 2#) l# + in s : go (addr# `plusAddr#` (l# +# 2#)) + l# -> + let !s = unpackAddr# (addr# `plusAddr#` 1#) l# + in s : go (addr# `plusAddr#` (l# +# 1#)) where - addr# = NameAliases.nameAliases c# + addr0# = NameAliases.nameAliases c# -- | Name aliases of a character for a specific name alias type. -- @@ -179,19 +194,13 @@ nameAliasesWithTypes (C# c#) = case indexCharOffAddr# addr# 0# of {-# INLINE nameAliasesByType# #-} nameAliasesByType# :: Addr# -> NameAliases.NameAliasType -> [T.Text] -nameAliasesByType# addr# t = case indexCharOffAddr# (addr# `plusAddr#` t#) 0# of - '\0'# -> [] -- no aliases for this type - i# -> unpackCStrings addr# (ord# i#) - where t# = dataToTag# t - -{-# INLINE unpackCStrings #-} -unpackCStrings :: Addr# -> Int# -> [T.Text] -unpackCStrings addr# = go SPEC - where - go !_ i# = - let !s = T.unpackCStringAscii# (addr# `plusAddr#` (i# +# 1#)) - in s : case indexCharOffAddr# (addr# `plusAddr#` i#) 0# of - '\0'# -> [] - j# -> go SPEC (ord# j#) - - +nameAliasesByType# addr0# t = + case ord# (indexCharOffAddr# (addr0# `plusAddr#` dataToTag# t) 0#) of + 0# -> [] -- no aliases for this type + i# -> go (addr0# `plusAddr#` i#) + where + go addr# = case ord# (indexCharOffAddr# addr# 0#) of + 0# -> [] + l# -> + let !s = unpackAddr# (addr# `plusAddr#` 1#) l# + in s : go (addr# `plusAddr#` (l# +# 1#)) diff --git a/unicode-data-names/lib/Unicode/Internal/Bits/Names.hs b/unicode-data-names/lib/Unicode/Internal/Bits/Names.hs index c0b0960f..8ccea8e1 100644 --- a/unicode-data-names/lib/Unicode/Internal/Bits/Names.hs +++ b/unicode-data-names/lib/Unicode/Internal/Bits/Names.hs @@ -1,4 +1,6 @@ {-# LANGUAGE CPP #-} +{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} +{-# HLINT ignore "Use fewer imports" #-} -- | -- Module : Unicode.Internal.Bits @@ -15,9 +17,7 @@ module Unicode.Internal.Bits.Names ( -- * Bitmap lookup lookupInt32# -- * CString - , unpackCString# - -- * Miscellaneous - , SPEC(..) + , unpackNBytes# ) where #include "MachDeps.h" @@ -41,10 +41,9 @@ import GHC.Exts (int32ToInt#) #endif #if MIN_VERSION_base(4,15,0) -import GHC.Exts (unpackCString#, SPEC(..)) +import GHC.Exts (unpackNBytes#) #else -import GHC.CString (unpackCString#) -import GHC.Types (SPEC(..)) +import GHC.CString (unpackNBytes#) #endif {-| @lookupInt32# addr index@ looks up for the @index@-th 32-bits word in diff --git a/unicode-data-names/lib/Unicode/Internal/Char/UnicodeData/NameAliases.hs b/unicode-data-names/lib/Unicode/Internal/Char/UnicodeData/NameAliases.hs index 625d7c7a..95a0823a 100644 --- a/unicode-data-names/lib/Unicode/Internal/Char/UnicodeData/NameAliases.hs +++ b/unicode-data-names/lib/Unicode/Internal/Char/UnicodeData/NameAliases.hs @@ -6,14 +6,16 @@ -- Maintainer : streamly@composewell.com -- Stability : experimental +{-# LANGUAGE DeriveGeneric, PatternSynonyms #-} {-# OPTIONS_HADDOCK hide #-} module Unicode.Internal.Char.UnicodeData.NameAliases -(NameAliasType(..), maxNameAliasType, nameAliases) +(NameAliasType(..), pattern MaxNameAliasType, nameAliases) where import Data.Ix (Ix) -import GHC.Exts (Addr#, Char#) +import GHC.Exts (Addr#, Char#, Int#) +import GHC.Generics (Generic) -- | Type of name alias. See Unicode Standard 15.0.0, section 4.8. -- @@ -32,399 +34,413 @@ data NameAliasType | Abbreviation -- ^ Commonly occurring abbreviations (or acronyms) for control codes, -- format characters, spaces, and variation selectors. - deriving (Enum, Bounded, Eq, Ord, Ix, Show) + deriving (Generic, Enum, Bounded, Eq, Ord, Ix, Show) --- | >>> maxNameAliasType == fromEnum (maxBound :: NameAliasType) -maxNameAliasType :: Int -maxNameAliasType = 4 +-- $setup +-- >>> import GHC.Exts (Int(..)) + +-- | +-- >>> I# MaxNameAliasType == fromEnum (maxBound :: NameAliasType) +-- True +pattern MaxNameAliasType :: Int# +pattern MaxNameAliasType = 4# -- | Detailed character names aliases. -- The names are listed in the original order of the UCD. -- --- See 'nameAliases' if the alias type is not required. +-- Encoding: +-- +-- * If there is no alias, return @"\\xff"#@. +-- * For each type of alias, the aliases are encoded as list of (length, alias). +-- The list terminates with @\\0@. +-- * The list are then concatenated in order of type of alias and +-- terminates with @\\0@. +-- * The first 5 bytes represent each one the index of the first element of the +-- corresponding list of aliases. When the list is empty, then the index is 0. +-- * Example: @\"\\5\\0\\13\\0\\0\\3XXX\\2YY\\0\\4ZZZZ\\0\\0\"#@ +-- represents: @[('Correction',[\"XXX\", \"YY\"]),('Alternate', [\"ZZZZ\"])]@. -- -- @since 0.1.0 nameAliases :: Char# -> Addr# nameAliases = \case - '\x0000'# -> "\0\5\0\0\11\0NULL\0\0NUL\0"# - '\x0001'# -> "\0\5\0\0\23\0START OF HEADING\0\0SOH\0"# - '\x0002'# -> "\0\5\0\0\20\0START OF TEXT\0\0STX\0"# - '\x0003'# -> "\0\5\0\0\18\0END OF TEXT\0\0ETX\0"# - '\x0004'# -> "\0\5\0\0\26\0END OF TRANSMISSION\0\0EOT\0"# - '\x0005'# -> "\0\5\0\0\14\0ENQUIRY\0\0ENQ\0"# - '\x0006'# -> "\0\5\0\0\18\0ACKNOWLEDGE\0\0ACK\0"# - '\x0007'# -> "\0\5\0\0\12\0ALERT\0\0BEL\0"# - '\x0008'# -> "\0\5\0\0\16\0BACKSPACE\0\0BS\0"# - '\x0009'# -> "\0\5\0\0\50\27CHARACTER TABULATION\0\0HORIZONTAL TABULATION\0\54HT\0\0TAB\0"# - '\x000A'# -> "\0\5\0\0\39\16LINE FEED\0\26NEW LINE\0\0END OF LINE\0\43LF\0\47NL\0\0EOL\0"# - '\x000B'# -> "\0\5\0\0\43\22LINE TABULATION\0\0VERTICAL TABULATION\0\0VT\0"# - '\x000C'# -> "\0\5\0\0\16\0FORM FEED\0\0FF\0"# - '\x000D'# -> "\0\5\0\0\22\0CARRIAGE RETURN\0\0CR\0"# - '\x000E'# -> "\0\5\0\0\35\16SHIFT OUT\0\0LOCKING-SHIFT ONE\0\0SO\0"# - '\x000F'# -> "\0\5\0\0\35\15SHIFT IN\0\0LOCKING-SHIFT ZERO\0\0SI\0"# - '\x0010'# -> "\0\5\0\0\23\0DATA LINK ESCAPE\0\0DLE\0"# - '\x0011'# -> "\0\5\0\0\25\0DEVICE CONTROL ONE\0\0DC1\0"# - '\x0012'# -> "\0\5\0\0\25\0DEVICE CONTROL TWO\0\0DC2\0"# - '\x0013'# -> "\0\5\0\0\27\0DEVICE CONTROL THREE\0\0DC3\0"# - '\x0014'# -> "\0\5\0\0\26\0DEVICE CONTROL FOUR\0\0DC4\0"# - '\x0015'# -> "\0\5\0\0\27\0NEGATIVE ACKNOWLEDGE\0\0NAK\0"# - '\x0016'# -> "\0\5\0\0\23\0SYNCHRONOUS IDLE\0\0SYN\0"# - '\x0017'# -> "\0\5\0\0\32\0END OF TRANSMISSION BLOCK\0\0ETB\0"# - '\x0018'# -> "\0\5\0\0\13\0CANCEL\0\0CAN\0"# - '\x0019'# -> "\0\5\0\0\20\0END OF MEDIUM\0\25EOM\0\0EM\0"# - '\x001A'# -> "\0\5\0\0\17\0SUBSTITUTE\0\0SUB\0"# - '\x001B'# -> "\0\5\0\0\13\0ESCAPE\0\0ESC\0"# - '\x001C'# -> "\0\5\0\0\49\33INFORMATION SEPARATOR FOUR\0\0FILE SEPARATOR\0\0FS\0"# - '\x001D'# -> "\0\5\0\0\51\34INFORMATION SEPARATOR THREE\0\0GROUP SEPARATOR\0\0GS\0"# - '\x001E'# -> "\0\5\0\0\50\32INFORMATION SEPARATOR TWO\0\0RECORD SEPARATOR\0\0RS\0"# - '\x001F'# -> "\0\5\0\0\48\32INFORMATION SEPARATOR ONE\0\0UNIT SEPARATOR\0\0US\0"# - '\x0020'# -> "\0\0\0\0\5\0SP\0"# - '\x007F'# -> "\0\5\0\0\13\0DELETE\0\0DEL\0"# - '\x0080'# -> "\0\0\0\5\24\0PADDING CHARACTER\0\0PAD\0"# - '\x0081'# -> "\0\0\0\5\24\0HIGH OCTET PRESET\0\0HOP\0"# - '\x0082'# -> "\0\5\0\0\27\0BREAK PERMITTED HERE\0\0BPH\0"# - '\x0083'# -> "\0\5\0\0\20\0NO BREAK HERE\0\0NBH\0"# - '\x0084'# -> "\0\5\0\0\12\0INDEX\0\0IND\0"# - '\x0085'# -> "\0\5\0\0\16\0NEXT LINE\0\0NEL\0"# - '\x0086'# -> "\0\5\0\0\29\0START OF SELECTED AREA\0\0SSA\0"# - '\x0087'# -> "\0\5\0\0\27\0END OF SELECTED AREA\0\0ESA\0"# - '\x0088'# -> "\0\5\0\0\58\31CHARACTER TABULATION SET\0\0HORIZONTAL TABULATION SET\0\0HTS\0"# - '\x0089'# -> "\0\5\0\0\88\46CHARACTER TABULATION WITH JUSTIFICATION\0\0HORIZONTAL TABULATION WITH JUSTIFICATION\0\0HTJ\0"# - '\x008A'# -> "\0\5\0\0\51\26LINE TABULATION SET\0\0VERTICAL TABULATION SET\0\0VTS\0"# - '\x008B'# -> "\0\5\0\0\46\27PARTIAL LINE FORWARD\0\0PARTIAL LINE DOWN\0\0PLD\0"# - '\x008C'# -> "\0\5\0\0\45\28PARTIAL LINE BACKWARD\0\0PARTIAL LINE UP\0\0PLU\0"# - '\x008D'# -> "\0\5\0\0\39\24REVERSE LINE FEED\0\0REVERSE INDEX\0\0RI\0"# - '\x008E'# -> "\0\5\0\0\39\23SINGLE SHIFT TWO\0\0SINGLE-SHIFT-2\0\0SS2\0"# - '\x008F'# -> "\0\5\0\0\41\25SINGLE SHIFT THREE\0\0SINGLE-SHIFT-3\0\0SS3\0"# - '\x0090'# -> "\0\5\0\0\28\0DEVICE CONTROL STRING\0\0DCS\0"# - '\x0091'# -> "\0\5\0\0\37\22PRIVATE USE ONE\0\0PRIVATE USE-1\0\0PU1\0"# - '\x0092'# -> "\0\5\0\0\37\22PRIVATE USE TWO\0\0PRIVATE USE-2\0\0PU2\0"# - '\x0093'# -> "\0\5\0\0\25\0SET TRANSMIT STATE\0\0STS\0"# - '\x0094'# -> "\0\5\0\0\23\0CANCEL CHARACTER\0\0CCH\0"# - '\x0095'# -> "\0\5\0\0\22\0MESSAGE WAITING\0\0MW\0"# - '\x0096'# -> "\0\5\0\0\53\28START OF GUARDED AREA\0\0START OF PROTECTED AREA\0\0SPA\0"# - '\x0097'# -> "\0\5\0\0\49\26END OF GUARDED AREA\0\0END OF PROTECTED AREA\0\0EPA\0"# - '\x0098'# -> "\0\5\0\0\22\0START OF STRING\0\0SOS\0"# - '\x0099'# -> "\0\0\0\5\42\0SINGLE GRAPHIC CHARACTER INTRODUCER\0\0SGC\0"# - '\x009A'# -> "\0\5\0\0\34\0SINGLE CHARACTER INTRODUCER\0\0SCI\0"# - '\x009B'# -> "\0\5\0\0\34\0CONTROL SEQUENCE INTRODUCER\0\0CSI\0"# - '\x009C'# -> "\0\5\0\0\24\0STRING TERMINATOR\0\0ST\0"# - '\x009D'# -> "\0\5\0\0\31\0OPERATING SYSTEM COMMAND\0\0OSC\0"# - '\x009E'# -> "\0\5\0\0\22\0PRIVACY MESSAGE\0\0PM\0"# - '\x009F'# -> "\0\5\0\0\34\0APPLICATION PROGRAM COMMAND\0\0APC\0"# - '\x00A0'# -> "\0\0\0\0\5\0NBSP\0"# - '\x00AD'# -> "\0\0\0\0\5\0SHY\0"# - '\x01A2'# -> "\5\0\0\0\0\0LATIN CAPITAL LETTER GHA\0"# - '\x01A3'# -> "\5\0\0\0\0\0LATIN SMALL LETTER GHA\0"# - '\x034F'# -> "\0\0\0\0\5\0CGJ\0"# - '\x0616'# -> "\5\0\0\0\0\0ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE\0"# - '\x061C'# -> "\0\0\0\0\5\0ALM\0"# - '\x0709'# -> "\5\0\0\0\0\0SYRIAC SUBLINEAR COLON SKEWED LEFT\0"# - '\x0CDE'# -> "\5\0\0\0\0\0KANNADA LETTER LLLA\0"# - '\x0E9D'# -> "\5\0\0\0\0\0LAO LETTER FO FON\0"# - '\x0E9F'# -> "\5\0\0\0\0\0LAO LETTER FO FAY\0"# - '\x0EA3'# -> "\5\0\0\0\0\0LAO LETTER RO\0"# - '\x0EA5'# -> "\5\0\0\0\0\0LAO LETTER LO\0"# - '\x0FD0'# -> "\5\0\0\0\0\0TIBETAN MARK BKA- SHOG GI MGO RGYAN\0"# - '\x11EC'# -> "\5\0\0\0\0\0HANGUL JONGSEONG YESIEUNG-KIYEOK\0"# - '\x11ED'# -> "\5\0\0\0\0\0HANGUL JONGSEONG YESIEUNG-SSANGKIYEOK\0"# - '\x11EE'# -> "\5\0\0\0\0\0HANGUL JONGSEONG SSANGYESIEUNG\0"# - '\x11EF'# -> "\5\0\0\0\0\0HANGUL JONGSEONG YESIEUNG-KHIEUKH\0"# - '\x180B'# -> "\0\0\0\0\5\0FVS1\0"# - '\x180C'# -> "\0\0\0\0\5\0FVS2\0"# - '\x180D'# -> "\0\0\0\0\5\0FVS3\0"# - '\x180E'# -> "\0\0\0\0\5\0MVS\0"# - '\x180F'# -> "\0\0\0\0\5\0FVS4\0"# - '\x1BBD'# -> "\5\0\0\0\0\0SUNDANESE LETTER ARCHAIC I\0"# - '\x200B'# -> "\0\0\0\0\5\0ZWSP\0"# - '\x200C'# -> "\0\0\0\0\5\0ZWNJ\0"# - '\x200D'# -> "\0\0\0\0\5\0ZWJ\0"# - '\x200E'# -> "\0\0\0\0\5\0LRM\0"# - '\x200F'# -> "\0\0\0\0\5\0RLM\0"# - '\x202A'# -> "\0\0\0\0\5\0LRE\0"# - '\x202B'# -> "\0\0\0\0\5\0RLE\0"# - '\x202C'# -> "\0\0\0\0\5\0PDF\0"# - '\x202D'# -> "\0\0\0\0\5\0LRO\0"# - '\x202E'# -> "\0\0\0\0\5\0RLO\0"# - '\x202F'# -> "\0\0\0\0\5\0NNBSP\0"# - '\x205F'# -> "\0\0\0\0\5\0MMSP\0"# - '\x2060'# -> "\0\0\0\0\5\0WJ\0"# - '\x2066'# -> "\0\0\0\0\5\0LRI\0"# - '\x2067'# -> "\0\0\0\0\5\0RLI\0"# - '\x2068'# -> "\0\0\0\0\5\0FSI\0"# - '\x2069'# -> "\0\0\0\0\5\0PDI\0"# - '\x2118'# -> "\5\0\0\0\0\0WEIERSTRASS ELLIPTIC FUNCTION\0"# - '\x2448'# -> "\5\0\0\0\0\0MICR ON US SYMBOL\0"# - '\x2449'# -> "\5\0\0\0\0\0MICR DASH SYMBOL\0"# - '\x2B7A'# -> "\5\0\0\0\0\0LEFTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE\0"# - '\x2B7C'# -> "\5\0\0\0\0\0RIGHTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE\0"# - '\xA015'# -> "\5\0\0\0\0\0YI SYLLABLE ITERATION MARK\0"# - '\xAA6E'# -> "\5\0\0\0\0\0MYANMAR LETTER KHAMTI LLA\0"# - '\xFE00'# -> "\0\0\0\0\5\0VS1\0"# - '\xFE01'# -> "\0\0\0\0\5\0VS2\0"# - '\xFE02'# -> "\0\0\0\0\5\0VS3\0"# - '\xFE03'# -> "\0\0\0\0\5\0VS4\0"# - '\xFE04'# -> "\0\0\0\0\5\0VS5\0"# - '\xFE05'# -> "\0\0\0\0\5\0VS6\0"# - '\xFE06'# -> "\0\0\0\0\5\0VS7\0"# - '\xFE07'# -> "\0\0\0\0\5\0VS8\0"# - '\xFE08'# -> "\0\0\0\0\5\0VS9\0"# - '\xFE09'# -> "\0\0\0\0\5\0VS10\0"# - '\xFE0A'# -> "\0\0\0\0\5\0VS11\0"# - '\xFE0B'# -> "\0\0\0\0\5\0VS12\0"# - '\xFE0C'# -> "\0\0\0\0\5\0VS13\0"# - '\xFE0D'# -> "\0\0\0\0\5\0VS14\0"# - '\xFE0E'# -> "\0\0\0\0\5\0VS15\0"# - '\xFE0F'# -> "\0\0\0\0\5\0VS16\0"# - '\xFE18'# -> "\5\0\0\0\0\0PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET\0"# - '\xFEFF'# -> "\0\0\5\0\22\0BYTE ORDER MARK\0\27BOM\0\0ZWNBSP\0"# - '\x122D4'# -> "\5\0\0\0\0\0CUNEIFORM SIGN NU11 TENU\0"# - '\x122D5'# -> "\5\0\0\0\0\0CUNEIFORM SIGN NU11 OVER NU11 BUR OVER BUR\0"# - '\x16E56'# -> "\5\0\0\0\0\0MEDEFAIDRIN CAPITAL LETTER H\0"# - '\x16E57'# -> "\5\0\0\0\0\0MEDEFAIDRIN CAPITAL LETTER NG\0"# - '\x16E76'# -> "\5\0\0\0\0\0MEDEFAIDRIN SMALL LETTER H\0"# - '\x16E77'# -> "\5\0\0\0\0\0MEDEFAIDRIN SMALL LETTER NG\0"# - '\x1B001'# -> "\5\0\0\0\0\0HENTAIGANA LETTER E-1\0"# - '\x1D0C5'# -> "\5\0\0\0\0\0BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS\0"# - '\xE0100'# -> "\0\0\0\0\5\0VS17\0"# - '\xE0101'# -> "\0\0\0\0\5\0VS18\0"# - '\xE0102'# -> "\0\0\0\0\5\0VS19\0"# - '\xE0103'# -> "\0\0\0\0\5\0VS20\0"# - '\xE0104'# -> "\0\0\0\0\5\0VS21\0"# - '\xE0105'# -> "\0\0\0\0\5\0VS22\0"# - '\xE0106'# -> "\0\0\0\0\5\0VS23\0"# - '\xE0107'# -> "\0\0\0\0\5\0VS24\0"# - '\xE0108'# -> "\0\0\0\0\5\0VS25\0"# - '\xE0109'# -> "\0\0\0\0\5\0VS26\0"# - '\xE010A'# -> "\0\0\0\0\5\0VS27\0"# - '\xE010B'# -> "\0\0\0\0\5\0VS28\0"# - '\xE010C'# -> "\0\0\0\0\5\0VS29\0"# - '\xE010D'# -> "\0\0\0\0\5\0VS30\0"# - '\xE010E'# -> "\0\0\0\0\5\0VS31\0"# - '\xE010F'# -> "\0\0\0\0\5\0VS32\0"# - '\xE0110'# -> "\0\0\0\0\5\0VS33\0"# - '\xE0111'# -> "\0\0\0\0\5\0VS34\0"# - '\xE0112'# -> "\0\0\0\0\5\0VS35\0"# - '\xE0113'# -> "\0\0\0\0\5\0VS36\0"# - '\xE0114'# -> "\0\0\0\0\5\0VS37\0"# - '\xE0115'# -> "\0\0\0\0\5\0VS38\0"# - '\xE0116'# -> "\0\0\0\0\5\0VS39\0"# - '\xE0117'# -> "\0\0\0\0\5\0VS40\0"# - '\xE0118'# -> "\0\0\0\0\5\0VS41\0"# - '\xE0119'# -> "\0\0\0\0\5\0VS42\0"# - '\xE011A'# -> "\0\0\0\0\5\0VS43\0"# - '\xE011B'# -> "\0\0\0\0\5\0VS44\0"# - '\xE011C'# -> "\0\0\0\0\5\0VS45\0"# - '\xE011D'# -> "\0\0\0\0\5\0VS46\0"# - '\xE011E'# -> "\0\0\0\0\5\0VS47\0"# - '\xE011F'# -> "\0\0\0\0\5\0VS48\0"# - '\xE0120'# -> "\0\0\0\0\5\0VS49\0"# - '\xE0121'# -> "\0\0\0\0\5\0VS50\0"# - '\xE0122'# -> "\0\0\0\0\5\0VS51\0"# - '\xE0123'# -> "\0\0\0\0\5\0VS52\0"# - '\xE0124'# -> "\0\0\0\0\5\0VS53\0"# - '\xE0125'# -> "\0\0\0\0\5\0VS54\0"# - '\xE0126'# -> "\0\0\0\0\5\0VS55\0"# - '\xE0127'# -> "\0\0\0\0\5\0VS56\0"# - '\xE0128'# -> "\0\0\0\0\5\0VS57\0"# - '\xE0129'# -> "\0\0\0\0\5\0VS58\0"# - '\xE012A'# -> "\0\0\0\0\5\0VS59\0"# - '\xE012B'# -> "\0\0\0\0\5\0VS60\0"# - '\xE012C'# -> "\0\0\0\0\5\0VS61\0"# - '\xE012D'# -> "\0\0\0\0\5\0VS62\0"# - '\xE012E'# -> "\0\0\0\0\5\0VS63\0"# - '\xE012F'# -> "\0\0\0\0\5\0VS64\0"# - '\xE0130'# -> "\0\0\0\0\5\0VS65\0"# - '\xE0131'# -> "\0\0\0\0\5\0VS66\0"# - '\xE0132'# -> "\0\0\0\0\5\0VS67\0"# - '\xE0133'# -> "\0\0\0\0\5\0VS68\0"# - '\xE0134'# -> "\0\0\0\0\5\0VS69\0"# - '\xE0135'# -> "\0\0\0\0\5\0VS70\0"# - '\xE0136'# -> "\0\0\0\0\5\0VS71\0"# - '\xE0137'# -> "\0\0\0\0\5\0VS72\0"# - '\xE0138'# -> "\0\0\0\0\5\0VS73\0"# - '\xE0139'# -> "\0\0\0\0\5\0VS74\0"# - '\xE013A'# -> "\0\0\0\0\5\0VS75\0"# - '\xE013B'# -> "\0\0\0\0\5\0VS76\0"# - '\xE013C'# -> "\0\0\0\0\5\0VS77\0"# - '\xE013D'# -> "\0\0\0\0\5\0VS78\0"# - '\xE013E'# -> "\0\0\0\0\5\0VS79\0"# - '\xE013F'# -> "\0\0\0\0\5\0VS80\0"# - '\xE0140'# -> "\0\0\0\0\5\0VS81\0"# - '\xE0141'# -> "\0\0\0\0\5\0VS82\0"# - '\xE0142'# -> "\0\0\0\0\5\0VS83\0"# - '\xE0143'# -> "\0\0\0\0\5\0VS84\0"# - '\xE0144'# -> "\0\0\0\0\5\0VS85\0"# - '\xE0145'# -> "\0\0\0\0\5\0VS86\0"# - '\xE0146'# -> "\0\0\0\0\5\0VS87\0"# - '\xE0147'# -> "\0\0\0\0\5\0VS88\0"# - '\xE0148'# -> "\0\0\0\0\5\0VS89\0"# - '\xE0149'# -> "\0\0\0\0\5\0VS90\0"# - '\xE014A'# -> "\0\0\0\0\5\0VS91\0"# - '\xE014B'# -> "\0\0\0\0\5\0VS92\0"# - '\xE014C'# -> "\0\0\0\0\5\0VS93\0"# - '\xE014D'# -> "\0\0\0\0\5\0VS94\0"# - '\xE014E'# -> "\0\0\0\0\5\0VS95\0"# - '\xE014F'# -> "\0\0\0\0\5\0VS96\0"# - '\xE0150'# -> "\0\0\0\0\5\0VS97\0"# - '\xE0151'# -> "\0\0\0\0\5\0VS98\0"# - '\xE0152'# -> "\0\0\0\0\5\0VS99\0"# - '\xE0153'# -> "\0\0\0\0\5\0VS100\0"# - '\xE0154'# -> "\0\0\0\0\5\0VS101\0"# - '\xE0155'# -> "\0\0\0\0\5\0VS102\0"# - '\xE0156'# -> "\0\0\0\0\5\0VS103\0"# - '\xE0157'# -> "\0\0\0\0\5\0VS104\0"# - '\xE0158'# -> "\0\0\0\0\5\0VS105\0"# - '\xE0159'# -> "\0\0\0\0\5\0VS106\0"# - '\xE015A'# -> "\0\0\0\0\5\0VS107\0"# - '\xE015B'# -> "\0\0\0\0\5\0VS108\0"# - '\xE015C'# -> "\0\0\0\0\5\0VS109\0"# - '\xE015D'# -> "\0\0\0\0\5\0VS110\0"# - '\xE015E'# -> "\0\0\0\0\5\0VS111\0"# - '\xE015F'# -> "\0\0\0\0\5\0VS112\0"# - '\xE0160'# -> "\0\0\0\0\5\0VS113\0"# - '\xE0161'# -> "\0\0\0\0\5\0VS114\0"# - '\xE0162'# -> "\0\0\0\0\5\0VS115\0"# - '\xE0163'# -> "\0\0\0\0\5\0VS116\0"# - '\xE0164'# -> "\0\0\0\0\5\0VS117\0"# - '\xE0165'# -> "\0\0\0\0\5\0VS118\0"# - '\xE0166'# -> "\0\0\0\0\5\0VS119\0"# - '\xE0167'# -> "\0\0\0\0\5\0VS120\0"# - '\xE0168'# -> "\0\0\0\0\5\0VS121\0"# - '\xE0169'# -> "\0\0\0\0\5\0VS122\0"# - '\xE016A'# -> "\0\0\0\0\5\0VS123\0"# - '\xE016B'# -> "\0\0\0\0\5\0VS124\0"# - '\xE016C'# -> "\0\0\0\0\5\0VS125\0"# - '\xE016D'# -> "\0\0\0\0\5\0VS126\0"# - '\xE016E'# -> "\0\0\0\0\5\0VS127\0"# - '\xE016F'# -> "\0\0\0\0\5\0VS128\0"# - '\xE0170'# -> "\0\0\0\0\5\0VS129\0"# - '\xE0171'# -> "\0\0\0\0\5\0VS130\0"# - '\xE0172'# -> "\0\0\0\0\5\0VS131\0"# - '\xE0173'# -> "\0\0\0\0\5\0VS132\0"# - '\xE0174'# -> "\0\0\0\0\5\0VS133\0"# - '\xE0175'# -> "\0\0\0\0\5\0VS134\0"# - '\xE0176'# -> "\0\0\0\0\5\0VS135\0"# - '\xE0177'# -> "\0\0\0\0\5\0VS136\0"# - '\xE0178'# -> "\0\0\0\0\5\0VS137\0"# - '\xE0179'# -> "\0\0\0\0\5\0VS138\0"# - '\xE017A'# -> "\0\0\0\0\5\0VS139\0"# - '\xE017B'# -> "\0\0\0\0\5\0VS140\0"# - '\xE017C'# -> "\0\0\0\0\5\0VS141\0"# - '\xE017D'# -> "\0\0\0\0\5\0VS142\0"# - '\xE017E'# -> "\0\0\0\0\5\0VS143\0"# - '\xE017F'# -> "\0\0\0\0\5\0VS144\0"# - '\xE0180'# -> "\0\0\0\0\5\0VS145\0"# - '\xE0181'# -> "\0\0\0\0\5\0VS146\0"# - '\xE0182'# -> "\0\0\0\0\5\0VS147\0"# - '\xE0183'# -> "\0\0\0\0\5\0VS148\0"# - '\xE0184'# -> "\0\0\0\0\5\0VS149\0"# - '\xE0185'# -> "\0\0\0\0\5\0VS150\0"# - '\xE0186'# -> "\0\0\0\0\5\0VS151\0"# - '\xE0187'# -> "\0\0\0\0\5\0VS152\0"# - '\xE0188'# -> "\0\0\0\0\5\0VS153\0"# - '\xE0189'# -> "\0\0\0\0\5\0VS154\0"# - '\xE018A'# -> "\0\0\0\0\5\0VS155\0"# - '\xE018B'# -> "\0\0\0\0\5\0VS156\0"# - '\xE018C'# -> "\0\0\0\0\5\0VS157\0"# - '\xE018D'# -> "\0\0\0\0\5\0VS158\0"# - '\xE018E'# -> "\0\0\0\0\5\0VS159\0"# - '\xE018F'# -> "\0\0\0\0\5\0VS160\0"# - '\xE0190'# -> "\0\0\0\0\5\0VS161\0"# - '\xE0191'# -> "\0\0\0\0\5\0VS162\0"# - '\xE0192'# -> "\0\0\0\0\5\0VS163\0"# - '\xE0193'# -> "\0\0\0\0\5\0VS164\0"# - '\xE0194'# -> "\0\0\0\0\5\0VS165\0"# - '\xE0195'# -> "\0\0\0\0\5\0VS166\0"# - '\xE0196'# -> "\0\0\0\0\5\0VS167\0"# - '\xE0197'# -> "\0\0\0\0\5\0VS168\0"# - '\xE0198'# -> "\0\0\0\0\5\0VS169\0"# - '\xE0199'# -> "\0\0\0\0\5\0VS170\0"# - '\xE019A'# -> "\0\0\0\0\5\0VS171\0"# - '\xE019B'# -> "\0\0\0\0\5\0VS172\0"# - '\xE019C'# -> "\0\0\0\0\5\0VS173\0"# - '\xE019D'# -> "\0\0\0\0\5\0VS174\0"# - '\xE019E'# -> "\0\0\0\0\5\0VS175\0"# - '\xE019F'# -> "\0\0\0\0\5\0VS176\0"# - '\xE01A0'# -> "\0\0\0\0\5\0VS177\0"# - '\xE01A1'# -> "\0\0\0\0\5\0VS178\0"# - '\xE01A2'# -> "\0\0\0\0\5\0VS179\0"# - '\xE01A3'# -> "\0\0\0\0\5\0VS180\0"# - '\xE01A4'# -> "\0\0\0\0\5\0VS181\0"# - '\xE01A5'# -> "\0\0\0\0\5\0VS182\0"# - '\xE01A6'# -> "\0\0\0\0\5\0VS183\0"# - '\xE01A7'# -> "\0\0\0\0\5\0VS184\0"# - '\xE01A8'# -> "\0\0\0\0\5\0VS185\0"# - '\xE01A9'# -> "\0\0\0\0\5\0VS186\0"# - '\xE01AA'# -> "\0\0\0\0\5\0VS187\0"# - '\xE01AB'# -> "\0\0\0\0\5\0VS188\0"# - '\xE01AC'# -> "\0\0\0\0\5\0VS189\0"# - '\xE01AD'# -> "\0\0\0\0\5\0VS190\0"# - '\xE01AE'# -> "\0\0\0\0\5\0VS191\0"# - '\xE01AF'# -> "\0\0\0\0\5\0VS192\0"# - '\xE01B0'# -> "\0\0\0\0\5\0VS193\0"# - '\xE01B1'# -> "\0\0\0\0\5\0VS194\0"# - '\xE01B2'# -> "\0\0\0\0\5\0VS195\0"# - '\xE01B3'# -> "\0\0\0\0\5\0VS196\0"# - '\xE01B4'# -> "\0\0\0\0\5\0VS197\0"# - '\xE01B5'# -> "\0\0\0\0\5\0VS198\0"# - '\xE01B6'# -> "\0\0\0\0\5\0VS199\0"# - '\xE01B7'# -> "\0\0\0\0\5\0VS200\0"# - '\xE01B8'# -> "\0\0\0\0\5\0VS201\0"# - '\xE01B9'# -> "\0\0\0\0\5\0VS202\0"# - '\xE01BA'# -> "\0\0\0\0\5\0VS203\0"# - '\xE01BB'# -> "\0\0\0\0\5\0VS204\0"# - '\xE01BC'# -> "\0\0\0\0\5\0VS205\0"# - '\xE01BD'# -> "\0\0\0\0\5\0VS206\0"# - '\xE01BE'# -> "\0\0\0\0\5\0VS207\0"# - '\xE01BF'# -> "\0\0\0\0\5\0VS208\0"# - '\xE01C0'# -> "\0\0\0\0\5\0VS209\0"# - '\xE01C1'# -> "\0\0\0\0\5\0VS210\0"# - '\xE01C2'# -> "\0\0\0\0\5\0VS211\0"# - '\xE01C3'# -> "\0\0\0\0\5\0VS212\0"# - '\xE01C4'# -> "\0\0\0\0\5\0VS213\0"# - '\xE01C5'# -> "\0\0\0\0\5\0VS214\0"# - '\xE01C6'# -> "\0\0\0\0\5\0VS215\0"# - '\xE01C7'# -> "\0\0\0\0\5\0VS216\0"# - '\xE01C8'# -> "\0\0\0\0\5\0VS217\0"# - '\xE01C9'# -> "\0\0\0\0\5\0VS218\0"# - '\xE01CA'# -> "\0\0\0\0\5\0VS219\0"# - '\xE01CB'# -> "\0\0\0\0\5\0VS220\0"# - '\xE01CC'# -> "\0\0\0\0\5\0VS221\0"# - '\xE01CD'# -> "\0\0\0\0\5\0VS222\0"# - '\xE01CE'# -> "\0\0\0\0\5\0VS223\0"# - '\xE01CF'# -> "\0\0\0\0\5\0VS224\0"# - '\xE01D0'# -> "\0\0\0\0\5\0VS225\0"# - '\xE01D1'# -> "\0\0\0\0\5\0VS226\0"# - '\xE01D2'# -> "\0\0\0\0\5\0VS227\0"# - '\xE01D3'# -> "\0\0\0\0\5\0VS228\0"# - '\xE01D4'# -> "\0\0\0\0\5\0VS229\0"# - '\xE01D5'# -> "\0\0\0\0\5\0VS230\0"# - '\xE01D6'# -> "\0\0\0\0\5\0VS231\0"# - '\xE01D7'# -> "\0\0\0\0\5\0VS232\0"# - '\xE01D8'# -> "\0\0\0\0\5\0VS233\0"# - '\xE01D9'# -> "\0\0\0\0\5\0VS234\0"# - '\xE01DA'# -> "\0\0\0\0\5\0VS235\0"# - '\xE01DB'# -> "\0\0\0\0\5\0VS236\0"# - '\xE01DC'# -> "\0\0\0\0\5\0VS237\0"# - '\xE01DD'# -> "\0\0\0\0\5\0VS238\0"# - '\xE01DE'# -> "\0\0\0\0\5\0VS239\0"# - '\xE01DF'# -> "\0\0\0\0\5\0VS240\0"# - '\xE01E0'# -> "\0\0\0\0\5\0VS241\0"# - '\xE01E1'# -> "\0\0\0\0\5\0VS242\0"# - '\xE01E2'# -> "\0\0\0\0\5\0VS243\0"# - '\xE01E3'# -> "\0\0\0\0\5\0VS244\0"# - '\xE01E4'# -> "\0\0\0\0\5\0VS245\0"# - '\xE01E5'# -> "\0\0\0\0\5\0VS246\0"# - '\xE01E6'# -> "\0\0\0\0\5\0VS247\0"# - '\xE01E7'# -> "\0\0\0\0\5\0VS248\0"# - '\xE01E8'# -> "\0\0\0\0\5\0VS249\0"# - '\xE01E9'# -> "\0\0\0\0\5\0VS250\0"# - '\xE01EA'# -> "\0\0\0\0\5\0VS251\0"# - '\xE01EB'# -> "\0\0\0\0\5\0VS252\0"# - '\xE01EC'# -> "\0\0\0\0\5\0VS253\0"# - '\xE01ED'# -> "\0\0\0\0\5\0VS254\0"# - '\xE01EE'# -> "\0\0\0\0\5\0VS255\0"# - '\xE01EF'# -> "\0\0\0\0\5\0VS256\0"# - + '\x0000'# -> "\0\5\0\0\11\4NULL\0\3NUL\0\0"# + '\x0001'# -> "\0\5\0\0\23\16START OF HEADING\0\3SOH\0\0"# + '\x0002'# -> "\0\5\0\0\20\13START OF TEXT\0\3STX\0\0"# + '\x0003'# -> "\0\5\0\0\18\11END OF TEXT\0\3ETX\0\0"# + '\x0004'# -> "\0\5\0\0\26\19END OF TRANSMISSION\0\3EOT\0\0"# + '\x0005'# -> "\0\5\0\0\14\7ENQUIRY\0\3ENQ\0\0"# + '\x0006'# -> "\0\5\0\0\18\11ACKNOWLEDGE\0\3ACK\0\0"# + '\x0007'# -> "\0\5\0\0\12\5ALERT\0\3BEL\0\0"# + '\x0008'# -> "\0\5\0\0\16\9BACKSPACE\0\2BS\0\0"# + '\x0009'# -> "\0\5\0\0\49\20CHARACTER TABULATION\21HORIZONTAL TABULATION\0\2HT\3TAB\0\0"# + '\x000A'# -> "\0\5\0\0\37\9LINE FEED\8NEW LINE\11END OF LINE\0\2LF\2NL\3EOL\0\0"# + '\x000B'# -> "\0\5\0\0\42\15LINE TABULATION\19VERTICAL TABULATION\0\2VT\0\0"# + '\x000C'# -> "\0\5\0\0\16\9FORM FEED\0\2FF\0\0"# + '\x000D'# -> "\0\5\0\0\22\15CARRIAGE RETURN\0\2CR\0\0"# + '\x000E'# -> "\0\5\0\0\34\9SHIFT OUT\17LOCKING-SHIFT ONE\0\2SO\0\0"# + '\x000F'# -> "\0\5\0\0\34\8SHIFT IN\18LOCKING-SHIFT ZERO\0\2SI\0\0"# + '\x0010'# -> "\0\5\0\0\23\16DATA LINK ESCAPE\0\3DLE\0\0"# + '\x0011'# -> "\0\5\0\0\25\18DEVICE CONTROL ONE\0\3DC1\0\0"# + '\x0012'# -> "\0\5\0\0\25\18DEVICE CONTROL TWO\0\3DC2\0\0"# + '\x0013'# -> "\0\5\0\0\27\20DEVICE CONTROL THREE\0\3DC3\0\0"# + '\x0014'# -> "\0\5\0\0\26\19DEVICE CONTROL FOUR\0\3DC4\0\0"# + '\x0015'# -> "\0\5\0\0\27\20NEGATIVE ACKNOWLEDGE\0\3NAK\0\0"# + '\x0016'# -> "\0\5\0\0\23\16SYNCHRONOUS IDLE\0\3SYN\0\0"# + '\x0017'# -> "\0\5\0\0\32\25END OF TRANSMISSION BLOCK\0\3ETB\0\0"# + '\x0018'# -> "\0\5\0\0\13\6CANCEL\0\3CAN\0\0"# + '\x0019'# -> "\0\5\0\0\20\13END OF MEDIUM\0\3EOM\2EM\0\0"# + '\x001A'# -> "\0\5\0\0\17\10SUBSTITUTE\0\3SUB\0\0"# + '\x001B'# -> "\0\5\0\0\13\6ESCAPE\0\3ESC\0\0"# + '\x001C'# -> "\0\5\0\0\48\26INFORMATION SEPARATOR FOUR\14FILE SEPARATOR\0\2FS\0\0"# + '\x001D'# -> "\0\5\0\0\50\27INFORMATION SEPARATOR THREE\15GROUP SEPARATOR\0\2GS\0\0"# + '\x001E'# -> "\0\5\0\0\49\25INFORMATION SEPARATOR TWO\16RECORD SEPARATOR\0\2RS\0\0"# + '\x001F'# -> "\0\5\0\0\47\25INFORMATION SEPARATOR ONE\14UNIT SEPARATOR\0\2US\0\0"# + '\x0020'# -> "\0\0\0\0\5\2SP\0\0"# + '\x007F'# -> "\0\5\0\0\13\6DELETE\0\3DEL\0\0"# + '\x0080'# -> "\0\0\0\5\24\17PADDING CHARACTER\0\3PAD\0\0"# + '\x0081'# -> "\0\0\0\5\24\17HIGH OCTET PRESET\0\3HOP\0\0"# + '\x0082'# -> "\0\5\0\0\27\20BREAK PERMITTED HERE\0\3BPH\0\0"# + '\x0083'# -> "\0\5\0\0\20\13NO BREAK HERE\0\3NBH\0\0"# + '\x0084'# -> "\0\5\0\0\12\5INDEX\0\3IND\0\0"# + '\x0085'# -> "\0\5\0\0\16\9NEXT LINE\0\3NEL\0\0"# + '\x0086'# -> "\0\5\0\0\29\22START OF SELECTED AREA\0\3SSA\0\0"# + '\x0087'# -> "\0\5\0\0\27\20END OF SELECTED AREA\0\3ESA\0\0"# + '\x0088'# -> "\0\5\0\0\57\24CHARACTER TABULATION SET\25HORIZONTAL TABULATION SET\0\3HTS\0\0"# + '\x0089'# -> "\0\5\0\0\87\39CHARACTER TABULATION WITH JUSTIFICATION\40HORIZONTAL TABULATION WITH JUSTIFICATION\0\3HTJ\0\0"# + '\x008A'# -> "\0\5\0\0\50\19LINE TABULATION SET\23VERTICAL TABULATION SET\0\3VTS\0\0"# + '\x008B'# -> "\0\5\0\0\45\20PARTIAL LINE FORWARD\17PARTIAL LINE DOWN\0\3PLD\0\0"# + '\x008C'# -> "\0\5\0\0\44\21PARTIAL LINE BACKWARD\15PARTIAL LINE UP\0\3PLU\0\0"# + '\x008D'# -> "\0\5\0\0\38\17REVERSE LINE FEED\13REVERSE INDEX\0\2RI\0\0"# + '\x008E'# -> "\0\5\0\0\38\16SINGLE SHIFT TWO\14SINGLE-SHIFT-2\0\3SS2\0\0"# + '\x008F'# -> "\0\5\0\0\40\18SINGLE SHIFT THREE\14SINGLE-SHIFT-3\0\3SS3\0\0"# + '\x0090'# -> "\0\5\0\0\28\21DEVICE CONTROL STRING\0\3DCS\0\0"# + '\x0091'# -> "\0\5\0\0\36\15PRIVATE USE ONE\13PRIVATE USE-1\0\3PU1\0\0"# + '\x0092'# -> "\0\5\0\0\36\15PRIVATE USE TWO\13PRIVATE USE-2\0\3PU2\0\0"# + '\x0093'# -> "\0\5\0\0\25\18SET TRANSMIT STATE\0\3STS\0\0"# + '\x0094'# -> "\0\5\0\0\23\16CANCEL CHARACTER\0\3CCH\0\0"# + '\x0095'# -> "\0\5\0\0\22\15MESSAGE WAITING\0\2MW\0\0"# + '\x0096'# -> "\0\5\0\0\52\21START OF GUARDED AREA\23START OF PROTECTED AREA\0\3SPA\0\0"# + '\x0097'# -> "\0\5\0\0\48\19END OF GUARDED AREA\21END OF PROTECTED AREA\0\3EPA\0\0"# + '\x0098'# -> "\0\5\0\0\22\15START OF STRING\0\3SOS\0\0"# + '\x0099'# -> "\0\0\0\5\42\35SINGLE GRAPHIC CHARACTER INTRODUCER\0\3SGC\0\0"# + '\x009A'# -> "\0\5\0\0\34\27SINGLE CHARACTER INTRODUCER\0\3SCI\0\0"# + '\x009B'# -> "\0\5\0\0\34\27CONTROL SEQUENCE INTRODUCER\0\3CSI\0\0"# + '\x009C'# -> "\0\5\0\0\24\17STRING TERMINATOR\0\2ST\0\0"# + '\x009D'# -> "\0\5\0\0\31\24OPERATING SYSTEM COMMAND\0\3OSC\0\0"# + '\x009E'# -> "\0\5\0\0\22\15PRIVACY MESSAGE\0\2PM\0\0"# + '\x009F'# -> "\0\5\0\0\34\27APPLICATION PROGRAM COMMAND\0\3APC\0\0"# + '\x00A0'# -> "\0\0\0\0\5\4NBSP\0\0"# + '\x00AD'# -> "\0\0\0\0\5\3SHY\0\0"# + '\x01A2'# -> "\5\0\0\0\0\24LATIN CAPITAL LETTER GHA\0\0"# + '\x01A3'# -> "\5\0\0\0\0\22LATIN SMALL LETTER GHA\0\0"# + '\x034F'# -> "\0\0\0\0\5\3CGJ\0\0"# + '\x0616'# -> "\5\0\0\0\0\47ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE\0\0"# + '\x061C'# -> "\0\0\0\0\5\3ALM\0\0"# + '\x0709'# -> "\5\0\0\0\0\34SYRIAC SUBLINEAR COLON SKEWED LEFT\0\0"# + '\x0CDE'# -> "\5\0\0\0\0\19KANNADA LETTER LLLA\0\0"# + '\x0E9D'# -> "\5\0\0\0\0\17LAO LETTER FO FON\0\0"# + '\x0E9F'# -> "\5\0\0\0\0\17LAO LETTER FO FAY\0\0"# + '\x0EA3'# -> "\5\0\0\0\0\13LAO LETTER RO\0\0"# + '\x0EA5'# -> "\5\0\0\0\0\13LAO LETTER LO\0\0"# + '\x0FD0'# -> "\5\0\0\0\0\35TIBETAN MARK BKA- SHOG GI MGO RGYAN\0\0"# + '\x11EC'# -> "\5\0\0\0\0\32HANGUL JONGSEONG YESIEUNG-KIYEOK\0\0"# + '\x11ED'# -> "\5\0\0\0\0\37HANGUL JONGSEONG YESIEUNG-SSANGKIYEOK\0\0"# + '\x11EE'# -> "\5\0\0\0\0\30HANGUL JONGSEONG SSANGYESIEUNG\0\0"# + '\x11EF'# -> "\5\0\0\0\0\33HANGUL JONGSEONG YESIEUNG-KHIEUKH\0\0"# + '\x180B'# -> "\0\0\0\0\5\4FVS1\0\0"# + '\x180C'# -> "\0\0\0\0\5\4FVS2\0\0"# + '\x180D'# -> "\0\0\0\0\5\4FVS3\0\0"# + '\x180E'# -> "\0\0\0\0\5\3MVS\0\0"# + '\x180F'# -> "\0\0\0\0\5\4FVS4\0\0"# + '\x1BBD'# -> "\5\0\0\0\0\26SUNDANESE LETTER ARCHAIC I\0\0"# + '\x200B'# -> "\0\0\0\0\5\4ZWSP\0\0"# + '\x200C'# -> "\0\0\0\0\5\4ZWNJ\0\0"# + '\x200D'# -> "\0\0\0\0\5\3ZWJ\0\0"# + '\x200E'# -> "\0\0\0\0\5\3LRM\0\0"# + '\x200F'# -> "\0\0\0\0\5\3RLM\0\0"# + '\x202A'# -> "\0\0\0\0\5\3LRE\0\0"# + '\x202B'# -> "\0\0\0\0\5\3RLE\0\0"# + '\x202C'# -> "\0\0\0\0\5\3PDF\0\0"# + '\x202D'# -> "\0\0\0\0\5\3LRO\0\0"# + '\x202E'# -> "\0\0\0\0\5\3RLO\0\0"# + '\x202F'# -> "\0\0\0\0\5\5NNBSP\0\0"# + '\x205F'# -> "\0\0\0\0\5\4MMSP\0\0"# + '\x2060'# -> "\0\0\0\0\5\2WJ\0\0"# + '\x2066'# -> "\0\0\0\0\5\3LRI\0\0"# + '\x2067'# -> "\0\0\0\0\5\3RLI\0\0"# + '\x2068'# -> "\0\0\0\0\5\3FSI\0\0"# + '\x2069'# -> "\0\0\0\0\5\3PDI\0\0"# + '\x2118'# -> "\5\0\0\0\0\29WEIERSTRASS ELLIPTIC FUNCTION\0\0"# + '\x2448'# -> "\5\0\0\0\0\17MICR ON US SYMBOL\0\0"# + '\x2449'# -> "\5\0\0\0\0\16MICR DASH SYMBOL\0\0"# + '\x2B7A'# -> "\5\0\0\0\0\59LEFTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE\0\0"# + '\x2B7C'# -> "\5\0\0\0\0\60RIGHTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE\0\0"# + '\xA015'# -> "\5\0\0\0\0\26YI SYLLABLE ITERATION MARK\0\0"# + '\xAA6E'# -> "\5\0\0\0\0\25MYANMAR LETTER KHAMTI LLA\0\0"# + '\xFE00'# -> "\0\0\0\0\5\3VS1\0\0"# + '\xFE01'# -> "\0\0\0\0\5\3VS2\0\0"# + '\xFE02'# -> "\0\0\0\0\5\3VS3\0\0"# + '\xFE03'# -> "\0\0\0\0\5\3VS4\0\0"# + '\xFE04'# -> "\0\0\0\0\5\3VS5\0\0"# + '\xFE05'# -> "\0\0\0\0\5\3VS6\0\0"# + '\xFE06'# -> "\0\0\0\0\5\3VS7\0\0"# + '\xFE07'# -> "\0\0\0\0\5\3VS8\0\0"# + '\xFE08'# -> "\0\0\0\0\5\3VS9\0\0"# + '\xFE09'# -> "\0\0\0\0\5\4VS10\0\0"# + '\xFE0A'# -> "\0\0\0\0\5\4VS11\0\0"# + '\xFE0B'# -> "\0\0\0\0\5\4VS12\0\0"# + '\xFE0C'# -> "\0\0\0\0\5\4VS13\0\0"# + '\xFE0D'# -> "\0\0\0\0\5\4VS14\0\0"# + '\xFE0E'# -> "\0\0\0\0\5\4VS15\0\0"# + '\xFE0F'# -> "\0\0\0\0\5\4VS16\0\0"# + '\xFE18'# -> "\5\0\0\0\0\61PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET\0\0"# + '\xFEFF'# -> "\0\0\5\0\22\15BYTE ORDER MARK\0\3BOM\6ZWNBSP\0\0"# + '\x122D4'# -> "\5\0\0\0\0\24CUNEIFORM SIGN NU11 TENU\0\0"# + '\x122D5'# -> "\5\0\0\0\0\42CUNEIFORM SIGN NU11 OVER NU11 BUR OVER BUR\0\0"# + '\x16E56'# -> "\5\0\0\0\0\28MEDEFAIDRIN CAPITAL LETTER H\0\0"# + '\x16E57'# -> "\5\0\0\0\0\29MEDEFAIDRIN CAPITAL LETTER NG\0\0"# + '\x16E76'# -> "\5\0\0\0\0\26MEDEFAIDRIN SMALL LETTER H\0\0"# + '\x16E77'# -> "\5\0\0\0\0\27MEDEFAIDRIN SMALL LETTER NG\0\0"# + '\x1B001'# -> "\5\0\0\0\0\21HENTAIGANA LETTER E-1\0\0"# + '\x1D0C5'# -> "\5\0\0\0\0\52BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS\0\0"# + '\xE0100'# -> "\0\0\0\0\5\4VS17\0\0"# + '\xE0101'# -> "\0\0\0\0\5\4VS18\0\0"# + '\xE0102'# -> "\0\0\0\0\5\4VS19\0\0"# + '\xE0103'# -> "\0\0\0\0\5\4VS20\0\0"# + '\xE0104'# -> "\0\0\0\0\5\4VS21\0\0"# + '\xE0105'# -> "\0\0\0\0\5\4VS22\0\0"# + '\xE0106'# -> "\0\0\0\0\5\4VS23\0\0"# + '\xE0107'# -> "\0\0\0\0\5\4VS24\0\0"# + '\xE0108'# -> "\0\0\0\0\5\4VS25\0\0"# + '\xE0109'# -> "\0\0\0\0\5\4VS26\0\0"# + '\xE010A'# -> "\0\0\0\0\5\4VS27\0\0"# + '\xE010B'# -> "\0\0\0\0\5\4VS28\0\0"# + '\xE010C'# -> "\0\0\0\0\5\4VS29\0\0"# + '\xE010D'# -> "\0\0\0\0\5\4VS30\0\0"# + '\xE010E'# -> "\0\0\0\0\5\4VS31\0\0"# + '\xE010F'# -> "\0\0\0\0\5\4VS32\0\0"# + '\xE0110'# -> "\0\0\0\0\5\4VS33\0\0"# + '\xE0111'# -> "\0\0\0\0\5\4VS34\0\0"# + '\xE0112'# -> "\0\0\0\0\5\4VS35\0\0"# + '\xE0113'# -> "\0\0\0\0\5\4VS36\0\0"# + '\xE0114'# -> "\0\0\0\0\5\4VS37\0\0"# + '\xE0115'# -> "\0\0\0\0\5\4VS38\0\0"# + '\xE0116'# -> "\0\0\0\0\5\4VS39\0\0"# + '\xE0117'# -> "\0\0\0\0\5\4VS40\0\0"# + '\xE0118'# -> "\0\0\0\0\5\4VS41\0\0"# + '\xE0119'# -> "\0\0\0\0\5\4VS42\0\0"# + '\xE011A'# -> "\0\0\0\0\5\4VS43\0\0"# + '\xE011B'# -> "\0\0\0\0\5\4VS44\0\0"# + '\xE011C'# -> "\0\0\0\0\5\4VS45\0\0"# + '\xE011D'# -> "\0\0\0\0\5\4VS46\0\0"# + '\xE011E'# -> "\0\0\0\0\5\4VS47\0\0"# + '\xE011F'# -> "\0\0\0\0\5\4VS48\0\0"# + '\xE0120'# -> "\0\0\0\0\5\4VS49\0\0"# + '\xE0121'# -> "\0\0\0\0\5\4VS50\0\0"# + '\xE0122'# -> "\0\0\0\0\5\4VS51\0\0"# + '\xE0123'# -> "\0\0\0\0\5\4VS52\0\0"# + '\xE0124'# -> "\0\0\0\0\5\4VS53\0\0"# + '\xE0125'# -> "\0\0\0\0\5\4VS54\0\0"# + '\xE0126'# -> "\0\0\0\0\5\4VS55\0\0"# + '\xE0127'# -> "\0\0\0\0\5\4VS56\0\0"# + '\xE0128'# -> "\0\0\0\0\5\4VS57\0\0"# + '\xE0129'# -> "\0\0\0\0\5\4VS58\0\0"# + '\xE012A'# -> "\0\0\0\0\5\4VS59\0\0"# + '\xE012B'# -> "\0\0\0\0\5\4VS60\0\0"# + '\xE012C'# -> "\0\0\0\0\5\4VS61\0\0"# + '\xE012D'# -> "\0\0\0\0\5\4VS62\0\0"# + '\xE012E'# -> "\0\0\0\0\5\4VS63\0\0"# + '\xE012F'# -> "\0\0\0\0\5\4VS64\0\0"# + '\xE0130'# -> "\0\0\0\0\5\4VS65\0\0"# + '\xE0131'# -> "\0\0\0\0\5\4VS66\0\0"# + '\xE0132'# -> "\0\0\0\0\5\4VS67\0\0"# + '\xE0133'# -> "\0\0\0\0\5\4VS68\0\0"# + '\xE0134'# -> "\0\0\0\0\5\4VS69\0\0"# + '\xE0135'# -> "\0\0\0\0\5\4VS70\0\0"# + '\xE0136'# -> "\0\0\0\0\5\4VS71\0\0"# + '\xE0137'# -> "\0\0\0\0\5\4VS72\0\0"# + '\xE0138'# -> "\0\0\0\0\5\4VS73\0\0"# + '\xE0139'# -> "\0\0\0\0\5\4VS74\0\0"# + '\xE013A'# -> "\0\0\0\0\5\4VS75\0\0"# + '\xE013B'# -> "\0\0\0\0\5\4VS76\0\0"# + '\xE013C'# -> "\0\0\0\0\5\4VS77\0\0"# + '\xE013D'# -> "\0\0\0\0\5\4VS78\0\0"# + '\xE013E'# -> "\0\0\0\0\5\4VS79\0\0"# + '\xE013F'# -> "\0\0\0\0\5\4VS80\0\0"# + '\xE0140'# -> "\0\0\0\0\5\4VS81\0\0"# + '\xE0141'# -> "\0\0\0\0\5\4VS82\0\0"# + '\xE0142'# -> "\0\0\0\0\5\4VS83\0\0"# + '\xE0143'# -> "\0\0\0\0\5\4VS84\0\0"# + '\xE0144'# -> "\0\0\0\0\5\4VS85\0\0"# + '\xE0145'# -> "\0\0\0\0\5\4VS86\0\0"# + '\xE0146'# -> "\0\0\0\0\5\4VS87\0\0"# + '\xE0147'# -> "\0\0\0\0\5\4VS88\0\0"# + '\xE0148'# -> "\0\0\0\0\5\4VS89\0\0"# + '\xE0149'# -> "\0\0\0\0\5\4VS90\0\0"# + '\xE014A'# -> "\0\0\0\0\5\4VS91\0\0"# + '\xE014B'# -> "\0\0\0\0\5\4VS92\0\0"# + '\xE014C'# -> "\0\0\0\0\5\4VS93\0\0"# + '\xE014D'# -> "\0\0\0\0\5\4VS94\0\0"# + '\xE014E'# -> "\0\0\0\0\5\4VS95\0\0"# + '\xE014F'# -> "\0\0\0\0\5\4VS96\0\0"# + '\xE0150'# -> "\0\0\0\0\5\4VS97\0\0"# + '\xE0151'# -> "\0\0\0\0\5\4VS98\0\0"# + '\xE0152'# -> "\0\0\0\0\5\4VS99\0\0"# + '\xE0153'# -> "\0\0\0\0\5\5VS100\0\0"# + '\xE0154'# -> "\0\0\0\0\5\5VS101\0\0"# + '\xE0155'# -> "\0\0\0\0\5\5VS102\0\0"# + '\xE0156'# -> "\0\0\0\0\5\5VS103\0\0"# + '\xE0157'# -> "\0\0\0\0\5\5VS104\0\0"# + '\xE0158'# -> "\0\0\0\0\5\5VS105\0\0"# + '\xE0159'# -> "\0\0\0\0\5\5VS106\0\0"# + '\xE015A'# -> "\0\0\0\0\5\5VS107\0\0"# + '\xE015B'# -> "\0\0\0\0\5\5VS108\0\0"# + '\xE015C'# -> "\0\0\0\0\5\5VS109\0\0"# + '\xE015D'# -> "\0\0\0\0\5\5VS110\0\0"# + '\xE015E'# -> "\0\0\0\0\5\5VS111\0\0"# + '\xE015F'# -> "\0\0\0\0\5\5VS112\0\0"# + '\xE0160'# -> "\0\0\0\0\5\5VS113\0\0"# + '\xE0161'# -> "\0\0\0\0\5\5VS114\0\0"# + '\xE0162'# -> "\0\0\0\0\5\5VS115\0\0"# + '\xE0163'# -> "\0\0\0\0\5\5VS116\0\0"# + '\xE0164'# -> "\0\0\0\0\5\5VS117\0\0"# + '\xE0165'# -> "\0\0\0\0\5\5VS118\0\0"# + '\xE0166'# -> "\0\0\0\0\5\5VS119\0\0"# + '\xE0167'# -> "\0\0\0\0\5\5VS120\0\0"# + '\xE0168'# -> "\0\0\0\0\5\5VS121\0\0"# + '\xE0169'# -> "\0\0\0\0\5\5VS122\0\0"# + '\xE016A'# -> "\0\0\0\0\5\5VS123\0\0"# + '\xE016B'# -> "\0\0\0\0\5\5VS124\0\0"# + '\xE016C'# -> "\0\0\0\0\5\5VS125\0\0"# + '\xE016D'# -> "\0\0\0\0\5\5VS126\0\0"# + '\xE016E'# -> "\0\0\0\0\5\5VS127\0\0"# + '\xE016F'# -> "\0\0\0\0\5\5VS128\0\0"# + '\xE0170'# -> "\0\0\0\0\5\5VS129\0\0"# + '\xE0171'# -> "\0\0\0\0\5\5VS130\0\0"# + '\xE0172'# -> "\0\0\0\0\5\5VS131\0\0"# + '\xE0173'# -> "\0\0\0\0\5\5VS132\0\0"# + '\xE0174'# -> "\0\0\0\0\5\5VS133\0\0"# + '\xE0175'# -> "\0\0\0\0\5\5VS134\0\0"# + '\xE0176'# -> "\0\0\0\0\5\5VS135\0\0"# + '\xE0177'# -> "\0\0\0\0\5\5VS136\0\0"# + '\xE0178'# -> "\0\0\0\0\5\5VS137\0\0"# + '\xE0179'# -> "\0\0\0\0\5\5VS138\0\0"# + '\xE017A'# -> "\0\0\0\0\5\5VS139\0\0"# + '\xE017B'# -> "\0\0\0\0\5\5VS140\0\0"# + '\xE017C'# -> "\0\0\0\0\5\5VS141\0\0"# + '\xE017D'# -> "\0\0\0\0\5\5VS142\0\0"# + '\xE017E'# -> "\0\0\0\0\5\5VS143\0\0"# + '\xE017F'# -> "\0\0\0\0\5\5VS144\0\0"# + '\xE0180'# -> "\0\0\0\0\5\5VS145\0\0"# + '\xE0181'# -> "\0\0\0\0\5\5VS146\0\0"# + '\xE0182'# -> "\0\0\0\0\5\5VS147\0\0"# + '\xE0183'# -> "\0\0\0\0\5\5VS148\0\0"# + '\xE0184'# -> "\0\0\0\0\5\5VS149\0\0"# + '\xE0185'# -> "\0\0\0\0\5\5VS150\0\0"# + '\xE0186'# -> "\0\0\0\0\5\5VS151\0\0"# + '\xE0187'# -> "\0\0\0\0\5\5VS152\0\0"# + '\xE0188'# -> "\0\0\0\0\5\5VS153\0\0"# + '\xE0189'# -> "\0\0\0\0\5\5VS154\0\0"# + '\xE018A'# -> "\0\0\0\0\5\5VS155\0\0"# + '\xE018B'# -> "\0\0\0\0\5\5VS156\0\0"# + '\xE018C'# -> "\0\0\0\0\5\5VS157\0\0"# + '\xE018D'# -> "\0\0\0\0\5\5VS158\0\0"# + '\xE018E'# -> "\0\0\0\0\5\5VS159\0\0"# + '\xE018F'# -> "\0\0\0\0\5\5VS160\0\0"# + '\xE0190'# -> "\0\0\0\0\5\5VS161\0\0"# + '\xE0191'# -> "\0\0\0\0\5\5VS162\0\0"# + '\xE0192'# -> "\0\0\0\0\5\5VS163\0\0"# + '\xE0193'# -> "\0\0\0\0\5\5VS164\0\0"# + '\xE0194'# -> "\0\0\0\0\5\5VS165\0\0"# + '\xE0195'# -> "\0\0\0\0\5\5VS166\0\0"# + '\xE0196'# -> "\0\0\0\0\5\5VS167\0\0"# + '\xE0197'# -> "\0\0\0\0\5\5VS168\0\0"# + '\xE0198'# -> "\0\0\0\0\5\5VS169\0\0"# + '\xE0199'# -> "\0\0\0\0\5\5VS170\0\0"# + '\xE019A'# -> "\0\0\0\0\5\5VS171\0\0"# + '\xE019B'# -> "\0\0\0\0\5\5VS172\0\0"# + '\xE019C'# -> "\0\0\0\0\5\5VS173\0\0"# + '\xE019D'# -> "\0\0\0\0\5\5VS174\0\0"# + '\xE019E'# -> "\0\0\0\0\5\5VS175\0\0"# + '\xE019F'# -> "\0\0\0\0\5\5VS176\0\0"# + '\xE01A0'# -> "\0\0\0\0\5\5VS177\0\0"# + '\xE01A1'# -> "\0\0\0\0\5\5VS178\0\0"# + '\xE01A2'# -> "\0\0\0\0\5\5VS179\0\0"# + '\xE01A3'# -> "\0\0\0\0\5\5VS180\0\0"# + '\xE01A4'# -> "\0\0\0\0\5\5VS181\0\0"# + '\xE01A5'# -> "\0\0\0\0\5\5VS182\0\0"# + '\xE01A6'# -> "\0\0\0\0\5\5VS183\0\0"# + '\xE01A7'# -> "\0\0\0\0\5\5VS184\0\0"# + '\xE01A8'# -> "\0\0\0\0\5\5VS185\0\0"# + '\xE01A9'# -> "\0\0\0\0\5\5VS186\0\0"# + '\xE01AA'# -> "\0\0\0\0\5\5VS187\0\0"# + '\xE01AB'# -> "\0\0\0\0\5\5VS188\0\0"# + '\xE01AC'# -> "\0\0\0\0\5\5VS189\0\0"# + '\xE01AD'# -> "\0\0\0\0\5\5VS190\0\0"# + '\xE01AE'# -> "\0\0\0\0\5\5VS191\0\0"# + '\xE01AF'# -> "\0\0\0\0\5\5VS192\0\0"# + '\xE01B0'# -> "\0\0\0\0\5\5VS193\0\0"# + '\xE01B1'# -> "\0\0\0\0\5\5VS194\0\0"# + '\xE01B2'# -> "\0\0\0\0\5\5VS195\0\0"# + '\xE01B3'# -> "\0\0\0\0\5\5VS196\0\0"# + '\xE01B4'# -> "\0\0\0\0\5\5VS197\0\0"# + '\xE01B5'# -> "\0\0\0\0\5\5VS198\0\0"# + '\xE01B6'# -> "\0\0\0\0\5\5VS199\0\0"# + '\xE01B7'# -> "\0\0\0\0\5\5VS200\0\0"# + '\xE01B8'# -> "\0\0\0\0\5\5VS201\0\0"# + '\xE01B9'# -> "\0\0\0\0\5\5VS202\0\0"# + '\xE01BA'# -> "\0\0\0\0\5\5VS203\0\0"# + '\xE01BB'# -> "\0\0\0\0\5\5VS204\0\0"# + '\xE01BC'# -> "\0\0\0\0\5\5VS205\0\0"# + '\xE01BD'# -> "\0\0\0\0\5\5VS206\0\0"# + '\xE01BE'# -> "\0\0\0\0\5\5VS207\0\0"# + '\xE01BF'# -> "\0\0\0\0\5\5VS208\0\0"# + '\xE01C0'# -> "\0\0\0\0\5\5VS209\0\0"# + '\xE01C1'# -> "\0\0\0\0\5\5VS210\0\0"# + '\xE01C2'# -> "\0\0\0\0\5\5VS211\0\0"# + '\xE01C3'# -> "\0\0\0\0\5\5VS212\0\0"# + '\xE01C4'# -> "\0\0\0\0\5\5VS213\0\0"# + '\xE01C5'# -> "\0\0\0\0\5\5VS214\0\0"# + '\xE01C6'# -> "\0\0\0\0\5\5VS215\0\0"# + '\xE01C7'# -> "\0\0\0\0\5\5VS216\0\0"# + '\xE01C8'# -> "\0\0\0\0\5\5VS217\0\0"# + '\xE01C9'# -> "\0\0\0\0\5\5VS218\0\0"# + '\xE01CA'# -> "\0\0\0\0\5\5VS219\0\0"# + '\xE01CB'# -> "\0\0\0\0\5\5VS220\0\0"# + '\xE01CC'# -> "\0\0\0\0\5\5VS221\0\0"# + '\xE01CD'# -> "\0\0\0\0\5\5VS222\0\0"# + '\xE01CE'# -> "\0\0\0\0\5\5VS223\0\0"# + '\xE01CF'# -> "\0\0\0\0\5\5VS224\0\0"# + '\xE01D0'# -> "\0\0\0\0\5\5VS225\0\0"# + '\xE01D1'# -> "\0\0\0\0\5\5VS226\0\0"# + '\xE01D2'# -> "\0\0\0\0\5\5VS227\0\0"# + '\xE01D3'# -> "\0\0\0\0\5\5VS228\0\0"# + '\xE01D4'# -> "\0\0\0\0\5\5VS229\0\0"# + '\xE01D5'# -> "\0\0\0\0\5\5VS230\0\0"# + '\xE01D6'# -> "\0\0\0\0\5\5VS231\0\0"# + '\xE01D7'# -> "\0\0\0\0\5\5VS232\0\0"# + '\xE01D8'# -> "\0\0\0\0\5\5VS233\0\0"# + '\xE01D9'# -> "\0\0\0\0\5\5VS234\0\0"# + '\xE01DA'# -> "\0\0\0\0\5\5VS235\0\0"# + '\xE01DB'# -> "\0\0\0\0\5\5VS236\0\0"# + '\xE01DC'# -> "\0\0\0\0\5\5VS237\0\0"# + '\xE01DD'# -> "\0\0\0\0\5\5VS238\0\0"# + '\xE01DE'# -> "\0\0\0\0\5\5VS239\0\0"# + '\xE01DF'# -> "\0\0\0\0\5\5VS240\0\0"# + '\xE01E0'# -> "\0\0\0\0\5\5VS241\0\0"# + '\xE01E1'# -> "\0\0\0\0\5\5VS242\0\0"# + '\xE01E2'# -> "\0\0\0\0\5\5VS243\0\0"# + '\xE01E3'# -> "\0\0\0\0\5\5VS244\0\0"# + '\xE01E4'# -> "\0\0\0\0\5\5VS245\0\0"# + '\xE01E5'# -> "\0\0\0\0\5\5VS246\0\0"# + '\xE01E6'# -> "\0\0\0\0\5\5VS247\0\0"# + '\xE01E7'# -> "\0\0\0\0\5\5VS248\0\0"# + '\xE01E8'# -> "\0\0\0\0\5\5VS249\0\0"# + '\xE01E9'# -> "\0\0\0\0\5\5VS250\0\0"# + '\xE01EA'# -> "\0\0\0\0\5\5VS251\0\0"# + '\xE01EB'# -> "\0\0\0\0\5\5VS252\0\0"# + '\xE01EC'# -> "\0\0\0\0\5\5VS253\0\0"# + '\xE01ED'# -> "\0\0\0\0\5\5VS254\0\0"# + '\xE01EE'# -> "\0\0\0\0\5\5VS255\0\0"# + '\xE01EF'# -> "\0\0\0\0\5\5VS256\0\0"# _ -> "\xff"# diff --git a/unicode-data-names/test/Unicode/Char/General/NamesSpec.hs b/unicode-data-names/test/Unicode/Char/General/NamesSpec.hs index ba027318..3d99977e 100644 --- a/unicode-data-names/test/Unicode/Char/General/NamesSpec.hs +++ b/unicode-data-names/test/Unicode/Char/General/NamesSpec.hs @@ -9,10 +9,10 @@ import Unicode.Char.General ( generalCategory, GeneralCategory(NotAssigned, Surrogate, PrivateUse) ) import Unicode.Char.General.Names - ( correctedName, name, nameOrAlias ) + ( NameAliasType (..), correctedName, name, nameOrAlias, nameAliasesWithTypes, nameAliases, nameAliasesByType ) import qualified Unicode.Internal.Char.UnicodeData.DerivedName as DerivedName import Data.Foldable (traverse_) -import Test.Hspec ( Spec, it, shouldBe, shouldSatisfy ) +import Test.Hspec ( Spec, it, shouldBe, shouldSatisfy, describe ) spec :: Spec spec = do @@ -85,6 +85,44 @@ spec = do -- Last name defined, as of Unicode 15.0.0 nameOrAlias '\xe01ef' `shouldBe` Just "VARIATION SELECTOR-256" nameOrAlias maxBound `shouldBe` Nothing + it "nameAliasesWithTypes: test some characters" do + nameAliasesWithTypes '\0' `shouldBe` + [(Control, ["NULL"]), (Abbreviation, ["NUL"])] + nameAliasesWithTypes '\x0A' `shouldBe` + [(Control, ["LINE FEED", "NEW LINE", "END OF LINE"]) + ,(Abbreviation, ["LF", "NL", "EOL"])] + nameAliasesWithTypes '\x80' `shouldBe` + [(Figment, ["PADDING CHARACTER"]), (Abbreviation, ["PAD"])] + nameAliasesWithTypes '\x01A2' `shouldBe` + [(Correction, ["LATIN CAPITAL LETTER GHA"])] + nameAliasesWithTypes '\xFEFF' `shouldBe` + [(Alternate, ["BYTE ORDER MARK"]), (Abbreviation, ["BOM", "ZWNBSP"])] + nameAliasesWithTypes '\xE01EF' `shouldBe` + [(Abbreviation, ["VS256"])] + it "nameAliasesByType" do + let f c = foldr + (\t -> case nameAliasesByType t c of {[] -> id;xs -> ((t,xs):)}) + mempty + [minBound..maxBound] + check c = f c == nameAliasesWithTypes c + traverse_ (`shouldSatisfy` check) [minBound..maxBound] + describe "nameAliases" do + it "test some characters" do + nameAliases '\0' `shouldBe` + ["NULL", "NUL"] + nameAliases '\x0A' `shouldBe` + ["LINE FEED", "NEW LINE", "END OF LINE", "LF", "NL", "EOL"] + nameAliases '\x80' `shouldBe` + ["PADDING CHARACTER", "PAD"] + nameAliases '\x01A2' `shouldBe` + ["LATIN CAPITAL LETTER GHA"] + nameAliases '\xFEFF' `shouldBe` + ["BYTE ORDER MARK", "BOM", "ZWNBSP"] + nameAliases '\xE01EF' `shouldBe` + ["VS256"] + it "compare to nameAliasesWithTypes" do + let check c = nameAliases c == mconcat (snd <$> nameAliasesWithTypes c) + traverse_ (`shouldSatisfy` check) [minBound..maxBound] it "Every defined character has at least a name or an alias" do let checkName c = case nameOrAlias c of Just _ -> True @@ -93,5 +131,4 @@ spec = do PrivateUse -> True NotAssigned -> True _ -> False - traverse_ (`shouldSatisfy` checkName) - [minBound..maxBound] + traverse_ (`shouldSatisfy` checkName) [minBound..maxBound] diff --git a/unicode-data-names/unicode-data-names.cabal b/unicode-data-names/unicode-data-names.cabal index dffd94ef..f3efd9b9 100644 --- a/unicode-data-names/unicode-data-names.cabal +++ b/unicode-data-names/unicode-data-names.cabal @@ -168,10 +168,11 @@ benchmark bench hs-source-dirs: bench main-is: Main.hs build-depends: - base >= 4.7 && < 4.19, - deepseq >= 1.1 && < 1.5, - tasty-bench >= 0.2.5 && < 0.4, - tasty >= 1.4.1 && < 1.5, + base >= 4.7 && < 4.19, + deepseq >= 1.1 && < 1.5, + tasty-bench >= 0.2.5 && < 0.4, + tasty >= 1.4.1 && < 1.5, + unicode-data >= 0.4 && < 0.5, unicode-data-names if flag(has-text) cpp-options: -DHAS_TEXT