Parsing and encoding of BCP47 language tags

[[ 🗃 ^zry1r language-tags ]] :: [📥 Inbox] [📤 Outbox] [🐤 Followers] [🤝 Collaborators] [🛠 Changes]

Clone

HTTPS: darcs clone https://vervis.peers.community/repos/zry1r

SSH: darcs clone USERNAME@vervis.peers.community:zry1r

Tags

TODO

src /

generate.hs

{- This file is part of language-tags.
 -
 - Written in 2018 by fr33domlover <fr33domlover@riseup.net>.
 -
 - ♡ Copying is an act of love. Please copy, reuse and share.
 -
 - The author(s) have dedicated all copyright and related and neighboring
 - rights to this software to the public domain worldwide. This software is
 - distributed without any warranty.
 -
 - You should have received a copy of the CC0 Public Domain Dedication along
 - with this software. If not, see
 - <http://creativecommons.org/publicdomain/zero/1.0/>.
 -}

import Data.Char (isLetter)
import Data.LanguageSubtagRegistry
import Data.List.NonEmpty (NonEmpty)
import Data.Text (Text)
import Language.Haskell.Exts.Pretty (prettyPrint)
import Language.Haskell.Exts.Syntax

import qualified Data.List.NonEmpty as N
import qualified Data.Text as T
import qualified Data.Text.IO as TIO (readFile)

prependUnderscore :: Text -> Text
prependUnderscore t =
    case T.uncons t of
        Nothing     -> t
        Just (c, _) ->
            if isLetter c
                then t
                else T.cons '_' t

subtagCon :: Text -> Text
subtagCon = prependUnderscore . T.concat . map T.toTitle . T.split (== '-')

enumTypeDecl :: String -> NonEmpty String -> l -> Decl l
enumTypeDecl n cs l =
    DataDecl l
        (DataType l)              -- @data@, not @newtype@
        Nothing                   -- context
        (DHead l (Ident l n))     -- type constructor
        (N.toList $ N.map con cs) -- data constructors
        []                        -- deriving
    where
    con c =
        QualConDecl l
            Nothing         -- forall
            Nothing         -- context
            (ConDecl l
                (Ident l c) -- name
                []          -- arg types
            )

moduleDecl :: String -> String -> Decl l -> l -> Module l
moduleDecl m n d l =
    Module l
        (Just
            (ModuleHead l
                (ModuleName l m)            -- name
                Nothing                     -- warning
                (Just
                    (ExportSpecList l
                        [EThingWith l       -- export list
                            (EWildcard l 0)
                            (UnQual l
                                (Ident l n)
                            )
                            []
                        ]
                    )
                )
            )
        )
        []                                  -- pragmas
        []                                  -- imports
        [d]                                 -- declarations

private :: Record -> Bool
private r = T.pack "Private use" `elem` recordDescription r

grabTag :: TagType -> [Record] -> NonEmpty Text
grabTag t =
    let match (Left t') = t == t'
        match (Right _) = False
        p r = match (recordType r) && not (private r)
    in  N.fromList . map recordTag . filter p

grabSubtag :: SubtagType -> [Record] -> NonEmpty Text
grabSubtag t =
    let match (Left _)   = False
        match (Right t') = t == t'
        p r = match (recordType r) && not (private r)
    in  N.fromList . map recordTag . filter p

writeModule :: String -> NonEmpty Text -> IO ()
writeModule typ tags = do
    let path = "src/Data/LanguageTag/Generated/" ++ typ ++ ".hs"
        moduleName = "Data.LanguageTag.Generated." ++ typ
        vals = N.map (T.unpack . subtagCon) tags
        decl = enumTypeDecl typ vals ()
        comment = "-- This file is auto generated\n"
        content = prettyPrint $ moduleDecl moduleName typ decl ()
    writeFile path $ comment ++ content
    putStrLn $ "Wrote " ++ typ ++ " to " ++ moduleName

writeTagModule :: TagType -> [Record] -> IO ()
writeTagModule t rs = writeModule (show t) (grabTag t rs)

writeSubtagModule :: SubtagType -> [Record] -> IO ()
writeSubtagModule t rs = writeModule (show t ++ "Reg") (grabSubtag t rs)

main :: IO ()
main = do
    reg <- TIO.readFile "data/language-subtag-registry"
    let rs =
            case parseRegistry reg of
                Left e -> error $ "Failed to parse registry: " ++ T.unpack e
                Right r -> registryTags r
    writeTagModule Grandfathered rs
    writeTagModule Redundant rs
    writeSubtagModule Language rs
    writeSubtagModule ExtLang rs
    writeSubtagModule Script rs
    writeSubtagModule Region rs
    writeSubtagModule Variant rs
[See repo JSON]