Eventually-decentralized project hosting and management platform

[[ 🗃 ^WvWbo vervis ]] :: [📥 Inbox] [📤 Outbox] [🐤 Followers] [🤝 Collaborators] [🛠 Changes]

Clone

HTTPS: darcs clone https://vervis.peers.community/repos/WvWbo

SSH: darcs clone USERNAME@vervis.peers.community:WvWbo

Tags

TODO

src / Web /

Text.hs

{- This file is part of Vervis.
 -
 - Written in 2016, 2018, 2019, 2022 by fr33domlover <fr33domlover@riseup.net>.
 -
 - ♡ Copying is an act of love. Please copy, reuse and share.
 -
 - The author(s) have dedicated all copyright and related and neighboring
 - rights to this software to the public domain worldwide. This software is
 - distributed without any warranty.
 -
 - You should have received a copy of the CC0 Public Domain Dedication along
 - with this software. If not, see
 - <http://creativecommons.org/publicdomain/zero/1.0/>.
 -}

module Web.Text
    ( HTML ()
    , PandocMarkdown ()
    , Escaped ()
    , renderHTML
    , markupHTML
    , renderPandocMarkdown
    , pandocMarkdownFromText
    , encodeEntities
    , decodeEntities
    )
where

import Control.Exception
import Data.Aeson
import Data.Bifunctor
import Data.Text (Text)
import Database.Persist
import Database.Persist.Sql
import HTMLEntities.Decoder
import Text.Blaze (preEscapedText)
import Text.Blaze.Html (Html)
import Text.Blaze.Html.Renderer.Text
import Text.HTML.SanitizeXSS
import Text.Pandoc.Class (runPure)
import Text.Pandoc.Highlighting
import Text.Pandoc.Options
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Writers.HTML

import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified HTMLEntities.Text as HET

newtype HTML = HTML { unHTML :: Text }
    deriving (ToJSON, PersistField, PersistFieldSql)

instance FromJSON HTML where
    parseJSON = fmap (HTML . sanitizeBalance) . parseJSON

newtype PandocMarkdown = PandocMarkdown { _unPandocMarkdown :: Text }
    deriving (FromJSON, ToJSON, PersistField, PersistFieldSql)

newtype Escaped = Escaped { unEscaped :: Text }
    deriving (ToJSON, PersistField, PersistFieldSql)

escape :: Text -> Text
escape = HET.text

unescape :: Text -> Text
unescape = TL.toStrict . TLB.toLazyText . htmlEncodedText

instance FromJSON Escaped where
    parseJSON =
        withText "Escaped" $ \ t ->
            let decoded = unescape t
            in  if escape decoded == t
                    then return $ Escaped t
                    else fail "HTML contains more than just HTML-escaped plain text"

renderHTML :: Html -> HTML
renderHTML = HTML . TL.toStrict . renderHtml

markupHTML :: HTML -> Html
markupHTML = preEscapedText . unHTML

readerOptions :: ReaderOptions
readerOptions = def
    { readerExtensions            = pandocExtensions
    , readerStandalone            = False
    , readerColumns               = 80
    , readerTabStop               = 4
--  , readerIndentedCodeClasses   = []
--  , readerAbbreviations         = defaultAbbrevs
--  , readerDefaultImageExtension = ""
--  , readerTrackChanges          = AcceptChanges
--  , readerStripComments         = False
    }

writerOptions :: WriterOptions
writerOptions = def
    {
--    writerTemplate          = Nothing
--  , writerVariables         = []
      writerTabStop           = 4
    , writerTableOfContents   = True
--  , writerIncremental       = False
--  , writerHTMLMathMethod    = PlainMath
--  , writerNumberSections    = False
--  , writerNumberOffset      = [0,0,0,0,0,0]
--  , writerSectionDivs       = False
    , writerExtensions        = pandocExtensions
--  , writerReferenceLinks    = False
--  , writerDpi               = 96
    , writerWrapText          = WrapAuto
    , writerColumns           = 79
    , writerEmailObfuscation  = ReferenceObfuscation
--  , writerIdentifierPrefix  = ""
--  , writerCiteMethod        = Citeproc
--  , writerHtmlQTags         = False
--  , writerSlideLevel        = Nothing
--  , writerTopLevelDivision  = TopLevelDefault
--  , writerListings          = False
    , writerHighlightStyle    = Just tango
--  , writerSetextHeaders     = True
--  , writerEpubSubdirectory  = "EPUB"
--  , writerEpubMetadata      = Nothing
--  , writerEpubFonts         = []
--  , writerEpubChapterLevel  = 1
--  , writerTOCDepth          = 3
--  , writerReferenceDoc      = Nothing
--  , writerReferenceLocation = EndOfDocument
--  , writerSyntaxMap         = defaultSyntaxMap
    }

renderPandocMarkdown :: PandocMarkdown -> Either Text HTML
renderPandocMarkdown (PandocMarkdown input) = do
    doc <- runPure' $ readMarkdown readerOptions input
    HTML . sanitizeBalance . TL.toStrict . renderHtml <$>
        runPure' (writeHtml5 writerOptions doc)
    where
    runPure' = first (T.pack . displayException) . runPure

pandocMarkdownFromText :: Text -> PandocMarkdown
pandocMarkdownFromText = PandocMarkdown

encodeEntities :: Text -> Escaped
encodeEntities = Escaped . escape

decodeEntities :: Escaped -> Text
decodeEntities = unescape . unEscaped
[See repo JSON]