I need to create an after insert trigger in my SQL Server 2008 R2 database.
The trigger needs to take some fields from INSERTED, process them and pu
Encoding five special characters with recursive CTE:
--
-- HTML-encode @unsafe into @safe, one character per recursion step.
--   * " & ' < >            -> &quot; &amp; &apos; &lt; &gt;
--   * any code unit > 127  -> decimal numeric reference (&#NNN;)
--   * everything else      -> copied unchanged
-- Result: @safe is NULL when @unsafe is NULL, and '' when @unsafe is ''.
-- NOTE: &apos; is defined by XML/XHTML/HTML5 but not by HTML 4;
--       emit &#39; instead if HTML 4 user agents must be supported.
-- NOTE(review): characters outside the BMP are stored as surrogate pairs,
--       so this would emit one numeric reference per UTF-16 code unit;
--       confirm whether supplementary characters need handling.
--
DECLARE
    @unsafe NVARCHAR(MAX),
    @safe NVARCHAR(MAX),
    @unsafe_len INT
--
-- Create the unsafe html string
--
SET @unsafe = N'html''s encoding "method" is <= or >= & 1234 ' + NCHAR(129)
--
-- LEN() ignores trailing spaces, which would silently drop them from the
-- result (and return NULL for an all-space string). NVARCHAR stores two
-- bytes per character, so DATALENGTH / 2 is the true character count.
--
SET @unsafe_len = DATALENGTH(@unsafe) / 2
--
-- Use a recursive CTE to iterate through each character in the string
--
;WITH cte AS
(
    --
    -- Anchor row: the original string, an empty string used to build
    -- the "safe" string, and the position of the next character to read.
    --
    SELECT
        @unsafe AS unsafe_html,
        CONVERT(NVARCHAR(MAX), N'') AS safe_html,
        1 AS pos
    WHERE @unsafe IS NOT NULL
    UNION ALL
    --
    -- Recursive step: append the encoded form of the character at "pos"
    -- and advance by one. Recursion stops once every character has been
    -- consumed (pos = length + 1). Characters are compared by code point
    -- so the match does not depend on the database collation.
    --
    SELECT
        @unsafe AS unsafe_html,
        CONVERT(NVARCHAR(MAX), safe_html +
            CASE UNICODE(SUBSTRING(unsafe_html, pos, 1))
                WHEN 34 THEN N'&quot;'   -- "
                WHEN 38 THEN N'&amp;'    -- &
                WHEN 39 THEN N'&apos;'   -- '
                WHEN 60 THEN N'&lt;'     -- <
                WHEN 62 THEN N'&gt;'     -- >
                ELSE
                    CASE
                        WHEN UNICODE(SUBSTRING(unsafe_html, pos, 1)) > 127
                            THEN N'&#'
                                + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(unsafe_html, pos, 1)))
                                + N';'
                        ELSE SUBSTRING(unsafe_html, pos, 1)
                    END
            END) AS safe_html,
        pos + 1 AS pos
    FROM cte
    WHERE pos <= @unsafe_len
)
--
-- Each pass through the string creates a row in the CTE.
-- The last row has a position value of the string length + 1;
-- that row holds the complete safe html string.
-- MAXRECURSION 32767 is the highest explicit limit SQL Server accepts,
-- so input longer than 32767 characters fails with a recursion error.
--
SELECT @safe = (
    SELECT safe_html
    FROM cte
    WHERE pos = @unsafe_len + 1
)
OPTION (MAXRECURSION 32767)
SELECT @safe
-- html&apos;s encoding &quot;method&quot; is &lt;= or &gt;= &amp; 1234 &#129;
Initial version:
-- Baseline approach: let FOR XML do the escaping.
-- Declares a sample string and returns it twice: once as-is and once
-- round-tripped through FOR XML PATH(''), which serializes the value as
-- XML text (so markup characters such as & come back entitized).
DECLARE @s NVARCHAR(100) = 'unsafe & safe';

-- Result set 1: the raw value.
SELECT @s;

-- Result set 2: the same value after XML serialization.
SELECT (
    SELECT @s
    FOR XML PATH('')
);
---------------------------------------
unsafe & safe
-----------------------------------------
unsafe &amp; safe
Full encoding with all official references:
-- Declare the input (@unsafe) and output (@safe) variables, then build a
-- test string holding the first 10,000 Unicode code points, each followed
-- by a single space.
-- A plain loop is used instead of "SELECT @var = @var + ..." over a row
-- source: multi-row variable concatenation has no guaranteed evaluation
-- order, and the loop does not rely on sys.all_objects having enough rows.
DECLARE
    @unsafe NVARCHAR(MAX),
    @safe NVARCHAR(MAX),
    @code_point INT

SET @unsafe = N''
SET @code_point = 1

WHILE @code_point <= 10000
BEGIN
    SET @unsafe = @unsafe + NCHAR(@code_point) + N' '
    SET @code_point = @code_point + 1
END
-- Build table variable with character entity references defined in HTML 4.0
-- Reference: http://www.htmlcodetutorial.com/characterentities_famsupp_69.html
--   name        : the complete entity reference, including the leading '&'
--                 and the terminating ';' (a reference without ';' is only
--                 tolerated by browsers for a legacy subset of names).
--   unicode_val : the code point the reference stands for (lookup key).
-- NOTE: &apos; (39) is not part of HTML 4; it comes from XML/XHTML/HTML5.
--       Swap it for &#39; if HTML 4 user agents must be supported.
DECLARE @t TABLE (
    name NVARCHAR(25) NOT NULL,
    unicode_val INT NOT NULL PRIMARY KEY
)
INSERT INTO @t (name, unicode_val)
VALUES
    -- Markup-significant characters
    (N'&quot;', 34),
    (N'&amp;', 38),
    (N'&apos;', 39),
    (N'&lt;', 60),
    (N'&gt;', 62),
    -- ISO 8859-1 (Latin-1) characters
    (N'&nbsp;', 160),
    (N'&iexcl;', 161),
    (N'&cent;', 162),
    (N'&pound;', 163),
    (N'&curren;', 164),
    (N'&yen;', 165),
    (N'&brvbar;', 166),
    (N'&sect;', 167),
    (N'&uml;', 168),
    (N'&copy;', 169),
    (N'&ordf;', 170),
    (N'&laquo;', 171),
    (N'&not;', 172),
    (N'&shy;', 173),
    (N'&reg;', 174),
    (N'&macr;', 175),
    (N'&deg;', 176),
    (N'&plusmn;', 177),
    (N'&sup2;', 178),
    (N'&sup3;', 179),
    (N'&acute;', 180),
    (N'&micro;', 181),
    (N'&para;', 182),
    (N'&middot;', 183),
    (N'&cedil;', 184),
    (N'&sup1;', 185),
    (N'&ordm;', 186),
    (N'&raquo;', 187),
    (N'&frac14;', 188),
    (N'&frac12;', 189),
    (N'&frac34;', 190),
    (N'&iquest;', 191),
    (N'&Agrave;', 192),
    (N'&Aacute;', 193),
    (N'&Acirc;', 194),
    (N'&Atilde;', 195),
    (N'&Auml;', 196),
    (N'&Aring;', 197),
    (N'&AElig;', 198),
    (N'&Ccedil;', 199),
    (N'&Egrave;', 200),
    (N'&Eacute;', 201),
    (N'&Ecirc;', 202),
    (N'&Euml;', 203),
    (N'&Igrave;', 204),
    (N'&Iacute;', 205),
    (N'&Icirc;', 206),
    (N'&Iuml;', 207),
    (N'&ETH;', 208),
    (N'&Ntilde;', 209),
    (N'&Ograve;', 210),
    (N'&Oacute;', 211),
    (N'&Ocirc;', 212),
    (N'&Otilde;', 213),
    (N'&Ouml;', 214),
    (N'&times;', 215),
    (N'&Oslash;', 216),
    (N'&Ugrave;', 217),
    (N'&Uacute;', 218),
    (N'&Ucirc;', 219),
    (N'&Uuml;', 220),
    (N'&Yacute;', 221),
    (N'&THORN;', 222),
    (N'&szlig;', 223),
    (N'&agrave;', 224),
    (N'&aacute;', 225),
    (N'&acirc;', 226),
    (N'&atilde;', 227),
    (N'&auml;', 228),
    (N'&aring;', 229),
    (N'&aelig;', 230),
    (N'&ccedil;', 231),
    (N'&egrave;', 232),
    (N'&eacute;', 233),
    (N'&ecirc;', 234),
    (N'&euml;', 235),
    (N'&igrave;', 236),
    (N'&iacute;', 237),
    (N'&icirc;', 238),
    (N'&iuml;', 239),
    (N'&eth;', 240),
    (N'&ntilde;', 241),
    (N'&ograve;', 242),
    (N'&oacute;', 243),
    (N'&ocirc;', 244),
    (N'&otilde;', 245),
    (N'&ouml;', 246),
    (N'&divide;', 247),
    (N'&oslash;', 248),
    (N'&ugrave;', 249),
    (N'&uacute;', 250),
    (N'&ucirc;', 251),
    (N'&uuml;', 252),
    (N'&yacute;', 253),
    (N'&thorn;', 254),
    (N'&yuml;', 255),
    -- Latin Extended, spacing modifiers
    (N'&OElig;', 338),
    (N'&oelig;', 339),
    (N'&Scaron;', 352),
    (N'&scaron;', 353),
    (N'&Yuml;', 376),
    (N'&fnof;', 402),
    (N'&circ;', 710),
    (N'&tilde;', 732),
    -- Greek
    (N'&Alpha;', 913),
    (N'&Beta;', 914),
    (N'&Gamma;', 915),
    (N'&Delta;', 916),
    (N'&Epsilon;', 917),
    (N'&Zeta;', 918),
    (N'&Eta;', 919),
    (N'&Theta;', 920),
    (N'&Iota;', 921),
    (N'&Kappa;', 922),
    (N'&Lambda;', 923),
    (N'&Mu;', 924),
    (N'&Nu;', 925),
    (N'&Xi;', 926),
    (N'&Omicron;', 927),
    (N'&Pi;', 928),
    (N'&Rho;', 929),
    (N'&Sigma;', 931),
    (N'&Tau;', 932),
    (N'&Upsilon;', 933),
    (N'&Phi;', 934),
    (N'&Chi;', 935),
    (N'&Psi;', 936),
    (N'&Omega;', 937),
    (N'&alpha;', 945),
    (N'&beta;', 946),
    (N'&gamma;', 947),
    (N'&delta;', 948),
    (N'&epsilon;', 949),
    (N'&zeta;', 950),
    (N'&eta;', 951),
    (N'&theta;', 952),
    (N'&iota;', 953),
    (N'&kappa;', 954),
    (N'&lambda;', 955),
    (N'&mu;', 956),
    (N'&nu;', 957),
    (N'&xi;', 958),
    (N'&omicron;', 959),
    (N'&pi;', 960),
    (N'&rho;', 961),
    (N'&sigmaf;', 962),
    (N'&sigma;', 963),
    (N'&tau;', 964),
    (N'&upsilon;', 965),
    (N'&phi;', 966),
    (N'&chi;', 967),
    (N'&psi;', 968),
    (N'&omega;', 969),
    (N'&thetasym;', 977),
    (N'&upsih;', 978),
    (N'&piv;', 982),
    -- General punctuation
    (N'&ensp;', 8194),
    (N'&emsp;', 8195),
    (N'&thinsp;', 8201),
    (N'&zwnj;', 8204),
    (N'&zwj;', 8205),
    (N'&lrm;', 8206),
    (N'&rlm;', 8207),
    (N'&ndash;', 8211),
    (N'&mdash;', 8212),
    (N'&lsquo;', 8216),
    (N'&rsquo;', 8217),
    (N'&sbquo;', 8218),
    (N'&ldquo;', 8220),
    (N'&rdquo;', 8221),
    (N'&bdquo;', 8222),
    (N'&dagger;', 8224),
    (N'&Dagger;', 8225),
    (N'&bull;', 8226),
    (N'&hellip;', 8230),
    (N'&permil;', 8240),
    (N'&prime;', 8242),
    (N'&Prime;', 8243),
    (N'&lsaquo;', 8249),
    (N'&rsaquo;', 8250),
    (N'&oline;', 8254),
    (N'&frasl;', 8260),
    (N'&euro;', 8364),
    -- Letterlike symbols and arrows
    (N'&image;', 8465),
    (N'&weierp;', 8472),
    (N'&real;', 8476),
    (N'&trade;', 8482),
    (N'&alefsym;', 8501),
    (N'&larr;', 8592),
    (N'&uarr;', 8593),
    (N'&rarr;', 8594),
    (N'&darr;', 8595),
    (N'&harr;', 8596),
    (N'&crarr;', 8629),
    (N'&lArr;', 8656),
    (N'&uArr;', 8657),
    (N'&rArr;', 8658),
    (N'&dArr;', 8659),
    (N'&hArr;', 8660),
    -- Mathematical operators
    (N'&forall;', 8704),
    (N'&part;', 8706),
    (N'&exist;', 8707),
    (N'&empty;', 8709),
    (N'&nabla;', 8711),
    (N'&isin;', 8712),
    (N'&notin;', 8713),
    (N'&ni;', 8715),
    (N'&prod;', 8719),
    (N'&sum;', 8721),
    (N'&minus;', 8722),
    (N'&lowast;', 8727),
    (N'&radic;', 8730),
    (N'&prop;', 8733),
    (N'&infin;', 8734),
    (N'&ang;', 8736),
    (N'&and;', 8743),
    (N'&or;', 8744),
    (N'&cap;', 8745),
    (N'&cup;', 8746),
    (N'&int;', 8747),
    (N'&there4;', 8756),
    (N'&sim;', 8764),
    (N'&cong;', 8773),
    (N'&asymp;', 8776),
    (N'&ne;', 8800),
    (N'&equiv;', 8801),
    (N'&le;', 8804),
    (N'&ge;', 8805),
    (N'&sub;', 8834),
    (N'&sup;', 8835),
    (N'&nsub;', 8836),
    (N'&sube;', 8838),
    (N'&supe;', 8839),
    (N'&oplus;', 8853),
    (N'&otimes;', 8855),
    (N'&perp;', 8869),
    (N'&sdot;', 8901),
    -- Miscellaneous technical and geometric symbols
    (N'&lceil;', 8968),
    (N'&rceil;', 8969),
    (N'&lfloor;', 8970),
    (N'&rfloor;', 8971),
    (N'&lang;', 9001),
    (N'&rang;', 9002),
    (N'&loz;', 9674),
    (N'&spades;', 9824),
    (N'&clubs;', 9827),
    (N'&hearts;', 9829),
    (N'&diams;', 9830)
-- Build numbers table to parse the string: one row per character
-- position (1 .. character count of @unsafe).
-- DATALENGTH / 2 is used instead of LEN because LEN ignores trailing
-- spaces, which would leave the final space(s) of @unsafe unprocessed.
-- COALESCE keeps the TOP argument non-NULL when @unsafe is NULL.
-- NOTE(review): the row source is sys.all_objects cross-joined with
-- itself, so the maximum supported length is bounded by that row count.
DECLARE @numbers TABLE (number INT NOT NULL PRIMARY KEY)
INSERT INTO @numbers (number)
SELECT TOP (COALESCE(DATALENGTH(@unsafe) / 2, 0))
    ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS number
FROM sys.all_objects AS s1
CROSS JOIN sys.all_objects AS s2
-- Use numbers table to parse each character.
-- If a match is found in character entity reference table,
-- then use the safe substitute. Otherwise, if the unicode
-- value is greater than 127 (non-ASCII), use a decimal numeric
-- character reference: '&#' + value + ';'.
-- Finally, use the original character if nothing else
-- is a match.
--
-- The pieces are appended through an ordered cursor rather than a
-- multi-row "SELECT @safe = @safe + ..." because the evaluation order of
-- that form is not guaranteed, while a cursor honours its ORDER BY.
-- As before, the pieces are appended to whatever @safe already holds, and
-- @safe is left untouched when @numbers has no rows.
DECLARE @piece NVARCHAR(25)

DECLARE piece_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT
        COALESCE(
            e.name,
            CASE
                WHEN UNICODE(SUBSTRING(@unsafe, n.number, 1)) > 127
                    THEN N'&#'
                        + CONVERT(NVARCHAR(10), UNICODE(SUBSTRING(@unsafe, n.number, 1)))
                        + N';'
                ELSE SUBSTRING(@unsafe, n.number, 1)
            END
        ) AS piece
    FROM @numbers AS n
    LEFT OUTER JOIN @t AS e
        ON UNICODE(SUBSTRING(@unsafe, n.number, 1)) = e.unicode_val
    ORDER BY n.number

OPEN piece_cursor
FETCH NEXT FROM piece_cursor INTO @piece
WHILE @@FETCH_STATUS = 0
BEGIN
    SET @safe = COALESCE(@safe, N'') + @piece
    FETCH NEXT FROM piece_cursor INTO @piece
END
CLOSE piece_cursor
DEALLOCATE piece_cursor

SELECT @safe AS [safe]
Results:
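! &quot; # $ % &amp; &apos; ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ;
&lt; = &gt; ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
[ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z {
| } ~
&nbsp;
&iexcl; &cent; &pound; &curren; &yen; &brvbar; &sect; &uml; &copy; &ordf;
&laquo; &not; &shy; &reg; &macr; &deg; &plusmn; &sup2; &sup3; &acute; &micro;
&para; &middot; &cedil; &sup1; &ordm; &raquo; &frac14; &frac12; &frac34;
&iquest; &Agrave; &Aacute; &Acirc; &Atilde; &Auml; &Aring; &AElig; &Ccedil;
&Egrave; &Eacute; &Ecirc; &Euml; &Igrave; &Iacute; &Icirc; &Iuml; &ETH; &Ntilde;
&Ograve; &Oacute; &Ocirc; &Otilde; &Ouml; &times; &Oslash; &Ugrave; &Uacute;
&Ucirc; &Uuml; &Yacute; &THORN; &szlig; &agrave; &aacute; &acirc; &atilde; &auml;
&aring; &aelig; &ccedil; &egrave; &eacute; &ecirc; &euml; &igrave; &iacute; &icirc;
&iuml; &eth; &ntilde; &ograve; &oacute; &ocirc; &otilde; &ouml; &divide; &oslash;
&ugrave; &uacute; &ucirc; &uuml; &yacute; &thorn; &yuml; &#256; &#257; &#258; &#259;
&#260; &#261; &#262; &#263; &#264; &#265; &#266;...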