SPECIAL WINDOWS CHARACTERS AND THEIR UNICODE EQUIVALENTS
Windows name Symbol Win Unicode
baseline single quote ‚ 130 U+201A
baseline double quote „ 132 U+201E
florin ƒ 131 U+0192
ellipsis … 133 U+2026
dagger † 134 U+2020
double dagger ‡ 135 U+2021
circumflex accent ˆ 136 U+02C6
permile ‰ 137 U+2030
S Hacek Š 138 U+0160
left single guillemet ‹ 139 U+2039
OE ligature Œ 140 U+0152
left single quote ‘ 145 U+2018
right single quote ’ 146 U+2019
left double quote “ 147 U+201C
right double quote ” 148 U+201D
bullet • 149 U+2022
en dash – 150 U+2013
em dash — 151 U+2014
tilde accent ˜ 152 U+02DC
trademark ligature ™ 153 U+2122
s Hacek š 154 U+0161
right single guillemet › 155 U+203A
oe ligature œ 156 U+0153
Y Dieresis Ÿ 159 U+0178
euro sign € 128 U+20AC
Windows name substitute comments
baseline single quote ' apostrophe used as single quote
baseline double quote " quotation mark (double quote)
ellipsis ... three dots
circumflex accent ^ circumflex
left single quote ' apostrophe used as single quote
right single quote ' apostrophe used as single quote
left double quote " quotation mark (double quote)
right double quote " quotation mark (double quote)
bullet * or - asterisk or hyphen
en dash - hyphen
em dash -- two hyphens
tilde accent ~ tilde
trademark ligature (TM) (TM) in superscript style
Javascript Method 1
/**
 * Replace "smart" punctuation pasted from Microsoft Word with plain ASCII.
 *
 * Each UTF-16 code unit of the input is looked up in a table keyed by its
 * decimal character code; matching characters are swapped for their ASCII
 * substitute and everything else is copied through unchanged.
 *
 * @param {string} str - Text that may contain Word-style punctuation.
 * @returns {string} The text with the listed characters replaced. A
 *   non-string argument is returned unchanged.
 */
function sanitizeMSPaste(str) {
  if (typeof str !== "string") {
    return str;
  }

  // Keys are decimal char codes (as returned by charCodeAt).
  var replacements = {
    8211: "-",   // U+2013 en dash
    8212: "-",   // U+2014 em dash
    8216: "'",   // U+2018 left single quote
    8217: "'",   // U+2019 right single quote
    8218: "'",   // U+201A baseline single quote
    8220: '"',   // U+201C left double quote
    8221: '"',   // U+201D right double quote
    8222: '"',   // U+201E baseline double quote
    8224: "+",   // U+2020 dagger
    8226: ".",   // U+2022 bullet
    8230: "...", // U+2026 ellipsis
    8249: "<",   // U+2039 left single guillemet
    8250: ">"    // U+203A right single guillemet
  };

  // Build a new string instead of splicing the input in place: the ellipsis
  // substitute is three characters long, so in-place edits would shift the
  // indices still to be visited.
  var result = "";
  for (var i = 0; i < str.length; i++) {
    var substitute = replacements[str.charCodeAt(i)];
    // The table holds the replacement text itself, so it is appended
    // directly (passing it through String.fromCharCode would yield "\u0000").
    result += substitute !== undefined ? substitute : str.charAt(i);
  }
  return result;
}
// Javascript Method 2
/**
 * Strip characters that do not belong in a phone number from a form field,
 * writing the cleaned text back to the field.
 *
 * Letters, the listed punctuation and the Word "smart" characters are
 * removed; digits, spaces, parentheses and hyphens are not in the character
 * class and therefore survive. A "+" in the first position is preserved:
 * the pattern removes every "+", and the leading one is then re-added.
 *
 * @param {{value: string}} xxxxx - Object exposing a string `value`
 *   property (presumably an <input> element — confirm against callers).
 * @returns {undefined} Nothing; the result is stored in `xxxxx.value`.
 */
function validatephone(xxxxx) {
  var numval = xxxxx.value;

  // Remember an international prefix before the pattern strips all "+".
  var prefix = numval.charAt(0) === '+' ? '+' : '';

  var stripped = numval.replace(/[\\A-Za-z!"‘’“”ˆ†‡‰Šƒ‹›–—…•~-ŒœŸ£$%^&*™š+_={};:'@#~,.¦\/<>?|`¬\]\[]/g,'');

  xxxxx.value = prefix + stripped;

  // NOTE(review): the original ended with a bare `xxxxx.focus;`, a property
  // read that never invoked focus(). It is omitted here so behaviour is
  // unchanged; add an explicit xxxxx.focus() only if refocusing is wanted.
}
PHP Method 1
// Swap Word-style curly quotes, the en dash and the ellipsis in $src for
// plain ASCII. The search and replacement lists are parallel and are applied
// in the order written.
$src = str_replace(
    array("‘", "’", "”", "“", "–", "…"),
    array("'", "'", '"', '"', "-", "..."),
    $src
);
// PHP Method 2
/**
 * Replace Windows-1252 "special" characters with plain ASCII substitutes.
 *
 * Every key in the table is a single byte value; each occurrence of that
 * byte (chr(key)) in the input is replaced by the mapped text.
 *
 * NOTE(review): matching is done on raw bytes, so this assumes $Text is in a
 * single-byte Windows code page. On UTF-8 input these byte values can occur
 * inside multi-byte sequences — confirm the encoding at the call site.
 *
 * @param string $Text Text to sanitize; defaults to an empty string.
 * @return string The text with the listed bytes replaced.
 */
function SanitizeFromWord($Text = '') {
    $chars = array(
        130 => ',',     // baseline single quote
        131 => 'NLG',   // florin
        132 => '"',     // baseline double quote
        133 => '...',   // ellipsis
        134 => '**',    // dagger (a second footnote)
        135 => '***',   // double dagger (a third footnote)
        136 => '^',     // circumflex accent
        137 => 'o/oo',  // permile
        138 => 'Sh',    // S Hacek
        139 => '<',     // left single guillemet
        140 => 'OE',    // OE ligature
        145 => '\'',    // left single quote
        146 => '\'',    // right single quote
        147 => '"',     // left double quote
        148 => '"',     // right double quote
        149 => '-',     // bullet
        150 => '-',     // endash
        151 => '--',    // emdash
        152 => '~',     // tilde accent
        153 => '(TM)',  // trademark ligature
        154 => 'sh',    // s Hacek
        155 => '>',     // right single guillemet
        156 => 'oe',    // oe ligature
        159 => 'Y',     // Y Dieresis
        169 => '(C)',   // Copyright
        174 => '(R)'    // Registered Trademark
    );

    // One str_replace call over parallel arrays; the replacements are pure
    // ASCII, so none of them can produce a byte that a later entry matches.
    return str_replace(
        array_map('chr', array_keys($chars)),
        array_values($chars),
        $Text
    );
}
MooTools:
/**
 * Sanitize user input: replace each free-text field's value with the result
 * of calling tidy() on it (presumably the String extension supplied by
 * MooTools — confirm it is loaded before this runs).
 */
['title', 'location', 'description'].forEach(function (fieldName) {
  form[fieldName].value = form[fieldName].value.tidy();
});
See Also:
http://www.php.net/manual/en/filter.filters.sanitize.php
http://devzone.zend.com/article/1113
http://www.w3schools.com/php/php_filter.asp
PDF Version: http://billcreswell.com/MSCharacters/MSCharacters.pdf
No comments:
Post a Comment