SPECIAL WINDOWS CHARACTERS AND THEIR UNICODE EQUIVALENTS
Windows name Symbol Win Unicode
baseline single quote ‚ 130 U+201A
baseline double quote „ 132 U+201E
florin ƒ 131 U+0192
ellipsis … 133 U+2026
dagger † 134 U+2020
double dagger ‡ 135 U+2021
circumflex accent ˆ 136 U+02C6
permile ‰ 137 U+2030
S Hacek Š 138 U+0160
left single guillemet ‹ 139 U+2039
OE ligature Œ 140 U+0152
left single quote ‘ 145 U+2018
right single quote ’ 146 U+2019
left double quote “ 147 U+201C
right double quote ” 148 U+201D
bullet • 149 U+2022
en dash – 150 U+2013
em dash — 151 U+2014
tilde accent ˜ 152 U+02DC
trademark ligature ™ 153 U+2122
s Hacek š 154 U+0161
right single guillemet › 155 U+203A
oe ligature œ 156 U+0153
Y Dieresis Ÿ 159 U+0178
euro sign € 128 U+20AC
Windows name substitute comments
baseline single quote ' apostrophe used as single quote
baseline double quote " quotation mark (double quote)
ellipsis ... three dots
circumflex accent ^ circumflex
left single quote ' apostrophe used as single quote
right single quote ' apostrophe used as single quote
left double quote " quotation mark (double quote)
right double quote " quotation mark (double quote)
bullet * or - asterisk or hyphen
en dash - hyphen
em dash -- two hyphens
tilde accent ~ tilde
trademark ligature (TM) (TM) in superscript style
Javascript Method 1
/**
 * Replaces the "smart" punctuation that Microsoft Word pastes into text
 * (curly quotes, dashes, ellipsis, bullet, dagger, single guillemets) with
 * plain ASCII text.
 *
 * Characters that have no entry in the replacement table are copied through
 * unchanged.
 *
 * @param {string} str Text to clean.
 * @returns {string} A new string with the mapped characters substituted.
 */
function sanitizeMSPaste(str) {
  // UTF-16 code unit (decimal) -> replacement text.
  var replacements = {
    8211: "-",   // U+2013 en dash
    8212: "-",   // U+2014 em dash
    8216: "'",   // U+2018 left single quote
    8217: "'",   // U+2019 right single quote
    8218: "'",   // U+201A baseline single quote
    8220: '"',   // U+201C left double quote
    8221: '"',   // U+201D right double quote
    8222: '"',   // U+201E baseline double quote
    8224: "+",   // U+2020 dagger
    8226: ".",   // U+2022 bullet
    8230: "...", // U+2026 ellipsis
    8249: "<",   // U+2039 left single guillemet
    8250: ">"    // U+203A right single guillemet
  };
  var hasOwn = Object.prototype.hasOwnProperty;
  var cleaned = "";

  // Build a fresh string instead of splicing in place: the ellipsis expands
  // to three characters, so in-place edits would shift the loop index.
  for (var i = 0; i < str.length; i++) {
    var code = str.charCodeAt(i);
    // The table holds the replacement text itself, so it is appended as-is
    // (passing it through String.fromCharCode would produce NUL characters).
    cleaned += hasOwn.call(replacements, code) ? replacements[code] : str.charAt(i);
  }
  return cleaned;
}
Javascript Method 2
/**
 * Cleans the value of a phone-number form field in place.
 *
 * Every character matched by the blacklist below (letters, backslash, Word
 * "smart" punctuation and assorted ASCII symbols) is removed from the
 * field's value. Characters not in the blacklist, such as digits, spaces,
 * parentheses and hyphens, are kept. A "+" in the first position is
 * preserved even though "+" is otherwise stripped.
 *
 * @param {{value: string, focus?: Function}} xxxxx Field object whose
 *     `value` is read and overwritten; `focus()` is called afterwards when
 *     the object provides it.
 */
function validatephone(xxxxx) {
  var rawValue = xxxxx.value;

  // Remember a leading "+" because the blacklist below removes every "+".
  var prefix = rawValue.charAt(0) === '+' ? '+' : '';

  // NOTE(review): inside the class, "~-Œ" is a range (U+007E..U+0152), not
  // three literal characters, so hyphens are NOT stripped — confirm intended.
  var stripped = rawValue.replace(/[\\A-Za-z!"‘’“”ˆ†‡‰Šƒ‹›–—…•~-ŒœŸ£$%^&*™š+_={};:'@#~,.¦\/<>?|`¬\]\[]/g,'');

  xxxxx.value = prefix + stripped;

  // Actually invoke focus; a bare `xxxxx.focus;` only reads the property.
  if (typeof xxxxx.focus === 'function') {
    xxxxx.focus();
  }
}
PHP Method 1
// Swap Word "smart" punctuation in $src for plain ASCII equivalents.
// The search/replace arrays are applied pairwise, in the order listed.
$src = str_replace(
    array("‘", "’", "”", "“", "–", "…"),
    array("'", "'", '"', '"', "-", "..."),
    $src
);
PHP Method 2
/**
 * Replaces Windows-specific special characters (as pasted from Word) with
 * plain-ASCII stand-ins.
 *
 * Each table key is a byte value that is turned into a one-byte string with
 * chr() and replaced wherever it occurs in the text.
 * NOTE(review): matching is per byte, so this presumably expects single-byte
 * (Windows-1252 style) input; multi-byte text containing these byte values
 * would be altered as well — confirm against callers.
 *
 * @param string $Text Text to clean (defaults to an empty string).
 * @return string The text with the listed bytes substituted.
 */
function SanitizeFromWord($Text = '') {
    // Byte value => ASCII substitute.
    $substitutes = array(
        130 => ',',     // baseline single quote
        131 => 'NLG',   // florin
        132 => '"',     // baseline double quote
        133 => '...',   // ellipsis
        134 => '**',    // dagger (a second footnote)
        135 => '***',   // double dagger (a third footnote)
        136 => '^',     // circumflex accent
        137 => 'o/oo',  // permile
        138 => 'Sh',    // S Hacek
        139 => '<',     // left single guillemet
        140 => 'OE',    // OE ligature
        145 => '\'',    // left single quote
        146 => '\'',    // right single quote
        147 => '"',     // left double quote
        148 => '"',     // right double quote
        149 => '-',     // bullet
        150 => '-',     // endash
        151 => '--',    // emdash
        152 => '~',     // tilde accent
        153 => '(TM)',  // trademark ligature
        154 => 'sh',    // s Hacek
        155 => '>',     // right single guillemet
        156 => 'oe',    // oe ligature
        159 => 'Y',     // Y Dieresis
        169 => '(C)',   // Copyright
        174 => '(R)',   // Registered Trademark
    );

    // Array-form str_replace applies the pairs one after another, in table order.
    $needles = array_map('chr', array_keys($substitutes));
    return str_replace($needles, array_values($substitutes), $Text);
}
MooTools:
/** Sanitize user input: run tidy() over each text field of the form, in order. **/
['title', 'location', 'description'].forEach(function (fieldName) {
  form[fieldName].value = form[fieldName].value.tidy();
});
See Also:
http://www.php.net/manual/en/filter.filters.sanitize.php
http://devzone.zend.com/article/1113
http://www.w3schools.com/php/php_filter.asp
PDF Version: http://billcreswell.com/MSCharacters/MSCharacters.pdf
No comments:
Post a Comment