﻿{"id":466,"date":"2011-11-18T16:14:30","date_gmt":"2011-11-18T08:14:30","guid":{"rendered":"http:\/\/notes.zerobox.org\/?p=466"},"modified":"2011-11-18T16:16:44","modified_gmt":"2011-11-18T08:16:44","slug":"%e6%bc%ab%e8%b0%88%e7%bd%91%e9%a1%b5%e7%bc%96%e7%a0%81%e8%af%86%e5%88%ab%e6%8a%80%e6%9c%af","status":"publish","type":"post","link":"http:\/\/zerobox.org\/notes\/466.html","title":{"rendered":"\u6f2b\u8c08\u7f51\u9875\u7f16\u7801\u8bc6\u522b\u6280\u672f"},"content":{"rendered":"<p>\u6765\u6e90\uff1a<a href=\"http:\/\/blog.hysia.com\/post\/6\/how-to-detect-webpage-charset\/\">http:\/\/blog.hysia.com\/post\/6\/how-to-detect-webpage-charset\/<\/a><\/p>\n<p>&nbsp;<\/p>\n<p>\u6700\u8fd1\u5904\u7406\u4e86\u5f88\u591a\u6709\u5173\u7f51\u9875\u7f16\u7801\u7684\u95ee\u9898\uff0c\u79ef\u7d2f\u4e86\u4e00\u70b9\u7ecf\u9a8c\uff0c\u5199\u51fa\u6765\u603b\u7ed3\u5206\u4eab\u4e0b\u3002<\/p>\n<p>\u6211\u4eec\u5728\u7528\u7a0b\u5e8f\u5904\u7406\u7f51\u9875\u7f16\u7801\u7684\u65f6\u5019\u603b\u662f\u4f1a\u8981\u8fdb\u884c\u4e00\u4e9b<code>decode<\/code>\uff0c<code>encode<\/code>\u64cd\u4f5c\uff0c\u6bd4\u5982\u628aGBK,UTF-8\uff0cGB2312\u7b49\u7f16\u7801\u4e92\u76f8\u8f6c\u6362\uff0c\u6216\u8005\u7a0b\u5e8f\u5185\u90e8\u7edf\u4e00\u7f16\u7801\u6210unicode\u8fdb\u884c\u5404\u79cd\u64cd\u4f5c\u3002\u800c\u8fd9\u4e2a\u8fc7\u7a0b\u4e2d\u7b2c\u4e00\u6b65\u5c31\u662f\u8981\u8bc6\u522b\u51fa\u76ee\u6807\u7f51\u9875\u4f7f\u7528\u7684\u662f\u4ec0\u4e48\u7f16\u7801\uff0c\u7136\u540e\u624d\u80fd\u8fdb\u884c\u5404\u79cd\u8f6c\u6362\u64cd\u4f5c\u3002\u5173\u952e\u662f\u5982\u4f55\u51c6\u786e\u8bc6\u522b\u51fa\u76ee\u6807\u7f51\u9875\u7684\u7f16\u7801\u5462\uff1f \u8fd9\u91cc\u6211\u8ba4\u4e3a\u6709\u4ee5\u4e0b\u51e0\u79cd\u65b9\u6cd5\uff1a\uff08\u611f\u8c22\u77e5\u9053\u521b\u5b87\u6570\u636e\u4e2d\u5fc3\u540c\u5b66\u63d0\u4f9b\u5404\u79cd\u6837\u4f8b\uff09<br \/>\n1.HTTP HEADER<br \/>\n\u8fd9\u4e2a\u4f8b\u5982\u4e0b\u56fe\u6240\u793a\uff1a<br \/>\n<a 
href=\"http:\/\/notes.zerobox.org\/wp-content\/uploads\/2011\/11\/1.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"alignnone size-full wp-image-467\" title=\"1\" src=\"http:\/\/notes.zerobox.org\/wp-content\/uploads\/2011\/11\/1.jpg\" alt=\"\" width=\"500\" height=\"218\" srcset=\"http:\/\/zerobox.org\/notes\/wp-content\/uploads\/2011\/11\/1.jpg 500w, http:\/\/zerobox.org\/notes\/wp-content\/uploads\/2011\/11\/1-300x130.jpg 300w\" sizes=\"auto, (max-width: 500px) 100vw, 500px\" \/><\/a><\/p>\n<p>&nbsp;<\/p>\n<p>\u901a\u8fc7\u8fd9\u4e2a\u53ef\u4ee5\u4f5c\u4e3a\u8bc6\u522b\u76ee\u6807\u7f51\u9875\u7684\u7f16\u7801\u7279\u5f81\u4e4b\u4e00\u3002\u4f46\u662f,(\u8bf7\u6ce8\u610f\uff0c\u6211\u8fd9\u91cc\u7528\u4e86\u4f46\u662f)\u901a\u5e38\u8fd8\u6709\u5982\u4e0b\u95ee\u9898\uff1a<\/p>\n<ul>\n<li>\u54cd\u5e94\u5934\u4e2d\u7684\u00a0<code>Content-Type<\/code>\u00a0\u6ca1\u6709 charset \u7279\u5f81<\/li>\n<li>WebServer\u00a0<code>Content-Type<\/code>\u00a0\u8fd4\u56de\u7684\u4e00\u4e2a\u4e0d\u89c4\u8303\u7684 charset\uff0c\u6bd4\u5982\u6211\u901a\u8fc7\u68c0\u6d4b\u5927\u91cf\u7f51\u9875\u6355\u83b7\u5230\u7684\u6709\uff1a<code>zh-ch<\/code>\u3001<code>.gb2312<\/code>\u3001<code>.gbk<\/code>\u3001<code>no<\/code>\u3001<code>zh_cn.gbk<\/code>\u3001<code>0ff<\/code>\u3001<code>on<\/code>\u7b49<\/li>\n<\/ul>\n<p>\u8fd9\u4e2a\u65f6\u5019 HTTP HEADER\u4e2d\u7684charset\u7279\u5f81\u5c31\u6ca1\u6709\u4e86\u6216\u8005\u8bf4\u53ea\u80fd\u4f5c\u4e3a\u53c2\u8003\uff08\u540e\u9762\u4f1a\u4ecb\u7ecd\u4e3a\u4ec0\u4e48\u80fd\u4f5c\u4e3a\u53c2\u8003\uff09<\/p>\n<ul>\n<li>2.HTML META CHARSET<\/li>\n<\/ul>\n<p>\u8fd8\u662f\u770b\u56fe\u8bf4\u8bdd\uff1a<\/p>\n<p><a href=\"http:\/\/notes.zerobox.org\/wp-content\/uploads\/2011\/11\/2.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"alignnone size-full wp-image-468\" title=\"2\" src=\"http:\/\/notes.zerobox.org\/wp-content\/uploads\/2011\/11\/2.jpg\" alt=\"\" width=\"500\" height=\"226\" 
srcset=\"http:\/\/zerobox.org\/notes\/wp-content\/uploads\/2011\/11\/2.jpg 500w, http:\/\/zerobox.org\/notes\/wp-content\/uploads\/2011\/11\/2-300x135.jpg 300w\" sizes=\"auto, (max-width: 500px) 100vw, 500px\" \/><\/a><\/p>\n<p>\u8fd9\u4e2a\u5c31\u4e0d\u7528\u4ecb\u7ecd\u4e86\uff0c\u641e\u5f00\u53d1\u7684\u90fd\u77e5\u9053\u3002\u4f46\u662f\u8fd9\u4e2a\u7279\u5f81\u4e5f\u53ea\u662f\u80fd\u4f5c\u4e3a\u53c2\u8003\uff0c\u5343\u4e07\u4e0d\u8981\u4ee5\u4e3a\u7528\u6b63\u5219\u5339\u914d\u51fa\u8fd9\u4e2ameta\u4e2d\u7684<code>charset<\/code>\u5c31\u662f\u76ee\u6807\u7f51\u9875\u7684\u7f16\u7801\u4e86\uff0c\u56e0\u4e3a\uff0c\u6709\u5f88\u591a\u7c97\u5fc3\u7684\u7a0b\u5e8f\u733f\u6572\u5b57\u6709\u70b9\u5feb\uff0c\u7ecf\u5e38\u6572\u9519\u4ee3\u7801\u3002\u8fd9\u91cc\u540c\u6837\u6536\u96c6\u4e86\u6837\u4f8b\uff0c\u6709\u56fe\u6709\u771f\u76f8\uff01<\/p>\n<p><a href=\"http:\/\/notes.zerobox.org\/wp-content\/uploads\/2011\/11\/3.jpg\"><img loading=\"lazy\" decoding=\"async\" class=\"alignnone size-full wp-image-469\" title=\"3\" src=\"http:\/\/notes.zerobox.org\/wp-content\/uploads\/2011\/11\/3.jpg\" alt=\"\" width=\"500\" height=\"361\" srcset=\"http:\/\/zerobox.org\/notes\/wp-content\/uploads\/2011\/11\/3.jpg 500w, http:\/\/zerobox.org\/notes\/wp-content\/uploads\/2011\/11\/3-300x216.jpg 300w\" sizes=\"auto, (max-width: 500px) 100vw, 500px\" \/><\/a><\/p>\n<p>&nbsp;<\/p>\n<p>\u5982\u679c\u4f60\u4ec5\u4ec5\u901a\u8fc7\u6b63\u5219\u5339\u914d\u51fa\u4e86meta charset \u7136\u540e\u8fdb\u884c decode\uff0cencode \u64cd\u4f5c\uff0c\u6ce8\u5b9a\u7a0b\u5e8f\u8981\u62a5\u9519\uff0c\u8981\u60b2\u5267\u3002\u90a3\u4e48\u5c31\u6ca1\u6709\u5176\u4ed6\u529e\u6cd5\u6765\u8bc6\u522b\u76ee\u6807\u7f51\u9875\u7f16\u7801\u4e86\u4e48\uff1f\u7b54\u6848\u662f\u5fc5\u987b\u6709\u554a<\/p>\n<ul>\n<li>Heuristic Detect 
\uff08\u542f\u53d1\u5f0f\u63a2\u6d4b\uff09<\/li>\n<\/ul>\n<p>\u987e\u540d\u601d\u4e49\uff0c\u5c31\u662f\u4e3b\u52a8\u63a2\u6d4b\u76ee\u6807\u7f16\u7801\u3002\u6211\u4ee5python\u4e3a\u4f8b\uff0cpython\u6709\u4e2a\u5f88\u597d\u7684\u6a21\u5757\u53eb<code>chardet<\/code>,\u53ef\u4ee5\u4e3b\u52a8\u8bc6\u522b\u76ee\u6807string\u7684\u7f16\u7801\uff0c\u540c\u5b66\u4eec\u53ef\u4ee5\u901a\u8fc7<\/p>\n<pre><code>sudo easy_install chardet <\/code><\/pre>\n<p>\u6216\u8005<\/p>\n<pre><code>sudo pip install chardet <\/code><\/pre>\n<p>\u6765\u5b89\u88c5\u8fd9\u4e2a\u6a21\u5757.\u8fd9\u4e2a\u6a21\u5757\u7684\u5177\u4f53\u4f7f\u7528\u4e0d\u662f\u6211\u4eca\u5929\u4ecb\u7ecd\u7684\u91cd\u70b9\uff0c\u540c\u5b66\u4eec\u81ea\u5df1\u67e5\u624b\u518c\u6216\u770b doc string. \u6709\u8fd9\u4e2a\u6a21\u5757\u4ee5\u540e\u5462\uff0c\u901a\u8fc7\u7efc\u5408\u00a0<code>HTTP HEADER<\/code>,<code>META CHARSET<\/code>\u00a0\u5c31\u80fd\u591f\u51c6\u786e\u8bc6\u522b\u76ee\u6807\u7f51\u9875\u7684\u7f16\u7801\u4e86\u3002 \u5728\u8fd9\u4e2a\u7efc\u5408\u5224\u522b\u7b97\u6cd5\u7684\u8fc7\u7a0b\u4e2d\u8981\u4fee\u6b63\u00a0<code>HTTP HEADER<\/code>,<code>META CHARSET<\/code>\u4e2d\u7684\u9519\u8bef\uff0c\u624b\u8bef\uff0c\u4e0d\u6807\u51c6\u7684\u7f16\u7801\uff0c\u540c\u65f6\u8fd8\u8981\u505a\u4e00\u4e9b\u8f6c\u6362\u7f16\u7801\u522b\u540d\u7684\u64cd\u4f5c\u3002python\u7684\u6807\u51c6\u7f16\u7801\u53c2\u89c1\uff1a<a href=\"http:\/\/docs.python.org\/library\/codecs.html#standard-encodings\">http:\/\/docs.python.org\/library\/codecs.html#standard-encodings<\/a><\/p>\n<p>\u8fd9\u91cc\u6211\u8d342\u6bb5python\u5904\u7406\u8f6c\u6362\u7f16\u7801\u522b\u540d\u4e3a\u6807\u51c6\u7f16\u7801\u7684\u4ee3\u7801\u7247\u6bb5<\/p>\n<pre><code>#http:\/\/docs.python.org\/library\/codecs.html#standard-encodings translate = { 'windows-874': 'iso-8859-11', 'en_us': 'utf8', 'macintosh': 'iso-8859-1',\\ 'euc_tw': 'big5_tw', 'th': 'tis-620','zh-cn': 'gbk','gb_2312-80':'gb2312',\\ 
'iso-latin-1':'iso-8859-1','windows-31j':'shift_jis','x-sjis':'shift_jis',\\ 'none': 'null','no':'null','0ff':'null'} <\/code><\/pre>\n<p>\u8fd8\u6709<\/p>\n<pre><code># \u8c03\u6574\u4e3a\u6b63\u786e\u7684\u7f16\u7801\u65b9\u5f0f if encoding.startswith('8859'): encoding = 'iso-%s' % encoding elif encoding.startswith('cp-'): encoding = 'cp%s' % encoding[3:] elif encoding.startswith('euc-'): encoding = 'euc_%s' % encoding[4:] elif encoding.startswith('windows') and not encoding.startswith('windows-'): encoding = 'windows-%s' % encoding[7:] elif encoding.find('iso-88') &gt; 0: encoding = encoding[encoding.find('iso-88'):] elif encoding.startswith('is0-'): encoding = 'iso%s' % encoding[4:] elif encoding.find('ascii') &gt; 0: encoding = 'ascii' <\/code><\/pre>\n<p>\u901a\u8fc7\u7efc\u5408\u4e0a\u9762\u4ecb\u7ecd\u76843\u79cd\u65b9\u6cd5\uff0c\u518d\u901a\u8fc7\u4e00\u4e2a\u7c7b\u4f3c\u4e0e\u975e\u95e8\u7535\u8def\u7684\u4e00\u4e2a\u5224\u522b\u7b97\u6cd5\u57fa\u672c\u4e0a\u5c31\u80fd\u51c6\u786e\u8bc6\u522b\u51fa\u4e00\u4e2a\u76ee\u6807\u7f51\u9875\u7684\u7f16\u7801\u4e86\u3002\u8fd9\u91cc\u6211\u4e3a\u4ec0\u4e48\u8bf4\u662f\u4e00\u4e2a\u7c7b\u4f3c\u4e0e\u975e\u95e8\u7684\u7b97\u6cd5\u5462\uff1f\u56e0\u4e3a\u6211\u78b0\u5230\u51e0\u4f8bBT\u7684\u7f16\u7801\uff1aHEADER\u4e2d\u4e00\u4e2a\u7f16\u7801\uff0cMETA \u4e2d\u53e6\u5916\u4e00\u4e2a\u7f16\u7801\uff0c\u800c\u771f\u6b63\u7684\u7f51\u9875\u7f16\u7801\u8fd8\u662f\u53e6\u5916\u4e00\u4e2a\uff01<\/p>\n<p>\u76ee\u524d\u8fd9\u4e2a\u7f51\u9875\u7f16\u7801\u8bc6\u522b\u6a21\u5757\u5df2\u7ecf\u5728\u6211\u4eec\u6570\u636e\u4e2d\u5fc3\u5f97\u5230\u4e86\u9a8c\u8bc1\u3002\u800c\u8bc6\u522b\u51fa\u7f51\u9875\u7684\u7f16\u7801\u5c31\u80fd\u7ed9\u540e\u7eed\u7684\u5404\u79cd\u64cd\u4f5c\u51cf\u5c11\u5de5\u4f5c\u91cf\uff0c\u8c01\u7528\u8c01\u77e5\u9053\u3002<\/p>\n<p>\u5728\u5199\u8fd9\u7bc7\u6587\u7ae0\u7684\u8fc7\u7a0b\u4e2d\uff0c\u6240\u6709\u6837\u4f8b\u90fd\u662f\u7531<a 
href=\"http:\/\/www.knownsec.com\/\">\u77e5\u9053\u521b\u5b87<\/a>\u6570\u636e\u4e2d\u5fc3\u63d0\u4f9b\uff0c\u6211\u4eec\u4e13\u6ce8\u4e8e\u4e92\u8054\u7f51\uff0c\u662f\u4e3a\u4e86\u66f4\u597d\u66f4\u5b89\u5168\u7684\u4e92\u8054\u7f51\u3002<\/p>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6765\u6e90\uff1ahttp:\/\/blog.hysia.com\/post\/6\/how-to-d &hellip;<\/p>\n<p class=\"read-more\"><a href=\"http:\/\/zerobox.org\/notes\/466.html\">\u7ee7\u7eed\u9605\u8bfb &raquo;<\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[27],"class_list":["post-466","post","type-post","status-publish","format-standard","hentry","tag-27"],"views":1160,"_links":{"self":[{"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/posts\/466","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/comments?post=466"}],"version-history":[{"count":0,"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/posts\/466\/revisions"}],"wp:attachment":[{"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/media?parent=466"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/categories?post=466"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/zerobox.org\/notes\/wp-json\/wp\/v2\/tags?post=466"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}