PHP string序列化与反序列化语法解析不一致带来的安全隐患

日期: 2012年11月21日 | 标签:PHP string | 阅读:1,030
PHP string序列化与反序列化语法解析不一致带来的安全隐患

author: ryat#www.wolvez.org
team:http://www.80vul.com
date:2012-11-19

   不久前 IPB 爆出了一个 unserialize() 漏洞［1］，漏洞本身没有什么特别的，但 IPB 官方发布的 patch 却很有意思［2］，很快 Stefan Esser 在其 twitter 上给出了 bypass 的方法［3］。随后 IPB 官方针对此 bypass 发布了新的 patch，但 Stefan Esser 表示新 patch 通过改变处理方式最终化解了此漏洞，但其提供的过滤函数 safeUnserialize() 依旧是存在安全问题的。虽然 Stefan Esser 没有透漏问题具体所在，但笔者通过查看相关 PHP 源码，发现 PHP 在对 string 进行序列化与反序列化处理过程中存在语法解析不一致的问题，这有可能会导致很严重的安全问题，同时也可以很容易的 bypass safeUnserialize() 函数的过滤。

i. PHP string serialize() 相关源码分析
------------------------------------

{% highlight c %}

	static inline void php_var_serialize_string(smart_str *buf, char *str, int len) /* {{{ */
	{
		smart_str_appendl(buf, "s:", 2);
		smart_str_append_long(buf, len);
		smart_str_appendl(buf, ":\"", 2);
		smart_str_appendl(buf, str, len);
		smart_str_appendl(buf, "\";", 2);
	}

{% endhighlight %}

通过上面的代码片段可以看到 serialize() 对 string 序列化处理方式如下：

{% highlight php %}

	$str = 'ryatsyne';
	var_dump(serialize($str));
	// $str serialized string output
	// s:8:"ryatsyne";

{% endhighlight %}

ii. PHP string unserialize() 相关源码分析
---------------------------------------

unserialize() 函数对 string 的反序列化则分为两种，一种是对 `s:` 格式的序列化 string 进行处理：

{% highlight c %}

	switch (yych) {
    	...
		case 's':	goto yy9;
		...
	yy9:
		yych = *(YYMARKER = ++YYCURSOR);
		if (yych == ':') goto yy46;
		goto yy3;
		...
	yy46:
		yych = *++YYCURSOR;
		if (yych == '+') goto yy47;
		if (yych <= '/') goto yy18;
		if (yych <= '9') goto yy48;
		goto yy18;
	yy47:
		yych = *++YYCURSOR;
		if (yych <= '/') goto yy18;
		if (yych >= ':') goto yy18;
	yy48:
		++YYCURSOR;
		if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
		yych = *YYCURSOR;
		if (yych <= '/') goto yy18;
		if (yych <= '9') goto yy48;
		if (yych >= ';') goto yy18;
		yych = *++YYCURSOR;
		if (yych != '"') goto yy18;
		++YYCURSOR;
		{
		size_t len, maxlen;
		char *str;

		len = parse_uiv(start + 2);
		maxlen = max - YYCURSOR;
		if (maxlen < len) {
			*p = start + 2;
			return 0;
		}

		str = (char*)YYCURSOR;

		YYCURSOR += len;

		if (*(YYCURSOR) != '"') {
			*p = YYCURSOR;
			return 0;
		}
	// 确保格式为 s:x:"x"

		YYCURSOR += 2;
		*p = YYCURSOR;
	// 注意这里，*p 指针直接后移了两位，也就是说没有判断 " 后面是否为 ;

		INIT_PZVAL(*rval);
		ZVAL_STRINGL(*rval, str, len, 1);
		return 1;

{% endhighlight %}

另一种是对 S: 格式的序列 string 进行处理（此格式在 serialize() 函数序列化处理中并没有定义）：

{% highlight c %}

	static char *unserialize_str(const unsigned char **p, size_t *len, size_t maxlen)
	{
		size_t i, j;
		char *str = safe_emalloc(*len, 1, 1);
		unsigned char *end = *(unsigned char **)p+maxlen;

		if (end < *p) {
			efree(str);
			return NULL;
		}

		for (i = 0; i < *len; i++) {
			if (*p >= end) {
				efree(str);
				return NULL;
			}
			if (**p != '\\') {
				str[i] = (char)**p;
			} else {
				unsigned char ch = 0;

				for (j = 0; j < 2; j++) {
					(*p)++;
					if (**p >= '0' && **p <= '9') {
						ch = (ch << 4) + (**p -'0');
					} else if (**p >= 'a' && **p <= 'f') {
						ch = (ch << 4) + (**p -'a'+10);
					} else if (**p >= 'A' && **p <= 'F') {
						ch = (ch << 4) + (**p -'A'+10);
					} else {
						efree(str);
						return NULL;
					}
				}
				str[i] = (char)ch;
			}
			(*p)++;
		}
		str[i] = 0;
		*len = i;
		return str;
	}
	// 上面的函数是对 e 这样十六进制形式字符串进行转换
	...
	switch (yych) {
		...
		case 'S':	goto yy10;
		// 处理过程与 s: 相同				
		if ((str = unserialize_str(&YYCURSOR, &len, maxlen)) == NULL) {
			return 0;
		}
		// 处理过程与 s: 相同

{% endhighlight %}

从上面的代码片段可以看到 unserialize() 对序列化后的 string 反序列化处理如下：

{% highlight php %}

	$str1 = 's:8:"ryatsyne";';
	$str2 = 's:8:"ryatsyne"t';
	$str3 = 'S:8:"e"';
	var_dump(unserialize($str));
	// $str1, $str2 and $str3 unserialized string output
	// ryatsyne;

{% endhighlight %}

iii. 语法解析处理不一致导致的安全隐患
-----------------------------

从上述分析过程可以看到 PHP 在反序列化 string 时没有严格按照序列化格式 s:x:"x"; 进行处理，没有对 " 后面的是否存在 ; 进行判断，同时增加了对十六进制形式字符串的处理，这样前后处理的不一致让人很费解，同时由于 PHP 手册中对此没有详细的说明，大部分程序员对此处理过程并不了解，这可能导致其在编码过程中出现疏漏，甚至导致严重的安全问题。

回到文章开头提到的 IPB 漏洞上，利用这个 funny feature of PHP 可以很容易的 bypass safeUnserialize() 函数的过滤：）

{% highlight php %}

    * mixed safe_unserialize(string $serialized)
    * Safely unserialize, that is only unserialize string, numbers and arrays, not objects
    *
    * @license Public Domain
    * @author dcz (at) phpbb-seo (dot) com
    */
    static public function safeUnserialize( $serialized )
    {
        // unserialize will return false for object declared with small cap o
        // as well as if there is any ws between O and :
        if ( is_string( $serialized ) && strpos( $serialized, "PHP string序列化与反序列化语法解析不一致带来的安全隐患

author: ryat#www.wolvez.org
team:http://www.80vul.com
date:2012-11-19

   不久前 IPB 爆出了一个 unserialize() 漏洞［1］，漏洞本身没有什么特别的，但 IPB 官方发布的 patch 却很有意思［2］，很快 Stefan Esser 在其 twitter 上给出了 bypass 的方法［3］。随后 IPB 官方针对此 bypass 发布了新的 patch，但 Stefan Esser 表示新 patch 通过改变处理方式最终化解了此漏洞，但其提供的过滤函数 safeUnserialize() 依旧是存在安全问题的。虽然 Stefan Esser 没有透漏问题具体所在，但笔者通过查看相关 PHP 源码，发现 PHP 在对 string 进行序列化与反序列化处理过程中存在语法解析不一致的问题，这有可能会导致很严重的安全问题，同时也可以很容易的 bypass safeUnserialize() 函数的过滤。

i. PHP string serialize() 相关源码分析
------------------------------------

{% highlight c %}

	static inline void php_var_serialize_string(smart_str *buf, char *str, int len) /* {{{ */
	{
		smart_str_appendl(buf, "s:", 2);
		smart_str_append_long(buf, len);
		smart_str_appendl(buf, ":\"", 2);
		smart_str_appendl(buf, str, len);
		smart_str_appendl(buf, "\";", 2);
	}

{% endhighlight %}

通过上面的代码片段可以看到 serialize() 对 string 序列化处理方式如下：

{% highlight php %}

	$str = 'ryatsyne';
	var_dump(serialize($str));
	// $str serialized string output
	// s:8:"ryatsyne";

{% endhighlight %}

ii. PHP string unserialize() 相关源码分析
---------------------------------------

unserialize() 函数对 string 的反序列化则分为两种，一种是对 `s:` 格式的序列化 string 进行处理：

{% highlight c %}

	switch (yych) {
    	...
		case 's':	goto yy9;
		...
	yy9:
		yych = *(YYMARKER = ++YYCURSOR);
		if (yych == ':') goto yy46;
		goto yy3;
		...
	yy46:
		yych = *++YYCURSOR;
		if (yych == '+') goto yy47;
		if (yych <= '/') goto yy18;
		if (yych <= '9') goto yy48;
		goto yy18;
	yy47:
		yych = *++YYCURSOR;
		if (yych <= '/') goto yy18;
		if (yych >= ':') goto yy18;
	yy48:
		++YYCURSOR;
		if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
		yych = *YYCURSOR;
		if (yych <= '/') goto yy18;
		if (yych <= '9') goto yy48;
		if (yych >= ';') goto yy18;
		yych = *++YYCURSOR;
		if (yych != '"') goto yy18;
		++YYCURSOR;
		{
		size_t len, maxlen;
		char *str;

		len = parse_uiv(start + 2);
		maxlen = max - YYCURSOR;
		if (maxlen < len) {
			*p = start + 2;
			return 0;
		}

		str = (char*)YYCURSOR;

		YYCURSOR += len;

		if (*(YYCURSOR) != '"') {
			*p = YYCURSOR;
			return 0;
		}
	// 确保格式为 s:x:"x"

		YYCURSOR += 2;
		*p = YYCURSOR;
	// 注意这里，*p 指针直接后移了两位，也就是说没有判断 " 后面是否为 ;

		INIT_PZVAL(*rval);
		ZVAL_STRINGL(*rval, str, len, 1);
		return 1;

{% endhighlight %}

另一种是对 S: 格式的序列 string 进行处理（此格式在 serialize() 函数序列化处理中并没有定义）：

{% highlight c %}

	static char *unserialize_str(const unsigned char **p, size_t *len, size_t maxlen)
	{
		size_t i, j;
		char *str = safe_emalloc(*len, 1, 1);
		unsigned char *end = *(unsigned char **)p+maxlen;

		if (end < *p) {
			efree(str);
			return NULL;
		}

		for (i = 0; i < *len; i++) {
			if (*p >= end) {
				efree(str);
				return NULL;
			}
			if (**p != '\\') {
				str[i] = (char)**p;
			} else {
				unsigned char ch = 0;

				for (j = 0; j < 2; j++) {
					(*p)++;
					if (**p >= '0' && **p <= '9') {
						ch = (ch << 4) + (**p -'0');
					} else if (**p >= 'a' && **p <= 'f') {
						ch = (ch << 4) + (**p -'a'+10);
					} else if (**p >= 'A' && **p <= 'F') {
						ch = (ch << 4) + (**p -'A'+10);
					} else {
						efree(str);
						return NULL;
					}
				}
				str[i] = (char)ch;
			}
			(*p)++;
		}
		str[i] = 0;
		*len = i;
		return str;
	}
	// 上面的函数是对 \72\79\61\74\73\79\6e\65 这样十六进制形式字符串进行转换
	...
	switch (yych) {
		...
		case 'S':	goto yy10;
		// 处理过程与 s: 相同				
		if ((str = unserialize_str(&YYCURSOR, &len, maxlen)) == NULL) {
			return 0;
		}
		// 处理过程与 s: 相同

{% endhighlight %}

从上面的代码片段可以看到 unserialize() 对序列化后的 string 反序列化处理如下：

{% highlight php %}

	$str1 = 's:8:"ryatsyne";';
	$str2 = 's:8:"ryatsyne"t';
	$str3 = 'S:8:"\72\79\61\74\73\79\6e\65"';
	var_dump(unserialize($str));
	// $str1, $str2 and $str3 unserialized string output
	// ryatsyne;

{% endhighlight %}

iii. 语法解析处理不一致导致的安全隐患
-----------------------------

从上述分析过程可以看到 PHP 在反序列化 string 时没有严格按照序列化格式 s:x:"x"; 进行处理，没有对 " 后面的是否存在 ; 进行判断，同时增加了对十六进制形式字符串的处理，这样前后处理的不一致让人很费解，同时由于 PHP 手册中对此没有详细的说明，大部分程序员对此处理过程并不了解，这可能导致其在编码过程中出现疏漏，甚至导致严重的安全问题。

回到文章开头提到的 IPB 漏洞上，利用这个 funny feature of PHP 可以很容易的 bypass safeUnserialize() 函数的过滤：）

{% highlight php %}

    * mixed safe_unserialize(string $serialized)
    * Safely unserialize, that is only unserialize string, numbers and arrays, not objects
    *
    * @license Public Domain
    * @author dcz (at) phpbb-seo (dot) com
    */
    static public function safeUnserialize( $serialized )
    {
        // unserialize will return false for object declared with small cap o
        // as well as if there is any ws between O and :
        if ( is_string( $serialized ) && strpos( $serialized, "\0" ) === false )
        {
            if ( strpos( $serialized, 'O:' ) === false )
            {
                // the easy case, nothing to worry about
                // let unserialize do the job
                return @unserialize( $serialized );
            }
            else if ( ! preg_match('/(^|;|{|})O:[+\-0-9]+:"/', $serialized ) )
            {
                // in case we did have a string with O: in it,
                // but it was not a true serialized object
                return @unserialize( $serialized );
            }
        }

        return false;
    }

    // a:1:{s:8:"ryatsyne"tO:8:"ryatsyne":0:{}}
    // 只要构造类似的序列化字符串就可以轻易突破这里的过滤了

{% endhighlight %}

iiii. 参考
----------

* ［1］[http://seclists.org/bugtraq/2012/Nov/17](http://seclists.org/bugtraq/2012/Nov/17)
* ［2］[http://adminextra.com/threads/ip-board-3-1-x-3-2-x-and-3-3-x-hacked.6125/](http://adminextra.com/threads/ip-board-3-1-x-3-2-x-and-3-3-x-hacked.6125/)
* ［3］[http://twitter.com/i0n1c](http://twitter.com/i0n1c)" ) === false )
        {
            if ( strpos( $serialized, 'O:' ) === false )
            {
                // the easy case, nothing to worry about
                // let unserialize do the job
                return @unserialize( $serialized );
            }
            else if ( ! preg_match('/(^|;|{|})O:[+\-0-9]+:"/', $serialized ) )
            {
                // in case we did have a string with O: in it,
                // but it was not a true serialized object
                return @unserialize( $serialized );
            }
        }

        return false;
    }

    // a:1:{s:8:"ryatsyne"tO:8:"ryatsyne":0:{}}
    // 只要构造类似的序列化字符串就可以轻易突破这里的过滤了

{% endhighlight %}

iiii. 参考
----------

* ［1］[http://seclists.org/bugtraq/2012/Nov/17](http://seclists.org/bugtraq/2012/Nov/17)
* ［2］[http://adminextra.com/threads/ip-board-3-1-x-3-2-x-and-3-3-x-hacked.6125/](http://adminextra.com/threads/ip-board-3-1-x-3-2-x-and-3-3-x-hacked.6125/)
* ［3］[http://twitter.com/i0n1c](http://twitter.com/i0n1c)
← zabbix中文配置指南
理想的公司 →
ZeroBox Notes

PHP string序列化与反序列化语法解析不一致带来的安全隐患

评论关闭。