1787 * @return string cleaned HTML |
1787 * @return string cleaned HTML |
1788 */ |
1788 */ |
1789 |
1789 |
1790 function sanitize_html($html, $filter_php = true) |
1790 function sanitize_html($html, $filter_php = true) |
1791 { |
1791 { |
|
1792 // Random seed for substitution |
|
1793 $rand_seed = md5( sha1(microtime()) . mt_rand() ); |
|
1794 |
|
1795 // Strip out comments that are already escaped |
|
1796 preg_match_all('/<!--(.*?)-->/', $html, $comment_match); |
|
1797 $i = 0; |
|
1798 foreach ( $comment_match[0] as $comment ) |
|
1799 { |
|
1800 $html = str_replace_once($comment, "{HTMLCOMMENT:$i:$rand_seed}", $html); |
|
1801 $i++; |
|
1802 } |
|
1803 |
|
1804 // Strip out code sections that will be postprocessed by Text_Wiki |
|
1805 preg_match_all(';^<code(\s[^>]*)?>((?:(?R)|.)*?)\n</code>(\s|$);msi', $html, $code_match); |
|
1806 $i = 0; |
|
1807 foreach ( $code_match[0] as $code ) |
|
1808 { |
|
1809 $html = str_replace_once($code, "{TW_CODE:$i:$rand_seed}", $html); |
|
1810 $i++; |
|
1811 } |
1792 |
1812 |
1793 $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>(.*?)</\\1>#is', '<\\1\\2\\3javascript:\\59>\\60</\\1>', $html); |
1813 $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>(.*?)</\\1>#is', '<\\1\\2\\3javascript:\\59>\\60</\\1>', $html); |
1794 $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>#is', '<\\1\\2\\3javascript:\\59>', $html); |
1814 $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>#is', '<\\1\\2\\3javascript:\\59>', $html); |
1795 |
1815 |
1796 if($filter_php) |
1816 if($filter_php) |
1898 // The rule is so specific because everything else will have been filtered by now |
1918 // The rule is so specific because everything else will have been filtered by now |
1899 $html = preg_replace('/<(script|iframe)(.+?)src=([^>]*)</i', '<\\1\\2src=\\3<', $html); |
1919 $html = preg_replace('/<(script|iframe)(.+?)src=([^>]*)</i', '<\\1\\2src=\\3<', $html); |
1900 |
1920 |
1901 // Unstrip comments |
1921 // Unstrip comments |
1902 $html = preg_replace('/<!--([^>]*?)-->/i', '', $html); |
1922 $html = preg_replace('/<!--([^>]*?)-->/i', '', $html); |
|
1923 |
|
1924 // Restore stripped comments |
|
1925 $i = 0; |
|
1926 foreach ( $comment_match[0] as $comment ) |
|
1927 { |
|
1928 $html = str_replace_once("{HTMLCOMMENT:$i:$rand_seed}", $comment, $html); |
|
1929 $i++; |
|
1930 } |
|
1931 |
|
1932 // Restore stripped code |
|
1933 $i = 0; |
|
1934 foreach ( $code_match[0] as $code ) |
|
1935 { |
|
1936 $html = str_replace_once("{TW_CODE:$i:$rand_seed}", $code, $html); |
|
1937 $i++; |
|
1938 } |
1903 |
1939 |
1904 return $html; |
1940 return $html; |
1905 |
1941 |
1906 } |
1942 } |
1907 |
1943 |