Changeset 1603 in subversion
- Timestamp: Jul 22, 2008 4:01:42 AM (5 years ago)
- Location: trunk/roundcubemail/program
- Files: 4 edited
- include/rcube_message.php (modified) (1 diff)
- lib/washtml.php (modified) (9 diffs)
- steps/mail/func.inc (modified) (5 diffs)
- steps/mail/get.inc (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/roundcubemail/program/include/rcube_message.php
r1534 r1603 58 58 59 59 $this->set_safe((intval($_GET['_safe']) || $_SESSION['safe_messages'][$uid])); 60 $this->set_safe(0);61 60 62 61 $this->opt = array( -
trunk/roundcubemail/program/lib/washtml.php
r1544 r1603 34 34 * SYNOPSIS: 35 35 * 36 * washtml::wash($html, $config, $full); 36 * $washer = new washtml($config); 37 * $washer->wash($html); 37 38 * It return a sanityzed string of the $html parameter without html and head tags. 38 39 * $html is a string containing the html code to wash. … … 43 44 * $config['cid_map'] is an array where cid urls index urls to replace them. 44 45 * $config['charset'] is a string containing the charset of the HTML document if it is not defined in it. 45 * $ full is a reference to a boolean that is set to true if no remote images are removed. (FE: show remote images link)46 * $washer->extlinks is a reference to a boolean that is set to true if remote images were removed. (FE: show remote images link) 46 47 * 47 48 * INTERNALS: 48 49 * 49 * Only tags and attributes in the globals $html_elements and $html_attributes50 * Only tags and attributes in the static lists $html_elements and $html_attributes 50 51 * are kept, inline styles are also filtered: all style identifiers matching 51 52 * /[a-z\-]/i are allowed. 
Values matching colors, sizes, /[a-z\-]/i and safe … … 73 74 class washtml 74 75 { 75 76 /* Allowed HTML elements (default) */ 77 static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'img'); 78 79 /* Ignore these HTML tags but process their content */ 80 static $ignore_elements = array('html', 'body'); 81 82 /* Allowed HTML attributes */ 83 static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir', 'background'); 84 85 /* State for linked objects in HTML */ 86 public $extlinks = false; 87 88 /* Current settings */ 89 private $config = array(); 90 91 /* Registered callback functions for tags */ 92 private $handlers = array(); 93 76 94 /* Allowed HTML elements */ 77 static $html_elements = array('a', 'abbr', 'acronym', 'address', 'area', 'b', 'basefont', 'bdo', 'big', 'blockquote', 'body', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt', 'em', 'fieldset', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'ins', 'label', 'legend', 'li', 'map', 'menu', 'ol', 'p', 'pre', 'q', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'title', 'tr', 
'tt', 'u', 'ul', 'var', 'img'); 95 private $_html_elements = array(); 96 97 /* Ignore these HTML tags but process their content */ 98 private $_ignore_elements = array(); 78 99 79 100 /* Allowed HTML attributes */ 80 static $html_attribs = array('name', 'class', 'title', 'alt', 'width', 'height', 'align', 'nowrap', 'col', 'row', 'id', 'rowspan', 'colspan', 'cellspacing', 'cellpadding', 'valign', 'bgcolor', 'color', 'border', 'bordercolorlight', 'bordercolordark', 'face', 'marginwidth', 'marginheight', 'axis', 'border', 'abbr', 'char', 'charoff', 'clear', 'compact', 'coords', 'vspace', 'hspace', 'cellborder', 'size', 'lang', 'dir', 'background'); 81 101 private $_html_attribs = array(); 102 103 104 /* Constructor */ 105 public function __construct($p = array()) { 106 $this->_html_elements = array_flip((array)$p['html_elements']) + array_flip(self::$html_elements) ; 107 $this->_html_attribs = array_flip((array)$p['html_attribs']) + array_flip(self::$html_attribs); 108 $this->_ignore_elements = array_flip((array)$p['ignore_elements']) + array_flip(self::$ignore_elements); 109 unset($p['html_elements'], $p['html_attribs'], $p['ignore_elements']); 110 $this->config = $p + array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array()); 111 } 112 113 /* Register a callback function for a certain tag */ 114 public function add_callback($tagName, $callback) 115 { 116 $this->handlers[$tagName] = $callback; 117 } 118 82 119 /* Check CSS style */ 83 static function wash_style($style, $config, &$full) {120 private function wash_style($style) { 84 121 $s = ''; 85 122 … … 97 134 if($match[2]) { 98 135 if(preg_match('/^(http|https|ftp):.*$/i', $match[2], $url)) { 99 if($ config['allow_remote'])136 if($this->config['allow_remote']) 100 137 $value .= ' url(\''.htmlspecialchars($url[0], ENT_QUOTES).'\')'; 101 138 else 102 $ full = false;139 $this->extlinks = true; 103 140 } else if(preg_match('/^cid:(.*)$/i', $match[2], $cid)) 104 $value .= ' url(\''.htmlspecialchars($ 
config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '\')';141 $value .= ' url(\''.htmlspecialchars($this->config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '\')'; 105 142 } else if($match[0] != 'url' && $match[0] != 'rbg')//whitelist ? 106 143 $value .= ' ' . $match[0]; … … 115 152 116 153 /* Take a node and return allowed attributes and check values */ 117 static function wash_attribs($node, $config, &$full) {154 private function wash_attribs($node) { 118 155 $t = ''; 119 156 $washed; … … 122 159 $key = strtolower($key); 123 160 $value = $node->getAttribute($key); 124 if( (in_array($key, self::$html_attribs)) ||161 if(isset($this->_html_attribs[$key]) || 125 162 ($key == 'href' && preg_match('/^(http|https|ftp|mailto):.*/i', $value))) 126 163 $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"'; 127 else if($key == 'style' && ($style = self::wash_style($value, $config, $full)))164 else if($key == 'style' && ($style = $this->wash_style($value))) 128 165 $t .= ' style="' . $style . '"'; 129 166 else if($key == 'src' && strtolower($node->tagName) == 'img') { //check tagName anyway 130 167 if(preg_match('/^(http|https|ftp):.*/i', $value)) { 131 if($ config['allow_remote'])168 if($this->config['allow_remote']) 132 169 $t .= ' ' . $key . '="' . htmlspecialchars($value, ENT_QUOTES) . '"'; 133 170 else { 134 $ full = false;135 if ($ config['blocked_src'])136 $t .= ' src="' . htmlspecialchars($ config['blocked_src'], ENT_QUOTES) . '"';171 $this->extlinks = true; 172 if ($this->config['blocked_src']) 173 $t .= ' src="' . htmlspecialchars($this->config['blocked_src'], ENT_QUOTES) . '"'; 137 174 } 138 175 } else if(preg_match('/^cid:(.*)$/i', $value, $cid)) 139 $t .= ' ' . $key . '="' . htmlspecialchars($ config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '"';176 $t .= ' ' . $key . '="' . htmlspecialchars($this->config['cid_map']['cid:'.$cid[1]], ENT_QUOTES) . '"'; 140 177 } else 141 178 $washed .= ($washed?' ':'') . $key; 142 179 } 143 return $t . 
($washed && $ config['show_washed']?' x-washed="'.$washed.'"':'');180 return $t . ($washed && $this->config['show_washed']?' x-washed="'.$washed.'"':''); 144 181 } 145 182 … … 147 184 * It output only allowed tags with allowed attributes 148 185 * and allowed inline styles */ 149 static function dumpHtml($node, $config, &$full) {186 private function dumpHtml($node) { 150 187 if(!$node->hasChildNodes()) 151 188 return ''; … … 158 195 case XML_ELEMENT_NODE: //Check element 159 196 $tagName = strtolower($node->tagName); 160 if(in_array($tagName, self::$html_elements)) { 161 $content = self::dumpHtml($node, $config, $full); 162 $dump .= '<' . $tagName . self::wash_attribs($node, $config, $full) . 197 if($callback = $this->handlers[$tagName]) { 198 $dump .= call_user_func($callback, $tagName, $this->wash_attribs($node), $this->dumpHtml($node)); 199 } else if(isset($this->_html_elements[$tagName])) { 200 $content = $this->dumpHtml($node); 201 $dump .= '<' . $tagName . $this->wash_attribs($node) . 163 202 ($content?">$content</$tagName>":' />'); 164 } else if($tagName == 'html' || $tagName == 'body') { 165 $dump .= self::dumpHtml($node, $config, $full); //Just ignored 203 } else if(isset($this->_ignore_elements[$tagName])) { 204 $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' ignored -->'; 205 $dump .= $this->dumpHtml($node); //Just ignored 166 206 } else 167 207 $dump .= '<!-- ' . htmlspecialchars($tagName, ENT_QUOTES) . ' not allowed -->'; 168 208 break; 209 case XML_CDATA_SECTION_NODE: 210 $dump .= $node->nodeValue; 211 break; 169 212 case XML_TEXT_NODE: 170 213 $dump .= htmlspecialchars($node->nodeValue); 171 214 break; 172 215 case XML_HTML_DOCUMENT_NODE: 173 $dump .= self::dumpHtml($node, $config, $full); 174 break; 175 case XML_DOCUMENT_TYPE_NODE: break; 216 $dump .= $this->dumpHtml($node); 217 break; 218 case XML_DOCUMENT_TYPE_NODE: 219 break; 176 220 default: 221 $dump . '<!-- node type ' . $node->nodeType . 
' -->'; 177 222 } 178 223 } while($node = $node->nextSibling); … … 183 228 /* Main function, give it untrusted HTML, tell it if you allow loading 184 229 * remote images and give it a map to convert "cid:" urls. */ 185 static function wash($html, $config=array(), &$full=true) { 186 $config += array('show_washed'=>true, 'allow_remote'=>false, 'cid_map'=>array()); 230 public function wash($html) { 187 231 //Charset seems to be ignored (probably if defined in the HTML document) 188 $node = new DOMDocument('1.0', $ config['charset']);189 $ full = true;232 $node = new DOMDocument('1.0', $this->config['charset']); 233 $this->extlinks = false; 190 234 @$node->loadHTML($html); 191 return self::dumpHtml($node, $config, $full);235 return $this->dumpHtml($node); 192 236 } 193 237 -
trunk/roundcubemail/program/steps/mail/func.inc
r1601 r1603 540 540 * @return string Formatted HTML string 541 541 */ 542 function rcmail_print_body($part, $ safe=false, $plain=false)542 function rcmail_print_body($part, $p = array()) 543 543 { 544 544 global $REMOTE_OBJECTS; 545 545 546 $p += array('safe' => false, 'plain' => false, 'inline_html' => true); 547 546 548 // convert html to text/plain 547 if ($part->ctype_secondary == 'html' && $p lain) {549 if ($part->ctype_secondary == 'html' && $p['plain']) { 548 550 $txt = new html2text($part->body, false, true); 549 551 $body = $txt->get_text(); … … 554 556 // charset was converted to UTF-8 in rcube_imap::get_message_part() -> change charset specification in HTML accordingly 555 557 $html = $part->body; 556 if (preg_match('/(\s+content=[\'"]\w+\/\w+;\s+charset)=([a-z0-9-]+)/i', $html))557 $html = preg_replace('/(\s+content=[\'"]\w+\/\w+;\s +charset)=([a-z0-9-]+)/i', '\\1='.RCMAIL_CHARSET, $html);558 if (preg_match('/(\s+content=[\'"]\w+\/\w+;\s*charset)=([a-z0-9-]+)/i', $html)) 559 $html = preg_replace('/(\s+content=[\'"]\w+\/\w+;\s*charset)=([a-z0-9-]+)/i', '\\1='.RCMAIL_CHARSET, $html); 558 560 else { 559 561 // add <head> for malformed messages, washtml cannot work without that 560 if (!preg_match('/<head>(.*)<\ /head>/m', $html))562 if (!preg_match('/<head>(.*)<\\/head>/Uims', $html)) 561 563 $html = '<head></head>' . 
$html; 562 564 $html = substr_replace($html, '<meta http-equiv="Content-Type" content="text/html; charset='.RCMAIL_CHARSET.'" />', intval(stripos($html, '</head>')), 0); 563 565 } 564 566 565 567 // clean HTML with washhtml by Frederic Motte 566 $ body = washtml::wash($html,array(568 $wash_opts = array( 567 569 'show_washed' => false, 568 'allow_remote' => $ safe,570 'allow_remote' => $p['safe'], 569 571 'blocked_src' => "./program/blocked.gif", 570 572 'charset' => RCMAIL_CHARSET, 571 573 'cid_map' => $part->replaces, 572 ), $full_inline); 573 574 $REMOTE_OBJECTS = !$full_inline; 574 'html_elements' => array('body'), 575 ); 576 577 if (!$p['inline_html']) { 578 $wash_opts['html_elements'] = array('html','head','title','body'); 579 } 580 581 /* CSS styles need to be sanitized! 582 if ($p['safe']) { 583 $wash_opts['html_elements'][] = 'style'; 584 $wash_opts['html_attribs'] = array('type'); 585 } 586 */ 587 588 $washer = new washtml($wash_opts); 589 $washer->add_callback('form', 'rcmail_washtml_callback'); 590 $body = $washer->wash($html); 591 $REMOTE_OBJECTS = $washer->extlinks; 575 592 576 593 return $body; … … 638 655 639 656 return "<div class=\"pre\">".$body."\n</div>"; 640 } 641 642 657 } 643 658 644 659 /** … … 646 661 */ 647 662 function rcmail_str_replacement($str, &$rep) 648 {663 { 649 664 static $count = 0; 650 665 $rep[$count] = stripslashes($str); 651 666 return "##string_replacement{".($count++)."}##"; 652 } 653 667 } 668 669 670 /** 671 * Callback function for washtml cleaning class 672 */ 673 function rcmail_washtml_callback($tagname, $attrib, $content) 674 { 675 switch ($tagname) { 676 case 'form': 677 $out = html::div('form', $content); 678 break; 679 680 default: 681 $out = ''; 682 } 683 684 return $out; 685 } 654 686 655 687 … … 757 789 $part->body = $MESSAGE->get_part_content($part->mime_id); 758 790 759 $body = rcmail_print_body($part, $safe_mode, !$CONFIG['prefer_html']);791 $body = rcmail_print_body($part, array('safe' => $safe_mode, 'plain' 
=> !$CONFIG['prefer_html'])); 760 792 761 793 if ($part->ctype_secondary == 'html') -
trunk/roundcubemail/program/steps/mail/get.inc
r1400 r1603 66 66 header("Content-Type: application/octet-stream"); 67 67 } 68 else if ($ctype_primary == 'text') 69 header("Content-Type: text/$ctype_secondary; charset=" . RCMAIL_CHARSET); 68 70 else 69 71 header("Content-Type: $mimetype"); … … 96 98 97 99 $OUTPUT = new rcube_html_page(); 98 $OUTPUT->write(rcmail_print_body($part, $MESSAGE->is_safe));100 $OUTPUT->write(rcmail_print_body($part, array('safe' => $MESSAGE->is_safe, 'inline_html' => false))); 99 101 } 100 102 else {
Note: See TracChangeset for help on using the changeset viewer.
