Changeset 11bcac5 in github
- Timestamp:
- Sep 29, 2010 9:32:41 AM (3 years ago)
- Branches:
- master, HEAD, courier-fix, dev-browser-capabilities, pdo, release-0.6, release-0.7, release-0.8
- Children:
- c609784
- Parents:
- b3660bb
- Files:
-
- 3 edited
-
CHANGELOG (modified) (1 diff)
-
program/lib/html2text.php (modified) (9 diffs)
-
program/steps/utils/html2text.inc (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
CHANGELOG
rb3660bb r11bcac5
 5  5   - Add link to identities in compose window (#1486729)
 6  6   - Add Internationalized Domain Name (IDNA) support (#1483894)
 7      - Add option to automatically send read notifications for known senders (1485883)
 8      - Add option to "Return receipt" will be always checked (1486352)
    7   - Add option to automatically send read notifications for known senders (#1485883)
    8   - Add option to "Return receipt" will be always checked (#1486352)
    9   - Fix HTML to plain text conversion doesn't handle citation blocks (#1486921)
 9 10
10 11   RELEASE 0.4.1
-
program/lib/html2text.php
r5a74dc8 r11bcac5 202 202 "\n\t* ", // <li> 203 203 "\n-------------------------\n", // <hr> 204 "<div>\n", // <div>204 "<div>\n", // <div> 205 205 "\n\n", // <table> and </table> 206 206 "\n", // <tr> and </tr> … … 446 446 447 447 /** 448 * Workhorse function that does actual conversion (calls _converter() method). 449 * 450 * @access private 451 * @return void 452 */ 453 function _convert() 454 { 455 // Variables used for building the link list 456 $this->_link_count = 0; 457 $this->_link_list = ''; 458 459 $text = trim(stripslashes($this->html)); 460 461 // Convert HTML to TXT 462 $this->_converter($text); 463 464 // Add link list 465 if ( !empty($this->_link_list) ) { 466 $text .= "\n\nLinks:\n------\n" . $this->_link_list; 467 } 468 469 $this->text = $text; 470 471 $this->_converted = true; 472 } 473 474 /** 448 475 * Workhorse function that does actual conversion. 449 476 * … … 453 480 * $width characters. 454 481 * 455 * @access private 456 * @return void 457 */ 458 function _convert() 459 { 460 // Variables used for building the link list 461 $this->_link_count = 0; 462 $this->_link_list = ''; 463 464 $text = trim(stripslashes($this->html)); 482 * @param string Reference to HTML content string 483 * 484 * @access private 485 * @return void 486 */ 487 function _converter(&$text) 488 { 489 // Convert <BLOCKQUOTE> (before PRE!) 490 $this->_convert_blockquotes($text); 465 491 466 492 // Convert <PRE> … … 485 511 $text = preg_replace("/\n\s+\n/", "\n\n", $text); 486 512 $text = preg_replace("/[\n]{3,}/", "\n\n", $text); 487 488 // Add link list489 if ( !empty($this->_link_list) ) {490 $text .= "\n\nLinks:\n------\n" . 
$this->_link_list;491 }492 513 493 514 // Wrap the text to a readable format … … 497 518 $text = wordwrap($text, $this->width); 498 519 } 499 500 $this->text = $text;501 502 $this->_converted = true;503 520 } 504 521 … … 518 535 function _build_link_list( $link, $display ) 519 536 { 520 if ( !$this->_do_links ) return $display; 521 522 if ( substr($link, 0, 7) == 'http://' || substr($link, 0, 8) == 'https://' || 523 substr($link, 0, 7) == 'mailto:' ) { 537 if ( !$this->_do_links ) 538 return $display; 539 540 if ( substr($link, 0, 7) == 'http://' || substr($link, 0, 8) == 'https://' || 541 substr($link, 0, 7) == 'mailto:' 542 ) { 524 543 $this->_link_count++; 525 $this->_link_list .= "[". $this->_link_count . "] $link\n";544 $this->_link_list .= '[' . $this->_link_count . "] $link\n"; 526 545 $additional = ' [' . $this->_link_count . ']'; 527 } elseif ( substr($link, 0, 11) == 'javascript:' ) {528 // Don't count the link; ignore it529 $additional = '';546 } elseif ( substr($link, 0, 11) == 'javascript:' ) { 547 // Don't count the link; ignore it 548 $additional = ''; 530 549 // what about href="#anchor" ? 531 550 } else { 532 551 $this->_link_count++; 533 $this->_link_list .= "[" . $this->_link_count . "] ". $this->url;552 $this->_link_list .= '[' . $this->_link_count . '] ' . $this->url; 534 553 if ( substr($link, 0, 1) != '/' ) { 535 554 $this->_link_list .= '/'; … … 541 560 return $display . $additional; 542 561 } 543 562 544 563 /** 545 564 * Helper function for PRE body conversion. … … 550 569 function _convert_pre(&$text) 551 570 { 552 while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {571 while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) { 553 572 $result = preg_replace($this->pre_search, $this->pre_replace, $matches[1]); 554 573 $text = preg_replace('/<pre[^>]*>.*<\/pre>/ismU', '<div><br>' . $result . '<br></div>', $text, 1); 574 } 575 } 576 577 /** 578 * Helper function for BLOCKQUOTE body conversion. 
579 * 580 * @param string HTML content 581 * @access private 582 */ 583 function _convert_blockquotes(&$text) 584 { 585 if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) { 586 $level = 0; 587 $diff = 0; 588 foreach ($matches[0] as $m) { 589 if ($m[0][0] == '<' && $m[0][1] == '/') { 590 $level--; 591 if ($level < 0) { 592 $level = 0; // malformed HTML: go to next blockquote 593 } 594 else if ($level > 0) { 595 // skip inner blockquote 596 } 597 else { 598 $end = $m[1]; 599 $len = $end - $taglen - $start; 600 // Get blockquote content 601 $body = substr($text, $start + $taglen - $diff, $len); 602 603 // Set text width 604 $p_width = $this->width; 605 if ($this->width > 0) $this->width -= 2; 606 // Convert blockquote content 607 $body = trim($body); 608 $this->_converter($body); 609 // Add citation markers and create PRE block 610 $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body)); 611 $body = '<pre>' . htmlspecialchars($body) . '</pre>'; 612 // Re-set text width 613 $this->width = $p_width; 614 // Replace content 615 $text = substr($text, 0, $start - $diff) 616 . $body . substr($text, $end + strlen($m[0]) - $diff); 617 618 $diff = $len + $taglen + strlen($m[0]) - strlen($body); 619 unset($body); 620 } 621 } 622 else { 623 if ($level == 0) { 624 $start = $m[1]; 625 $taglen = strlen($m[0]); 626 } 627 $level ++; 628 } 629 } 555 630 } 556 631 } … … 593 668 } 594 669 } 595 596 ?> -
program/steps/utils/html2text.inc
re019f2d r11bcac5 23 23 24 24 header('Content-Type: text/plain; charset=UTF-8'); 25 print trim($converter->get_text());25 print rtrim($converter->get_text()); 26 26 exit; 27 27
Note: See TracChangeset for help on using the changeset viewer.
