Changeset ca85b1c in github


Ignore:
Timestamp:
Jun 3, 2009 6:21:19 AM (4 years ago)
Author:
alecpl <alec@…>
Branches:
master, HEAD, courier-fix, dev-browser-capabilities, pdo, release-0.6, release-0.7, release-0.8
Children:
024130b
Parents:
09839a5
Message:
  • handle iso-8859-1 as windows-1252
  • some fixes for utf8 class
File:
1 edited

Legend:

Unmodified
Added
Removed
  • program/include/main.inc

    ra5897a3 rca85b1c  
    191191  if ($from == $to || empty($str) || empty($from)) 
    192192    return $str; 
    193      
     193 
    194194  // convert charset using iconv module   
    195195  if (function_exists('iconv') && $from != 'UTF-7' && $to != 'UTF-7') { 
    196     $aliases['GB2312'] = 'GB18030'; 
    197     $_iconv = iconv(($aliases[$from] ? $aliases[$from] : $from), ($aliases[$to] ? $aliases[$to] : $to) . "//IGNORE", $str); 
     196    $_iconv = iconv($from, $to . '//IGNORE', $str); 
    198197    if ($_iconv !== false) { 
    199198        return $_iconv; 
     
    245244    } 
    246245    else if ($from != 'UTF-8' && $conv) { 
     246      $from = preg_replace(array('/^WINDOWS-*/', '/^CP-/'), array('CP', 'CP'), $from); 
    247247      $conv->loadCharset($from); 
    248248      $str = $conv->strToUtf8($str); 
     
    265265    } 
    266266    else if ($to != 'UTF-8' && $conv) { 
     267      $to = preg_replace(array('/^WINDOWS-*/', '/^CP-/'), array('CP', 'CP'), $to); 
    267268      $conv->loadCharset($to); 
    268269      return $conv->utf8ToStr($str); 
     
    306307 
    307308  $aliases = array( 
    308     'USASCII'       => 'ISO-8859-1', 
    309     'ANSIX31101983' => 'ISO-8859-1', 
    310     'ANSIX341968'   => 'ISO-8859-1', 
     309    'USASCII'       => 'WINDOWS-1252', 
     310    'ANSIX31101983' => 'WINDOWS-1252', 
     311    'ANSIX341968'   => 'WINDOWS-1252', 
    311312    'UNKNOWN8BIT'   => 'ISO-8859-15', 
    312313    'XUNKNOWN'      => 'ISO-8859-15', 
    313314    'XUSERDEFINED'  => 'ISO-8859-15', 
    314     'ISO88598I'     => 'ISO-8859-8', 
    315315    'KSC56011987'   => 'EUC-KR', 
     316    'GB2312'        => 'GB18030', 
    316317    'UNICODE'       => 'UTF-8', 
    317318    'UTF7IMAP'      => 'UTF7-IMAP' 
     
    326327    return 'UTF-' . $m[1] . $m[2]; 
    327328 
    328   if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) 
    329     return 'ISO-8859-' . ($m[1] ? $m[1] : 1); 
     329  if (preg_match('/ISO8859([0-9]{0,2})/', $str, $m)) { 
     330    $iso = 'ISO-8859-' . ($m[1] ? $m[1] : 1); 
     331    # some clients send windows-1252 text as latin1, 
     332    # it is safe to use windows-1252 for all latin1 
     333    return $iso == 'ISO-8859-1' ? 'WINDOWS-1252' : $iso; 
     334    } 
    330335 
    331336  return $charset; 
Note: See TracChangeset for help on using the changeset viewer.