2019-12-24

判斷文字檔的編碼是 ANSI 或 UTF-8

# Code page that counts as "ANSI" for this check:
#   932 = shift_jis, 936 = gb2312, 950 = big5
$ansi_codePage = 950

# Exception fallbacks make the code-page encoding throw on data it cannot
# convert, so a failed decode can be caught by the caller.
$encoderFallback = New-Object -TypeName System.Text.EncoderExceptionFallback
$decoderFallback = New-Object -TypeName System.Text.DecoderExceptionFallback
$ansiEnc = [System.Text.Encoding]::GetEncoding($ansi_codePage, $encoderFallback, $decoderFallback)

# UTF8Encoding(encoderShouldEmitUTF8Identifier, throwOnInvalidBytes):
# no BOM is emitted, and invalid byte sequences raise an exception.
$utf8Enc = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false, $true
#====================
# Report the detected encoding of every *.txt file in the current location.
# A file that starts with a Unicode byte-order mark (BOM) is classified by that
# mark. Any other file is trial-decoded with $ansiEnc and $utf8Enc (created
# earlier in this script so that they throw on invalid bytes); the result is
# 'ANSI', 'UTF-8', 'ANSI and UTF-8' (both decoders accept it), or 'unknown'.
foreach($file in Get-ChildItem *.txt){
    $fileEnc = 'unknown'
    # Files shorter than three bytes are reported and skipped.
    if ($file.length -lt 3){"$file is too small"; continue }

    # Read via the absolute path: .NET resolves relative paths against the
    # process working directory, which can differ from PowerShell's location.
    $bytes = [IO.File]::ReadAllBytes($file.FullName)

    # Hex form of up to the first four bytes, e.g. 'EF-BB-BF-41'. Comparing
    # this fixed format does not depend on the $OFS preference variable,
    # unlike comparing a string with an array slice.
    $bom = [BitConverter]::ToString($bytes, 0, [Math]::Min(4, $bytes.Length))

    # The UTF-32 marks are tested first because the UTF-32 LE mark
    # (FF FE 00 00) begins with the UTF-16 LE mark (FF FE).
    if ($bom -eq 'FF-FE-00-00'){
        $fileEnc = 'UTF-32 LE'

    }elseif($bom -eq '00-00-FE-FF'){
        $fileEnc = 'UTF-32 BE'

    }elseif($bom -like 'FF-FE*'){
        $fileEnc = 'UTF-16 LE'

    }elseif($bom -like 'FE-FF*'){
        $fileEnc = 'UTF-16 BE'

    }elseif($bom -like 'EF-BB-BF*'){
        $fileEnc = 'UTF-8 with BOM'

    }else{#========== ansi or utf-8 ================
        # No BOM: a decoder that throws rules its encoding out.
        $isUTF8 = $isANSI = $true
        try {$ansiEnc.GetString($bytes) > $null} catch {$isANSI = $false}
        try {$utf8Enc.GetString($bytes) > $null} catch {$isUTF8 = $false}

        if ($isUTF8 -and $isANSI){
            $fileEnc = 'ANSI and UTF-8'
        }elseif($isUTF8){
            $fileEnc = 'UTF-8'
        }elseif($isANSI){
            $fileEnc = 'ANSI'
        }
        # Neither decoder accepted the bytes: $fileEnc stays 'unknown'.
    }

    "$file is $fileEnc"
}

沒有留言:

張貼留言