class CharDet::UTF1632Prober
Public Class Methods
Source
# File lib/rchardet/utf1632prober.rb, line 34
# Builds a prober in its initial detecting state.
#
# Every piece of bookkeeping is given its starting value here and then
# #reset is invoked, which assigns the same starting values again.
def initialize
  super()

  # Progress through the input and the current verdict.
  @state = EDetecting
  @position = 0
  @quad = Array.new(4, 0)

  # Tallies of zero / non-zero bytes, indexed by byte offset modulo 4.
  @zeros_at_mod = Array.new(4, 0)
  @nonzeros_at_mod = Array.new(4, 0)

  # No encoding has been ruled out yet.
  @invalid_utf16be = @invalid_utf16le = false
  @invalid_utf32be = @invalid_utf32le = false

  # No pending first half of a surrogate pair in either byte order.
  @first_half_surrogate_pair_detected_16be = false
  @first_half_surrogate_pair_detected_16le = false

  reset
end
Calls superclass method
CharDet::CharSetProber::new
Public Instance Methods
Source
# File lib/rchardet/utf1632prober.rb, line 82
# Consumes +aBuf+ one byte at a time and returns the result of #get_state.
#
# Each byte is stored in @quad at its offset modulo 4 and counted as zero or
# non-zero for that offset. Whenever a group of four bytes is complete, the
# group is handed to the UTF-32 validator and each two-byte half is handed to
# the UTF-16 validator.
#
# aBuf:: an object responding to +each_byte+ (e.g. a String).
def feed(aBuf)
  aBuf.each_byte do |byte|
    slot = @position % 4
    @quad[slot] = byte

    if slot == 3
      validate_utf32_characters(@quad)
      # Use [start, length] slices so each call receives exactly two bytes.
      # An inclusive range such as 0..2 would pass three bytes instead.
      validate_utf16_characters(@quad[0, 2])
      validate_utf16_characters(@quad[2, 2])
    end

    if byte.zero?
      @zeros_at_mod[slot] += 1
    else
      @nonzeros_at_mod[slot] += 1
    end

    @position += 1
  end

  get_state
end
Source
# File lib/rchardet/utf1632prober.rb, line 65
# Returns the name of the encoding the input currently looks like.
#
# The candidates are checked in a fixed order (UTF-32BE, UTF-32LE, UTF-16BE,
# UTF-16LE) and the first one reported as likely wins.
def get_charset_name
  return "UTF-32BE" if is_likely_utf32be
  return "UTF-32LE" if is_likely_utf32le
  return "UTF-16BE" if is_likely_utf16be
  return "UTF-16LE" if is_likely_utf16le

  # Nothing matched: fall back to a name that is still a valid encoding.
  "UTF-16"
end
Source
# File lib/rchardet/utf1632prober.rb, line 117
# Returns 0.85 when any of the four UTF-16/UTF-32 variants is reported as
# likely, and 0.00 otherwise.
def get_confidence
  any_variant_likely =
    is_likely_utf16le ||
    is_likely_utf16be ||
    is_likely_utf32le ||
    is_likely_utf32be

  any_variant_likely ? 0.85 : 0.00
end
Source
# File lib/rchardet/utf1632prober.rb, line 102
# Returns the prober's state, updating it first when a decision can be made.
#
# ENotMe and EFoundIt are terminal: once reached they are returned unchanged.
def get_state
  decided = @state == ENotMe || @state == EFoundIt
  return @state if decided

  if get_confidence > 0.80
    @state = EFoundIt
    return @state
  end

  # Once we are 4kb into the file without being able to conclude it is UTF,
  # give up.
  @state = ENotMe if @position > 4 * 1024

  @state
end
Source
# File lib/rchardet/utf1632prober.rb, line 50
# Returns the prober to its starting condition so it can examine new input.
#
# Calls the superclass #reset first, then clears every counter and flag this
# class maintains.
def reset
  super()

  # Back to the start of the input, with no verdict yet.
  @state = EDetecting
  @position = 0
  @quad = Array.new(4, 0)

  # Fresh per-offset (modulo 4) tallies of zero and non-zero bytes.
  @zeros_at_mod = Array.new(4, 0)
  @nonzeros_at_mod = Array.new(4, 0)

  # Every candidate encoding is considered possible again.
  @invalid_utf16be = @invalid_utf16le = false
  @invalid_utf32be = @invalid_utf32le = false

  # Forget any half-seen surrogate pair.
  @first_half_surrogate_pair_detected_16be = false
  @first_half_surrogate_pair_detected_16le = false
end
Calls superclass method
CharDet::CharSetProber#reset