use std::env;
use std::fs::File;
use std::io::{self, Read};
use std::path::Path;
/// Text encodings this tool can report for a file.
///
/// The type is a plain tag with no payload, so it is cheap to copy and can be
/// compared directly with `==`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextType {
    /// No recognised BOM and the content did not qualify as BOM-less UTF-8.
    TextAnsi,
    /// Well-formed UTF-8 with at least one non-ASCII character and no BOM.
    TextUtf8,
    /// The file starts with the UTF-8 byte order mark.
    TextUtf8Bom,
    /// The file starts with the UTF-16 little-endian byte order mark (`FF FE`).
    TextUtf16Le,
    /// The file starts with the UTF-16 big-endian byte order mark (`FE FF`).
    TextUtf16Be,
    /// The encoding could not be determined (for example, the file could not be opened).
    TextUnknown,
}
/// Checks whether the file at `file_name` is UTF-8 text stored without a BOM.
///
/// Returns `true` only when the whole file is well-formed UTF-8 **and** contains
/// at least one non-ASCII byte. A pure-ASCII or empty file yields `false`, since
/// nothing in it distinguishes UTF-8 from a legacy single-byte encoding.
///
/// If the file cannot be opened or read, a message is printed and `false` is
/// returned.
fn check_utf8_without_bom(file_name: &str) -> bool {
    let mut file_in = match File::open(file_name) {
        Ok(file) => file,
        Err(_) => {
            println!("打开文件失败");
            return false;
        }
    };
    let mut buffer = Vec::new();
    // A read failure is an ordinary runtime condition, not a bug: report it
    // instead of panicking.
    if file_in.read_to_end(&mut buffer).is_err() {
        println!("无法读取文件内容");
        return false;
    }
    is_non_ascii_utf8(&buffer)
}

/// Returns `true` when `bytes` is well-formed UTF-8 containing at least one non-ASCII byte.
fn is_non_ascii_utf8(bytes: &[u8]) -> bool {
    // The standard validator rejects everything a lead-byte/continuation-byte
    // count alone lets through: 0xFE/0xFF, 5- and 6-byte forms, overlong
    // encodings and UTF-16 surrogate code points.
    !bytes.is_ascii() && std::str::from_utf8(bytes).is_ok()
}
/// Detects the text encoding of the file at `file_name`.
///
/// The leading bytes are compared against the known byte order marks:
///
/// * `FF FE`    -> [`TextType::TextUtf16Le`]
/// * `FE FF`    -> [`TextType::TextUtf16Be`]
/// * `EF BB BF` -> [`TextType::TextUtf8Bom`]
///
/// When no BOM matches, the whole file is examined by `check_utf8_without_bom`;
/// the result is [`TextType::TextUtf8`] if that check succeeds and
/// [`TextType::TextAnsi`] otherwise.
///
/// If the file cannot be opened or its header cannot be read, a message is
/// printed and [`TextType::TextUnknown`] is returned.
fn check_text_encode(file_name: &str) -> TextType {
    let file_in = match File::open(file_name) {
        Ok(file) => file,
        Err(_) => {
            println!("打开文件失败");
            return TextType::TextUnknown;
        }
    };
    // Read at most three bytes: enough for the longest BOM checked here. Files
    // shorter than that (including empty ones) simply produce a shorter header
    // rather than an error.
    let mut head = Vec::with_capacity(3);
    if file_in.take(3).read_to_end(&mut head).is_err() {
        println!("无法读取文件头");
        return TextType::TextUnknown;
    }
    match head.as_slice() {
        [0xFF, 0xFE, ..] => TextType::TextUtf16Le,
        [0xFE, 0xFF, ..] => TextType::TextUtf16Be,
        // The UTF-8 BOM is three bytes; all three must match.
        [0xEF, 0xBB, 0xBF, ..] => TextType::TextUtf8Bom,
        _ => {
            if check_utf8_without_bom(file_name) {
                TextType::TextUtf8
            } else {
                TextType::TextAnsi
            }
        }
    }
}
/// Entry point: reports the detected encoding of the file named by the first
/// command-line argument, or prints a usage hint when no argument is given.
fn main() {
    let cli_args: Vec<String> = env::args().collect();
    match cli_args.get(1) {
        None => println!("请将TXT文件拖放到exe程序上."),
        Some(path) => {
            println!("file: {}", path);
            // Map the detected encoding to the label shown to the user.
            let label = match check_text_encode(path) {
                TextType::TextUnknown => "Unknown",
                TextType::TextUtf16Be => "Unicode big endian",
                TextType::TextUtf16Le => "Unicode",
                TextType::TextUtf8Bom => "UTF-8 BOM",
                TextType::TextUtf8 => "UTF-8",
                TextType::TextAnsi => "ANSI",
            };
            println!("file code: {}", label);
        }
    }
}
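// If this article or resource helped you, feel free to tip the author. Thank you for your support along the way!
// Site links: txttool.com · 说一段 · esp56物联 · 查询128 · IP查询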