gbk编码文件转化为utf-8

it2025-10-27  5

数据文件比较大的时候,如果整体 copy 到内存中,会出现内存不足的情况,所以采用流的方式逐行读取和写入。使用的时候请自觉关闭流;这只是一个临时工具 demo,不再做演示。

private static void codeTransformByFileBuffer() throws IOException { //GBK编码格式源码路径 String infilePath = "E:\\人工智能\\人工智能-语言处理\\news_tensite_xml.full\\gbk\\news_tensite_xml.dat"; //转为UTF-8编码格式源码路径 String outfilePath = "E:\\人工智能\\人工智能-语言处理\\news_tensite_xml.full\\utf8\\corpus.txt"; FileInputStream fileInputStream = new FileInputStream(new File(infilePath)); BufferedReader reader = new BufferedReader(new InputStreamReader(fileInputStream,"gbk")); FileOutputStream fileOutputStream = new FileOutputStream(new File(outfilePath)); BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8)); while (reader.ready()){ String readLine = reader.readLine(); if (StringUtils.startsWith(readLine,"<content>")){ System.out.println(readLine); bufferedWriter.write(readLine); } } System.out.println("传输完毕"); }
最新回复(0)