这是一种从“有效”png 文件中提取文本信息的简单 Java 方法。国际字符可能无法在控制台上很好地显示。
import java.io.*;
import java.util.zip.InflaterInputStream;
/**
 * Command-line utility that prints the textual metadata chunks (tEXt, zTXt
 * and iTXt) stored in a PNG stream to {@code System.out}.
 *
 * <p>Chunk CRCs are skipped, not verified. Per the PNG specification, tEXt and
 * zTXt content and all keywords are decoded as Latin-1; iTXt text and
 * translated keywords are decoded as UTF-8.
 */
public class PNGExtractText
{
    /** PNG signature constant: the 8 bytes every PNG stream starts with. */
    public static final long SIGNATURE = 0x89504E470D0A1A0AL;

    /** Image header chunk type. */
    private static final int IHDR = 0x49484452; // "IHDR"
    /** Image trailer chunk type. */
    private static final int IEND = 0x49454E44; // "IEND"
    /** Textual data chunk type. */
    private static final int tEXt = 0x74455874; // "tEXt"
    /** Compressed textual data chunk type. */
    private static final int zTXt = 0x7A545874; // "zTXt"
    /** International textual data chunk type. */
    private static final int iTXt = 0x69545874; // "iTXt"

    /** Length of the IHDR chunk payload. */
    private static final int IHDR_LENGTH = 13;
    /** Length of the CRC that trails every chunk. */
    private static final int CRC_LENGTH = 4;

    private static final String LATIN_1 = "ISO-8859-1";
    private static final String UTF_8 = "UTF-8";
    private static final String SEPARATOR = "**********************";

    /**
     * Reads a PNG stream and prints every textual chunk found before IEND.
     *
     * <p>If the stream does not start with the PNG signature followed by an
     * IHDR chunk, {@code --- NOT A PNG IMAGE ---} is printed and the method
     * returns. The stream is always closed before this method returns or
     * throws.
     *
     * @param is the stream to read; closed by this method
     * @throws EOFException if the stream ends before the IEND chunk
     * @throws IOException if a chunk is malformed or the stream cannot be read
     */
    public static void showText(InputStream is) throws Exception
    {
        try
        {
            if (!readHeader(is))
            {
                System.out.println("--- NOT A PNG IMAGE ---");
                return;
            }
            while (true)
            {
                int dataLen = readInt(is);
                int chunkType = readInt(is);
                if (chunkType == IEND)
                {
                    // The trailing CRC is not needed: the stream is closed right after.
                    System.out.println("IEND found");
                    break;
                }
                if (dataLen < 0)
                {
                    throw new IOException("Invalid chunk length: " + (dataLen & 0xffffffffL));
                }
                switch (chunkType)
                {
                    case zTXt:
                        showCompressedText(is, dataLen);
                        break;
                    case tEXt:
                        showPlainText(is, dataLen);
                        break;
                    case iTXt:
                        showInternationalText(is, dataLen);
                        break;
                    default:
                        // Not a text chunk: skip payload and CRC without buffering them.
                        skipFully(is, (long) dataLen + CRC_LENGTH);
                        break;
                }
            }
        }
        finally
        {
            is.close();
        }
    }

    /**
     * Consumes the signature and the IHDR chunk.
     *
     * @return {@code false} if the signature or IHDR header does not match, or
     *         the stream is too short to contain them
     */
    private static boolean readHeader(InputStream is) throws IOException
    {
        try
        {
            if (readLong(is) != SIGNATURE)
            {
                return false;
            }
            if (readInt(is) != IHDR_LENGTH || readInt(is) != IHDR)
            {
                return false;
            }
            skipFully(is, IHDR_LENGTH + CRC_LENGTH);
            return true;
        }
        catch (EOFException tooShort)
        {
            return false;
        }
    }

    /** Prints a tEXt chunk: Latin-1 keyword, null separator, Latin-1 text. */
    private static void showPlainText(InputStream is, int dataLen) throws IOException
    {
        System.out.println("tEXt chunk:");
        byte[] data = readChunkData(is, dataLen);
        int keywordEnd = indexOfNull(data, 0, "tEXt");
        int textStart = keywordEnd + 1;
        System.out.print(new String(data, 0, keywordEnd, LATIN_1) + ": ");
        System.out.println(new String(data, textStart, data.length - textStart, LATIN_1));
        System.out.println(SEPARATOR);
    }

    /**
     * Prints a zTXt chunk: Latin-1 keyword, null separator, one compression
     * method byte, then zlib-compressed Latin-1 text.
     */
    private static void showCompressedText(InputStream is, int dataLen) throws IOException
    {
        System.out.println("zTXt chunk:");
        byte[] data = readChunkData(is, dataLen);
        int keywordEnd = indexOfNull(data, 0, "zTXt");
        int textStart = keywordEnd + 2; // skip the separator and the compression method byte
        if (textStart > data.length)
        {
            throw new IOException("Malformed zTXt chunk: missing compression method");
        }
        System.out.print(new String(data, 0, keywordEnd, LATIN_1) + ": ");
        printInflated(data, textStart, LATIN_1);
        System.out.println(SEPARATOR);
    }

    /**
     * Prints an iTXt chunk. Layout:
     * <pre>
     * Keyword:             1-79 bytes (character string)
     * Null separator:      1 byte
     * Compression flag:    1 byte
     * Compression method:  1 byte
     * Language tag:        0 or more bytes (character string)
     * Null separator:      1 byte
     * Translated keyword:  0 or more bytes
     * Null separator:      1 byte
     * Text:                0 or more bytes
     * </pre>
     */
    private static void showInternationalText(InputStream is, int dataLen) throws IOException
    {
        System.out.println("iTXt chunk:");
        byte[] data = readChunkData(is, dataLen);
        int keywordEnd = indexOfNull(data, 0, "iTXt");
        if (keywordEnd + 3 > data.length)
        {
            throw new IOException("Malformed iTXt chunk: missing compression flag/method");
        }
        boolean compressed = data[keywordEnd + 1] == 1;
        int langStart = keywordEnd + 3; // past separator, compression flag and method
        int langEnd = indexOfNull(data, langStart, "iTXt");
        int translatedStart = langEnd + 1;
        int translatedEnd = indexOfNull(data, translatedStart, "iTXt");
        int textStart = translatedEnd + 1;

        System.out.print(new String(data, 0, keywordEnd, LATIN_1));
        System.out.print("(");
        if (langEnd > langStart)
        {
            System.out.print(new String(data, langStart, langEnd - langStart, UTF_8));
        }
        if (translatedEnd > translatedStart)
        {
            System.out.print(" " + new String(data, translatedStart,
                    translatedEnd - translatedStart, UTF_8));
        }
        System.out.print("): ");

        if (compressed)
        {
            printInflated(data, textStart, UTF_8);
        }
        else
        {
            System.out.println(new String(data, textStart, data.length - textStart, UTF_8));
        }
        System.out.println(SEPARATOR);
    }

    /**
     * Inflates {@code data[offset..]} and prints it line by line using the
     * given charset. The inflater is closed afterwards.
     */
    private static void printInflated(byte[] data, int offset, String charset) throws IOException
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new InflaterInputStream(
                        new ByteArrayInputStream(data, offset, data.length - offset)),
                charset));
        try
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                System.out.println(line);
            }
        }
        finally
        {
            reader.close();
        }
    }

    /**
     * Returns the index of the first zero byte at or after {@code from}.
     *
     * @throws IOException if there is no zero byte, i.e. the chunk is malformed
     */
    private static int indexOfNull(byte[] data, int from, String chunkName) throws IOException
    {
        for (int i = from; i < data.length; i++)
        {
            if (data[i] == 0)
            {
                return i;
            }
        }
        throw new IOException("Malformed " + chunkName + " chunk: missing null separator");
    }

    /** Reads a chunk payload of {@code dataLen} bytes and skips the CRC after it. */
    private static byte[] readChunkData(InputStream is, int dataLen) throws IOException
    {
        byte[] data = new byte[dataLen];
        readFully(is, data);
        skipFully(is, CRC_LENGTH);
        return data;
    }

    /**
     * Fills {@code buf} completely. A single {@code read} call may return fewer
     * bytes than requested, so it is repeated until the buffer is full.
     *
     * @throws EOFException if the stream ends first
     */
    private static void readFully(InputStream is, byte[] buf) throws IOException
    {
        int filled = 0;
        while (filled < buf.length)
        {
            int n = is.read(buf, filled, buf.length - filled);
            if (n < 0)
            {
                throw new EOFException("Unexpected end of PNG stream");
            }
            filled += n;
        }
    }

    /**
     * Skips exactly {@code count} bytes.
     *
     * @throws EOFException if the stream ends first
     */
    private static void skipFully(InputStream is, long count) throws IOException
    {
        long remaining = count;
        while (remaining > 0)
        {
            long skipped = is.skip(remaining);
            if (skipped > 0)
            {
                remaining -= skipped;
                continue;
            }
            // skip() may return 0 without being at end of stream; probe with a read.
            if (is.read() < 0)
            {
                throw new EOFException("Unexpected end of PNG stream");
            }
            remaining--;
        }
    }

    /** Reads a big-endian 32-bit integer. */
    private static int readInt(InputStream is) throws IOException
    {
        byte[] buf = new byte[4];
        readFully(is, buf);
        return ((buf[0] & 0xff) << 24) | ((buf[1] & 0xff) << 16)
                | ((buf[2] & 0xff) << 8) | (buf[3] & 0xff);
    }

    /** Reads a big-endian 64-bit integer. */
    private static long readLong(InputStream is) throws IOException
    {
        byte[] buf = new byte[8];
        readFully(is, buf);
        long value = 0;
        for (int i = 0; i < buf.length; i++)
        {
            value = (value << 8) | (buf[i] & 0xffL);
        }
        return value;
    }

    /**
     * Entry point. Usage: {@code java PNGExtractText image.png}
     *
     * @param args a single element: the path of the PNG file to inspect
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 1)
        {
            System.err.println("Usage: java PNGExtractText image.png");
            return;
        }
        // showText closes the stream on every path.
        showText(new FileInputStream(args[0]));
    }
}
注意:用法为 java PNGExtractText image.png
这是我从官方 png 测试套件的测试图像 ctzn0g04.png 中得到的:
D:\tmp>java PNGExtractText ctzn0g04.png
tEXt chunk:
Title: PngSuite
**********************
tEXt chunk:
Author: Willem A.J. van Schaik
(willem@schaik.com)
**********************
zTXt chunk:
Copyright: Copyright Willem van Schaik, Singapore 1995-96
**********************
zTXt chunk:
Description: A compilation of a set of images created to test the
various color-types of the PNG format. Included are
black&white, color, paletted, with alpha channel, with
transparency formats. All bit-depths allowed according
to the spec are present.
**********************
zTXt chunk:
Software: Created on a NeXTstation color using "pnmtopng".
**********************
zTXt chunk:
Disclaimer: Freeware.
**********************
IEND found
编辑:我刚刚打开了您提供的链接,并用其中的图像进行了测试,得到如下输出:
D:\tmp>java PNGExtractText concept_Sjet_dream6.png
tEXt chunk:
Comment: The comment
**********************
IEND found
上述示例已成为一个 Java 图像库的一部分,可在 https://github.com/dragon66/icafe 找到。