/*
* Copyright (C) 2007 Steve Ratcliffe
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* Author: Steve Ratcliffe
* Create date: Dec 16, 2007
*/
package test.display;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
/**
* Standalone program to display the contents of an SRT file. The SRT file
* apparently holds the sorting order used for different charsets.
*
* @author Steve Ratcliffe
*/
@
SuppressWarnings("MalformedFormatString") // bug, not actually an error for %c to have int
public class SrtDisplay
extends CommonDisplay
{
// Section holding the description text; assigned in printSrt1() and read in printDescription().
private Section description
;
// Section holding the character table header; assigned in printSrt1().
// The character table, srt5, srt7 and srt8 are all positioned relative to its start.
private Section tableHeader
;
// File offset of the SRT 1 pointer section, as read from the file header.
private int srt1start
;
// SRT 4, the main character table; assigned in tableHeader().
private Section characterTable
;
// SRT 5, the expansions table; assigned in tableHeader().
private Section srt5
;
// SRT 7, pointers into srt8; only assigned when the sub header is longer than 0x3c, otherwise null.
private Section srt7
;
// SRT 8, multi-byte character table; only assigned when the sub header is longer than 0x44, otherwise null.
private Section srt8
;
// Decoder for the code page named in the table header; created in tableHeader(), used by toUnicode().
private CharsetDecoder decoder
;
// Sort positions read from SRT 5 in file order; expansion() looks them up with a 1-based index.
private final List<CharPosition
> expansions =
new ArrayList<>();
// Characters collected from the character table and from srt7/srt8, kept in sort-position order
// by the TreeSet; printCharMap() walks this to show the ordering.
private final Set<CharPosition
> charmap =
new TreeSet<>();
// Entries read from SRT 8 in file order; addChars() fetches them by index.
private final List<CharPosition
> multi =
new ArrayList<>();
// Set to true when the code page is 65001 (utf-8); toUnicode() then returns values unchanged.
private boolean isUnicode
;
/**
 * Display the whole file: calls {@code readCommonHeader()}, then reads the
 * SRT specific header and finally prints each section of the body.
 */
protected void print() {
	readCommonHeader();
	readFileHeader();
	printBody();
}
/**
 * Display the SRT specific part of the file header.
 *
 * This header is unusual as it doesn't follow the normal header conventions for
 * defining the sections. It points to a single section that contains a header
 * which in turn points to other sections.
 *
 * The start of that single section is saved in {@code srt1start}. When the
 * header length is 37 a second group of three similar fields is displayed too.
 */
private void readFileHeader() {
	Displayer header = new Displayer(reader);
	header.setTitle("SRT Header");

	// Number of sections?? Seems to be true, I've only one section, value here 0x01
	header.charValue("??? %d");
	srt1start = header.intValue("SRT 1 start");
	header.charValue("len %d");

	boolean hasSecondGroup = getHeaderLen() == 37;
	if (hasSecondGroup) {
		header.charValue("??? %d");
		header.intValue("SRT 2 start %x");
		header.charValue("len %d");
	}

	header.print(outStream);
}
/**
 * Print every part of the file body in turn.
 *
 * The order of the calls matters because later steps use state collected by
 * earlier ones: printSrt1() locates the description and the table header,
 * tableHeader() locates the individual tables and creates the decoder,
 * printSrt5() fills the expansion list that the character tables refer to,
 * and printSrt8() fills the list that printSrt7() indexes into.
 */
private void printBody() {
	// Locate and show the top level sections. SRT 2 is not displayed.
	printSrt1();
	printDescription();
	tableHeader();

	// Expansions are read first as the character table refers to them.
	printSrt5();
	printCharacterTable();

	// srt8 is read before srt7, which points into it.
	printSrt8();
	printSrt7();

	// Show the actual sort order
	printCharMap();
}
/**
 * Display the SRT 1 pointer section and remember the two sections it describes.
 *
 * This section has regular section headers like the other app files. The
 * results are stored in {@code description} and {@code tableHeader}.
 */
private void printSrt1() {
	Displayer pointers = new Displayer(reader);
	reader.position(srt1start);
	pointers.setTitle("SRT 1 (pointers)");

	description = readSection(pointers, "Description", 2, false, false);
	tableHeader = readSection(pointers, "Table header", 3, false, false);

	pointers.print(outStream);
}
/**
 * Display the description section: the description string followed by the
 * rest of the section as raw bytes.
 *
 * The size of the remainder is the section length less the length of the
 * string and one further byte (presumably the string terminator).
 */
private void printDescription() {
	Displayer out = new Displayer(reader);
	reader.position(description.getStart());
	out.setTitle("Description");

	String text = out.zstringValue("Description: %s");

	long remain = description.getLen() - text.length() - 1;
	int rawLength = (int) remain;
	out.rawValue(rawLength);

	out.print(outStream);
}
/**
 * Display the character table header and locate the tables that it describes.
 *
 * This is a strange section, because it also has a header with multiple sections.
 *
 * The code page found here selects the decoder used by toUnicode(); code page
 * 65001 is treated as utf-8, anything else as "cp" followed by the number. If
 * the JVM has no charset of that name, a warning is written to stderr and
 * US-ASCII is used instead so that the rest of the file can still be displayed.
 *
 * The character table (SRT 4) and expansions (SRT 5) are always read. SRT 6,
 * SRT 7 and SRT 8 are only read when the sub header is longer than 0x2c, 0x3c
 * and 0x44 respectively; {@code srt7} and {@code srt8} stay null otherwise.
 */
private void tableHeader() {
	Displayer d = new Displayer(reader);
	d.setTitle("Character table header");

	long start = tableHeader.getStart();
	d.setSectStart(start);
	reader.position(start);

	int len = d.charValue("sub header len %d");
	d.charValue("id1 %d");
	d.charValue("id2 %d");
	int codepage = d.charValue("codepage %d");

	String cpname;
	if (codepage == 65001) {
		cpname = "utf-8";
		isUnicode = true;
	} else {
		cpname = "cp" + codepage;
	}

	Charset charset;
	try {
		charset = Charset.forName(cpname);
	} catch (IllegalArgumentException e) {
		// Both IllegalCharsetNameException and UnsupportedCharsetException are
		// IllegalArgumentExceptions. Carry on rather than abandon the display.
		System.err.println("Charset " + cpname + " is not available, displaying characters as US-ASCII");
		charset = Charset.forName("US-ASCII");
	}
	decoder = charset.newDecoder();

	d.intValue("??? %d");

	characterTable = readSection(d, "SRT 4 character table", 4, true, false);
	d.rawValue(6, "padding?");

	srt5 = readSection(d, "SRT 5 expansions", 5, true, false);
	d.rawValue(6, "padding?");

	if (len > 0x2c) {
		// The section itself is not used further, it is only displayed.
		readSection(d, "SRT 6 ??", 6, true, false);
		d.charValue("??");
		d.intValue("max code block (in srt7) %d");
	}

	if (len > 0x3c) {
		srt7 = readSection(d, "SRT 7 srt8 ptrs", 7, true, false);
		d.charValue("");
		d.intValue("");
	}

	if (len > 0x44) {
		srt8 = readSection(d, "SRT 8 multi-byte chars", 8, true, false);
	}

	// Whatever is left of the sub header after the fields read above.
	d.rawValue((int) (len - (reader.position() - start)), "remainder");
	d.print(outStream);
}
/**
 * Display the main character table (SRT 4) and collect its entries.
 *
 * You look up the character position in this table and it gives you the
 * primary, secondary and tertiary sorting weights. There are also a couple of
 * flags to mark numbers and letters. Some entries refer to the expansion
 * table - these are letters or symbols that sort as two or more characters.
 *
 * Records are numbered from 1 and that number is used as the character value.
 * Every entry is added to {@code charmap}.
 */
private void printCharacterTable() {
	Displayer table = new Displayer(reader);
	table.setTitle("SRT 4 Character table");

	int recordSize = characterTable.getRecordSize();
	long tableStart = tableHeader.getStart() + characterTable.getStart();
	table.setSectStart(tableHeader.getStart());
	reader.position(tableStart);

	int ch = 1;
	while (ch <= characterTable.getNumberOfRecords()) {
		charmap.add(printCharPosition(table, recordSize, ch));

		// Print each record as it is read; the title is only wanted once.
		table.print(outStream);
		table.setTitle(null);
		ch++;
	}
}
/**
 * Now that we have all the characters read, print them out showing the sorting.
 *
 * The characters in {@code charmap} are walked in order. A new line of the form
 * {@code [primary] < } is started whenever the primary weight changes, and the
 * line that was just completed is terminated with {@code  # } followed by the
 * code points of its characters. Within a line, " ; " separates characters that
 * differ in secondary weight, "," those that differ only in tertiary weight and
 * " = " those whose three weights are all the same. Characters that are
 * expansions are left out.
 */
private void printCharMap() {
	Displayer d = new Displayer(reader);
	d.setTitle("------- Summary of ordering --------");

	Formatter chars = new Formatter();
	Formatter comment = new Formatter();

	// A primary weight of -1 can never match, so the first character always starts a line.
	CharPosition last = new CharPosition(0);
	last.first = -1;

	boolean anyPrinted = false;
	for (CharPosition cp : charmap) {
		if (cp.expands)
			continue;

		if (cp.first != last.first) {
			// Finish the previous line with its comment and start the next one.
			chars.format(" # %s\n[%d] < ", comment, cp.first);
			comment = new Formatter();
		} else if (cp.second != last.second) {
			chars.format(" ; ");
			comment.format(" ; ");
		} else if (cp.third != last.third) {
			chars.format(",");
			comment.format(",");
		} else {
			// Same weights at every level; keep the two characters apart.
			chars.format(" = ");
			comment.format(" = ");
		}

		last = cp;
		chars.format("%c", toUnicode(cp.val));
		comment.format("U+%04x", cp.val);
		anyPrinted = true;
	}

	// The comment for a line is only written when the next line starts, so
	// the final line has to be completed here or its comment would be lost.
	if (anyPrinted)
		chars.format(" # %s", comment);

	d.item().addText(chars.toString());
	d.print(outStream);
}
/**
 * Read one character table record and add a description of it to the display.
 *
 * A record is a flags byte followed by the position information. In the flags
 * bit 0 marks a letter and bit 1 a number. Bits 4 and 5 are non-zero when the
 * character is an expansion; in that case the primary field is not a weight
 * but a position in the expansions table.
 *
 * @param d The display that the record is added to.
 * @param reclen The full record length, including the flags byte.
 * @param charValue The character that this record belongs to.
 * @return The sort position read from the record, with {@code expands} set
 * when the record is an expansion.
 */
private CharPosition printCharPosition(Displayer d, int reclen, int charValue) {
	DisplayItem item = d.item();

	int flags = reader.get() & 0xff;
	item.setBytes((byte) flags);

	CharPosition position = readCharPosition(item, reclen - 1, charValue);

	StringBuilder text = new StringBuilder();
	text.append(String.format("0x%02x ", charValue));
	text.append(String.format("(%c) ", toUnicode(charValue)));

	boolean isLetter = (flags & 0x1) != 0;
	boolean isNumber = (flags & 0x2) != 0;
	if (isLetter)
		text.append("Letter ");
	if (isNumber)
		text.append("Number ");

	int expansionBits = (flags >> 4) & 0x3;
	if (expansionBits != 0) {
		// This is an expansion, it sorts as two or more characters (eg ß sorts near ss).
		// The pos is an index into srt5.
		position.expands = true;
		expansion(text, position.first, expansionBits);
	} else {
		text.append(position);
	}

	item.addText(text.toString());
	return position;
}
/**
 * Read the sort position information. The format varies depending on the posLength parameter.
 *
 * With a length of 2 a 16 bit value is read: the low 8 bits are the primary
 * weight, the next 4 bits the secondary and the top 4 bits the tertiary.
 * With a length of 4 a 32 bit value is read: the low 16 bits are the primary
 * weight, the next 8 bits the secondary and the top 8 bits the tertiary.
 * For any other length nothing is read and all the weights are left as zero.
 *
 * @param item The display item - any bytes read are added to this.
 * @param posLength The length of the position information (not the record length, just the
 * part of it that encodes the positions).
 * @param charValue The character that this applies to.
 * @return A {@link CharPosition} structure containing the sort position weights.
 */
private CharPosition readCharPosition(DisplayItem item, int posLength, int charValue) {
	// The constructor records the character value.
	CharPosition result = new CharPosition(charValue);

	switch (posLength) {
	case 2: {
		int packed = reader.getChar();
		item.setBytes((char) packed);
		result.first = packed & 0xff;
		result.second = (packed >> 8) & 0xf;
		result.third = (packed >> 12) & 0xf;
		break;
	}
	case 4: {
		int packed = reader.getInt();
		item.setBytes(packed);
		result.first = packed & 0xffff;
		result.second = (packed >> 16) & 0xff;
		result.third = (packed >> 24) & 0xff;
		break;
	}
	default:
		break;
	}

	return result;
}
/**
 * Some characters sort as if they were two separate characters (eg ß sorts like 'ss').
 *
 * Appends the sort positions that make up the expansion, separated by " & ".
 * If any of the entries lies outside the expansions that have been read, an
 * error message is appended instead and the method returns early.
 *
 * @param sb Sort order descriptions are added to this buffer.
 * @param pos Index into the expansions area. This is one-based: the first
 * entry is at position 1.
 * @param n The value from the flags of the character. Entries {@code pos} to
 * {@code pos + n} inclusive are used, so it is one less than the number of
 * characters in the expansion.
 */
private void expansion(StringBuilder sb, int pos, int n) {
	sb.append("Expansion: ");

	for (int i = 0; i <= n; i++) {
		int index = pos + i - 1;
		if (index < 0 || index >= expansions.size()) {
			// A bad position would otherwise end the whole display with an exception.
			sb.append(String.format("error: pos=%d n=%d, readpos=%x", pos, n, reader.position()));
			return;
		}

		sb.append(expansions.get(index));
		if (i != n)
			sb.append(" & ");
	}
}
/**
 * Convert a character value from the file into something that can be printed.
 *
 * For a unicode file the value is returned unchanged. Otherwise the low byte
 * of the value is decoded with the decoder for the file's code page and the
 * first resulting character is returned.
 *
 * NOTE(review): only one byte is ever decoded, so values above 0xff in a
 * non-unicode file are shown as whatever their low byte decodes to - confirm
 * whether such files exist.
 *
 * @param c The character value as found in the file.
 * @return The character to display, or '?' if the byte cannot be decoded or
 * decodes to nothing.
 */
private int toUnicode(int c) {
	if (isUnicode)
		return c;

	ByteBuffer b = ByteBuffer.allocate(1);
	b.put((byte) c);
	b.flip();

	try {
		CharBuffer chars = decoder.decode(b);
		// A byte can legitimately produce no output in some charsets;
		// charAt(0) would then throw an IndexOutOfBoundsException.
		if (chars.length() == 0)
			return '?';
		return chars.charAt(0);
	} catch (CharacterCodingException e) {
		// Not a valid character in this code page.
		return '?';
	}
}
/**
 * Display the expansion table (SRT 5) and remember its entries.
 *
 * Some characters sort as though they were two (or more) characters.
 * This table is a list of sort positions that are referred to from the main character
 * table. As such there is no particular pattern to the entries in the table.
 *
 * Each entry is appended to {@code expansions} in the order it is read.
 */
private void printSrt5() {
	Displayer table = new Displayer(reader);
	table.setTitle("SRT 5 (expansions)");

	int recordSize = srt5.getRecordSize();
	reader.position(tableHeader.getStart() + srt5.getStart());

	int index = 0;
	while (index < srt5.getNumberOfRecords()) {
		DisplayItem item = table.item();

		// The whole record is position information; there is no character value.
		CharPosition position = readCharPosition(item, recordSize, 0);
		item.addText(position.toString());
		expansions.add(position);

		index++;
	}

	table.print(outStream);
}
/**
 * This is used for multi-byte character sets.
 *
 * It is a list of pointers into srt8.
 *
 * Some slots are filled with 0xffffffff so you
 * probably look up the high bytes in this table to get the block where you
 * look up the low byte or something similar.
 *
 * Blocks are numbered from 1. For every slot that holds a pointer, the
 * characters of that block are added to the character map by addChars().
 *
 * The srt7 section only exists when the table sub header is long enough; when
 * it was not found nothing is printed. When srt8 was not found the pointers
 * are still displayed but no characters can be added.
 */
private void printSrt7() {
	if (srt7 == null)
		return;

	Displayer d = new Displayer(reader);
	d.setTitle("SRT 7 (pointers to srt8 indexed by top part of char)");

	reader.position(tableHeader.getStart() + srt7.getStart());

	int block = 1;
	for (int i = 0; i < srt7.getNumberOfRecords(); i++) {
		DisplayItem item = d.intItem();
		int val = item.getValue();
		item.addText("%4x: Pointer to srt8 %x", block, val);

		// The pointer is a byte offset, addChars() wants a record index.
		if (val != 0xffffffff && srt8 != null)
			addChars(block << 8, val / srt8.getRecordSize());

		d.print(outStream);
		d.setTitle(null);
		block++;
	}
}
/**
 * Add the characters from a part of srt8 to the character map.
 *
 * Retrieve the character position information for 256 consecutive entries
 * read from srt8 and update the character value of each. The character value is
 * the given block value plus the offset of the entry within the block.
 *
 * @param block The high part of the character value for this block; the caller
 * passes the srt7 block number shifted left by 8 bits.
 * @param n The index into srt8 where this block of characters starts.
 */
private void addChars(int block, int n) {
	final int blockSize = 256;

	for (int low = 0; low < blockSize; low++) {
		CharPosition entry = multi.get(n + low);

		// Set the character value and add it to the map.
		entry.val = block + low;
		charmap.add(entry);
	}
}
/**
 * Display the character table for multi-byte characters (SRT 8).
 *
 * Each record is displayed in the same way as the main character table, using
 * the record index as the character value, and is appended to {@code multi}
 * so that it can be found again when srt7 is processed.
 *
 * The srt8 section only exists when the table sub header is long enough; when
 * it was not found nothing is printed.
 */
private void printSrt8() {
	if (srt8 == null)
		return;

	Displayer d = new Displayer(reader);
	d.setTitle("SRT 8 (character table for multibyte characters)");

	int reclen = srt8.getRecordSize();
	reader.position(tableHeader.getStart() + srt8.getStart());
	d.setSectStart(reader.position());

	for (int i = 0; i < srt8.getNumberOfRecords(); i++) {
		CharPosition cp = printCharPosition(d, reclen, i);
		multi.add(cp);

		// Print each record as it is read; the title is only wanted once.
		d.print(outStream);
		d.setTitle(null);
	}
}
/**
 * Command line entry point.
 *
 * Prints a usage message and exits with status 1 when no file name is given,
 * otherwise displays the file named by the first argument.
 *
 * @param args The command line arguments; the first is the file to display.
 */
public static void main(String[] args) {
	boolean haveFileName = args.length >= 1;
	if (!haveFileName) {
		System.err.println("Usage: srtdisplay <filename>");
		System.exit(1);
	}

	String fileName = args[0];
	CommonDisplay display = new SrtDisplay();
	display.display(fileName, "SRT");
}
private class CharPosition
implements Comparable {
private int val
;
private int first
;
private int second
;
private int third
;
private boolean expands
;
public CharPosition
(int charValue
) {
this.
val = charValue
;
}
public int compareTo
(Object o
) {
CharPosition c2 =
(CharPosition
) o
;
if (c2.
first == first
)
return compareSecond
(c2
);
else if (first
< c2.
first)
return -
1;
else
return 1;
}
private int compareSecond
(CharPosition c2
) {
if (c2.
second == second
)
return compareThird
(c2
);
else if (second
< c2.
second)
return -
1;
else
return 1;
}
private int compareThird
(CharPosition c2
) {
if (third == c2.
third)
return 0;
else if (third
< c2.
third)
return -
1;
else
return 1;
}
public String toString
() {
return "prim=" + first +
",sec=" + second +
",tert=" + third
;
}
}
}