/*
* Copyright (C) 2007 Steve Ratcliffe
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* Author: Steve Ratcliffe
* Create date: Dec 16, 2007
*/
package test.display;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.TreeSet;
import uk.me.parabola.imgfmt.app.srt.Sort;
/**
* Standalone program to display the SRT file. This is used for
* the sorting order for different charsets apparently.
*
* @author Steve Ratcliffe
*/
@
SuppressWarnings("MalformedFormatString") // bug, not actually an error for %c to have int
public class SrtDisplay
extends CommonDisplay
{
private Section description
;
private Section tableHeader
;
private int srt1start
;
private Section characterTable
;
private Section srt5
;
private Section srt7
;
private Section srt8
;
private CharsetDecoder decoder
;
private final List<CharPosition
> expansions =
new ArrayList<>();
private final NavigableSet
<CharPosition
> charmap =
new TreeSet<>();
private boolean isUnicode
;
private final Map<Integer,
Integer> offsetToBlock =
new HashMap<>();
private String srtDescription
;
private int codepage
;
private int id1
;
private int id2
;
protected void print
() {
readCommonHeader
();
readFileHeader
();
printBody
();
}
/**
* This header is unusual as doesn't follow the normal header conventions for
* defining the sections. It points to a single section that contains a header
* which in turn points to other sections.
*/
private void readFileHeader
() {
Displayer d =
new Displayer
(reader
);
d.
setTitle("SRT Header");
d.
charValue("??? %d"); //NUmber of sections?? Seems to be true, I've only one section, value here 0x01
srt1start = d.
intValue("SRT 1 start");
d.
charValue("len %d");
if (getHeaderLen
() ==
37) {
d.
charValue("??? %d");
d.
intValue("SRT 2 start %x");
d.
charValue("len %d");
}
d.
print(outStream
);
}
private void printBody
() {
printSrt1
();
//printSrt2();
printDescription
();
tableHeader
();
printSrt5
();
printCharacterTable
();
printSrt7
();
printSrt8
();
// Show the actual sort order
printCharMap
();
}
/**
* This section has regular section headers like the other app files.
*/
private void printSrt1
() {
Displayer d =
new Displayer
(reader
);
reader.
position(srt1start
);
d.
setTitle("SRT 1 (pointers)");
description = readSection
(d,
"Description",
2,
false,
false);
tableHeader = readSection
(d,
"Table header",
3,
false,
false);
d.
print(outStream
);
}
private void printDescription
() {
Displayer d =
new Displayer
(reader
);
reader.
position(description.
getStart());
d.
setTitle("Description");
srtDescription = d.
zstringValue("Description: %s");
long remain = description.
getLen() - srtDescription.
length() -
1;
d.
rawValue((int) remain
);
d.
print(outStream
);
}
/**
* This is a strange section, because it also has a header with multiple sections.
*/
private void tableHeader
() {
Displayer d =
new Displayer
(reader
);
d.
setTitle("Character table header");
long start = tableHeader.
getStart();
d.
setSectStart(start
);
reader.
position(start
);
int len = d.
charValue("sub header len %d");
id1 = d.
charValue("id1 %d");
id2 = d.
charValue("id2 %d");
codepage = d.
charValue("codepage %d");
if (codepage ==
65001)
isUnicode =
true;
Charset charset = Sort.
charsetFromCodepage(codepage
);
decoder = charset.
newDecoder();
d.
intValue("??? %d");
characterTable = readSection
(d,
"SRT 4 character table",
4,
true,
false);
d.
rawValue(6,
"padding?");
srt5 = readSection
(d,
"SRT 5 expansions",
5,
true,
false);
d.
rawValue(6,
"padding?");
if (len
> 0x2c
) {
readSection
(d,
"SRT 6 ??",
6,
false,
false);
}
// For multi byte character encodings.
if (len
> 0x34
) {
d.
intValue("??");
d.
intValue("max code block (in srt7) %d");
srt7 = readSection
(d,
"SRT 7 srt8 ptrs",
7,
true,
false);
d.
charValue("");
d.
intValue("");
}
if (len
> 0x44
) {
srt8 = readSection
(d,
"SRT 8 multi-byte chars",
8,
true,
false);
}
d.
rawValue((int) (len -
(reader.
position() - start
)),
"remainder");
d.
print(outStream
);
}
/**
* The main character table. You look up the character position in this table and
* it gives you the primary, secondary and tertiary sorting weights. There are
* also a couple of flags to mark numbers and letters. Some entries refer to the
* expansion table - these are letters or symbols that sort as two or more characters.
*/
private void printCharacterTable
() {
Displayer d =
new Displayer
(reader
);
d.
setTitle("SRT 4 Character table");
int rs = characterTable.
getRecordSize();
long start = tableHeader.
getStart() + characterTable.
getStart();
d.
setSectStart(tableHeader.
getStart());
reader.
position(start
);
for (int i =
1; i
<= characterTable.
getNumberOfRecords(); i++
) {
CharPosition c = printCharPosition
(d, rs, i
);
charmap.
add(c
);
d.
print(outStream
);
d.
setTitle(null);
}
}
/**
* Now that we have all the characters read, print them out showing the sorting.
*/
private void printCharMap
() {
Displayer d =
new Displayer
(reader
);
d.
setTitle("------- Summary of ordering --------");
Formatter chars =
new Formatter();
//Formatter comment = new Formatter();
// reproduce header like mkgmap resource/sort/cp*.txt entries
chars.
format("\n\n\n");
chars.
format("# Compare this with resource/sort/cp%d.txt.\n\n", codepage
);
chars.
format("codepage %d\n", codepage
);
chars.
format("id1 %d\n", id1
);
chars.
format("id2 %d\n", id2
);
chars.
format("description \"%s\"\n\n", srtDescription
);
chars.
format("characters\n\n");
CharPosition last =
new CharPosition
(0);
//last.first = -1;
last.
first =
0; // start first line with zero/ignore sortOrder
for (CharPosition cp : charmap
) {
if (cp.
expands > 0)
continue;
int unicodeChar = toUnicode
(cp.
val);
if (unicodeChar
< 0) // no character defined for this position
continue;
if (cp.
first != last.
first) {
//chars.format(" # %s\n[%d] < ", comment, cp.first);
chars.
format("\n < ");
//comment = new Formatter();
} else if (cp.
second != last.
second) {
chars.
format(" ; ");
//comment.format(" ; ");
} else if (cp.
third != last.
third) {
chars.
format(",");
//comment.format(",");
} else {
chars.
format("=");
//comment.format("=");
}
last = cp
;
chars.
format("%s", fmtChar
(unicodeChar
));
//comment.format("U+%04x", cp.val);
}
chars.
format("\n");
for (CharPosition cp : charmap
) {
if (cp.
expands > 0) {
chars.
format("expand %s to ", fmtChar
(toUnicode
(cp.
val)));
for (int i =
0; i
<= cp.
expands; ++i
) {
CharPosition ch = expansions.
get(cp.
first + i -
1);
// need to search for best char with this first/primary. Doesn't actually matter
// apart from the cosmetics of the sort/cp*.txt expand list because the secondary
// and tertiary binary sortOrders are chosen to avoid matching existing real chars.
// see mkgmap/srt/SrtTextReader.java for more info
if (ch.
second > 7)
ch.
second -=
7;
ch.
third = ch.
third >=
5 ? 2 :
1;
int charValue = -
1;
for (CharPosition scanCp : charmap
) {
if (scanCp.
expands > 0)
continue;
if (scanCp.
first == ch.
first) {
if (scanCp.
second == ch.
second &&
scanCp.
third == ch.
third) {
charValue = scanCp.
val;
break;
} else if (charValue
< 0) {
charValue = scanCp.
val;
}
}
}
if (charValue
>=
0)
charValue = toUnicode
(charValue
);
if (charValue
>=
0)
chars.
format(" %c", charValue
);
}
chars.
format("\n");
}
}
chars.
format("\n# ends\n", codepage
);
d.
item().
addText(chars.
toString());
d.
print(outStream
);
}
private String fmtChar
(int val
) {
boolean asChar =
true;
switch (val
) {
case '<':
case ';':
case ',':
case '=':
case '#':
asChar =
false;
break;
default:
switch (Character.
getType(val
)) {
case Character.
UNASSIGNED:
case Character.
NON_SPACING_MARK:
case Character.
FORMAT:
case Character.
CONTROL:
case Character.
SPACE_SEPARATOR:
case Character.
LINE_SEPARATOR:
case Character.
PARAGRAPH_SEPARATOR:
asChar =
false;
}
}
if (asChar
) {
//noinspection MalformedFormatString
return String.
format("%c", val
);
} else {
return String.
format("%04x", val
);
}
}
private CharPosition printCharPosition
(Displayer d,
int reclen,
int charValue
) {
DisplayItem item = d.
item();
int flags = reader.
get1u();
item.
setBytes1(flags
);
CharPosition c = readCharPosition
(item, reclen-
1, charValue
);
StringBuilder sb =
new StringBuilder();
Formatter fmt =
new Formatter(sb
);
fmt.
format("0x%02x ", charValue
);
int unicodeChar = toUnicode
(charValue
);
if (unicodeChar
< 0) // no character defined for this position
fmt.
format("NaC ");
else
fmt.
format("(%c) ", unicodeChar
);
if ((flags
& 0x1
) !=
0)
sb.
append("Letter ");
if ((flags
& 0x2
) !=
0)
sb.
append("Number ");
if ((flags
& 0xf0
) ==
0) {
sb.
append(c
);
} else {
// This is an expansion, it sorts as two or more characters (eg ß sorts near ss).
// The pos is an index into srt5.
c.
expands =
(flags
>> 4) & 0xf
;
expansion
(sb, c.
first, c.
expands);
}
item.
addText(sb.
toString());
return c
;
}
/**
* Read the sort position information. The format varies depending on the posLength parameter.
*
* @param item The display item - any bytes read are added to this.
* @param posLength The length of the position information (not the record length, just the
* part of it that encodes the positions).
* @param charValue The character that this applies to.
* @return A {@link CharPosition} structure containing the sort position weights.
*/
private CharPosition readCharPosition
(DisplayItem item,
int posLength,
int charValue
) {
CharPosition c =
new CharPosition
(charValue
);
if (posLength ==
2) {
int rec = reader.
get2u();
item.
setBytes2(rec
);
c.
first = rec
& 0xff
;
c.
second =
(rec
>> 8) & 0xf
;
c.
third =
(rec
>> 12) & 0xf
;
} else if (posLength ==
3) {
int rec = reader.
get3u();
item.
setBytes3(rec
);
c.
first = rec
& 0xff
;
c.
second =
(rec
>> 8) & 0xff
;
c.
third =
(rec
>> 16) & 0xff
;
} else if (posLength ==
4) {
int rec = reader.
get4();
item.
setBytes4(rec
);
c.
first = rec
& 0xffff
;
c.
second =
(rec
>> 16) & 0xff
;
c.
third =
(rec
>> 24) & 0xff
;
}
return c
;
}
/**
* Some characters sort as if they were two separate characters (eg ß sorts like 'ss').
* @param sb Sort order descriptions are added to this buffer.
* @param pos Index into the expansions area.
* @param n The number of characters in the expansion.
*/
private void expansion
(StringBuilder sb,
int pos,
int n
) {
sb.
append("Expansion: ");
for (int i =
0; i
<= n
; i++
) {
if (pos + i ==
0) {
sb.
append(String.
format("error: pos=%d n=%d, readpos=%x", pos, n,
reader.
position()));
return;
}
CharPosition ch = expansions.
get(pos + i -
1);
sb.
append(ch
);
if (i
!= n
) {
sb.
append(" & ");
}
}
}
private int toUnicode
(int c
) {
if (isUnicode
)
return c
;
ByteBuffer b =
ByteBuffer.
allocate(1);
b.
put((byte) c
);
b.
flip();
try {
CharBuffer chars = decoder.
decode(b
);
return chars.
charAt(0);
} catch (CharacterCodingException e
) {
return -
1;
}
}
/**
* Expansion table. Some characters sort as though they were two (or more) characters.
* This table is a list of sort positions that are referred to from the main character
* table. As such there is no particular pattern to the entries in the table.
*/
private void printSrt5
() {
Displayer d =
new Displayer
(reader
);
d.
setTitle("SRT 5 (expansions)");
int reclen = srt5.
getRecordSize();
reader.
position(tableHeader.
getStart() + srt5.
getStart());
for (int i =
0; i
< srt5.
getNumberOfRecords(); i++
) {
DisplayItem item = d.
item();
CharPosition ch = readCharPosition
(item, reclen,
0);
item.
addText(ch.
toString());
expansions.
add(ch
);
}
d.
print(outStream
);
}
/**
* This is used for multi-byte character sets.
*
* It is a list of pointers into srt8.
*
* Some slots are filled with 0xffffffff so you
* probably look up the high bytes in this table to get the block where you
* look up the low byte or something similar.
*/
private void printSrt7
() {
if (srt7 ==
null)
return;
Displayer d =
new Displayer
(reader
);
d.
setTitle("SRT 7 (pointers to srt8 indexed by top part of char)");
reader.
position(tableHeader.
getStart() + srt7.
getStart());
int block =
1;
for (int i =
0; i
< srt7.
getNumberOfRecords(); i++
) {
DisplayItem item = d.
intItem();
int val = item.
getValue();
item.
addText("%4x: Pointer to srt8 %x", block, val
);
if (val
>=
0)
offsetToBlock.
put(val/srt8.
getRecordSize(), block
);
d.
print(outStream
);
d.
setTitle(null);
block++
;
}
}
private void printSrt8
() {
if (srt8 ==
null)
return;
Displayer d =
new Displayer
(reader
);
d.
setTitle("SRT 8 (character table for multibyte characters)");
int reclen = srt8.
getRecordSize();
reader.
position(tableHeader.
getStart() + srt8.
getStart());
d.
setSectStart(reader.
position());
int block =
1;
for (int i =
0; i
< srt8.
getNumberOfRecords(); i++
) {
Integer nblock = offsetToBlock.
get(i
);
if (nblock
!=
null)
block = nblock
;
CharPosition cp = printCharPosition
(d, reclen, block
*256 +
(i
% 256));
charmap.
add(cp
);
d.
print(outStream
);
d.
setTitle(null);
}
}
public static void main
(String[] args
) {
if (args.
length < 1) {
System.
err.
println("Usage: srtdisplay <filename>");
System.
exit(1);
}
CommonDisplay td =
new SrtDisplay
();
td.
display(args
[0],
"SRT");
}
private class CharPosition
implements Comparable<CharPosition
> {
private final int val
;
private int first
;
private int second
;
private int third
;
private int expands
;
public CharPosition
(int charValue
) {
this.
val = charValue
;
}
public int compareTo
(CharPosition c2
) {
if (c2.
first == first
)
return compareSecond
(c2
);
else if (first
< c2.
first)
return -
1;
else
return 1;
}
private int compareSecond
(CharPosition c2
) {
if (c2.
second == second
)
return compareThird
(c2
);
else if (second
< c2.
second)
return -
1;
else
return 1;
}
private int compareThird
(CharPosition c2
) {
if (third == c2.
third)
return Integer.
compare(val, c2.
val);
else if (third
< c2.
third)
return -
1;
else
return 1;
}
public String toString
() {
return "prim=" + first +
",sec=" + second +
",tert=" + third
;
}
}
}