// WebSVN - display - Rev 378 - /u/steve/main/src/test/display/SrtDisplay.java

/*
* Copyright (C) 2007 Steve Ratcliffe
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* Author: Steve Ratcliffe
* Create date: Dec 16, 2007
*/
package test.display;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

/**
* Standalone program to display the SRT file. This is used for
* the sorting order for different charsets apparently.
*
* @author Steve Ratcliffe
*/
@SuppressWarnings("MalformedFormatString") // bug, not actually an error for %c to have int
public class SrtDisplay extends CommonDisplay {
// Sections located through the SRT 1 pointer block; both are assigned in printSrt1().
private Section description;
private Section tableHeader;

// Reader position of the SRT 1 block, read from the file header in readFileHeader().
private int srt1start;

// Sub-sections of the character table header, assigned in tableHeader().
// srt7 and srt8 are only assigned when the sub header is long enough to
// contain them, so they stay null otherwise.
private Section characterTable;
private Section srt5;
private Section srt7;
private Section srt8;
// Decoder for the code page named in the table header; created in tableHeader().
private CharsetDecoder decoder;

// Sort positions read from SRT 5 by printSrt5(); looked up by expansion().
private final List<CharPosition> expansions = new ArrayList<>();
// Characters kept in CharPosition.compareTo() order; printed by printCharMap().
private final Set<CharPosition> charmap = new TreeSet<>();
// Entries read from SRT 8 by printSrt8(); looked up by addChars().
private final List<CharPosition> multi = new ArrayList<>();

// Set in tableHeader() when the code page is 65001; toUnicode() then returns
// character values unchanged.
private boolean isUnicode;

/**
* Entry point for the display: calls readCommonHeader(), then readFileHeader()
* for the SRT specific part of the header, then printBody() for everything else.
*/
protected void print() {
readCommonHeader();
readFileHeader();
printBody();
}

/**
* This header is unusual as it doesn't follow the normal header conventions for
* defining the sections. It points to a single section that contains a header
* which in turn points to other sections.
*
* The only value kept from here is the start of SRT 1, which is stored in
* srt1start for printSrt1().
*/
private void readFileHeader() {
Displayer d = new Displayer(reader);
d.setTitle("SRT Header");

d.charValue("??? %d"); // Number of sections? Seems plausible: the only sample seen has one section and the value 0x01 here.
srt1start = d.intValue("SRT 1 start");
d.charValue("len %d");

// A header length of 37 carries a second group of three values; they are
// displayed but not used anywhere else.
if (getHeaderLen() == 37) {
d.charValue("??? %d");
d.intValue("SRT 2 start %x");
d.charValue("len %d");
}
d.print(outStream);
}

/**
* Print each part of the file in turn, finishing with a summary of the sort order.
*
* The order of the calls matters: printSrt1() and tableHeader() locate the sections
* that the later calls read, printSrt5() fills the expansions list that
* printCharacterTable() looks entries up in, printSrt8() fills the list that
* printSrt7() takes characters from, and printCharMap() prints the characters
* collected by the earlier calls.
*/
private void printBody() {
printSrt1();
//printSrt2();
printDescription();
tableHeader();

printSrt5();
printCharacterTable();
printSrt8();
printSrt7();

// Show the actual sort order
printCharMap();
}

/**
 * Display the SRT 1 pointer block. Unlike the file header, this block uses regular
 * section headers like the other app files.
 *
 * The two sections read here (description and table header) are remembered so that
 * the later print methods can find their data.
 */
private void printSrt1() {
	Displayer pointers = new Displayer(reader);
	reader.position(srt1start);
	pointers.setTitle("SRT 1 (pointers)");

	description = readSection(pointers, "Description", 2, false, false);
	tableHeader = readSection(pointers, "Table header", 3, false, false);

	pointers.print(outStream);
}

/**
 * Display the description section: a zero terminated string, followed by whatever
 * else the section contains shown as raw bytes.
 */
private void printDescription() {
	Displayer disp = new Displayer(reader);
	reader.position(description.getStart());
	disp.setTitle("Description");

	String text = disp.zstringValue("Description: %s");

	// The section length minus the string and one more byte (presumably the
	// terminating zero) is what is left over to show as raw data.
	int leftOver = (int) (description.getLen() - text.length() - 1);
	disp.rawValue(leftOver);

	disp.print(outStream);
}

/**
 * Display the character table header and remember the sections that it points to.
 *
 * This is a strange section, because it also has a header with multiple sections.
 * The sub header length decides how many of those sections are present: SRT 7 and
 * SRT 8 are only read (and the srt7/srt8 fields only assigned) when the sub header
 * is longer than 0x3c and 0x44 bytes respectively.
 *
 * The code page found here selects the decoder used by toUnicode(). A code page that
 * this JVM has no charset for no longer aborts the display; a warning is written to
 * stderr and ISO-8859-1 is used instead.
 */
private void tableHeader() {
	Displayer d = new Displayer(reader);
	d.setTitle("Character table header");

	long start = tableHeader.getStart();
	d.setSectStart(start);
	reader.position(start);
	int len = d.charValue("sub header len %d");
	d.charValue("id1 %d");
	d.charValue("id2 %d");

	int codepage = d.charValue("codepage %d");
	if (codepage == 65001)
		isUnicode = true;
	decoder = charsetFor(codepage).newDecoder();
	d.intValue("??? %d");

	characterTable = readSection(d, "SRT 4 character table", 4, true, false);
	d.rawValue(6, "padding?");

	srt5 = readSection(d, "SRT 5 expansions", 5, true, false);
	d.rawValue(6, "padding?");

	if (len > 0x2c) {
		// The section is displayed but its contents are not otherwise used.
		readSection(d, "SRT 6 ??", 6, true, false);
		d.charValue("??");
		d.intValue("max code block (in srt7) %d");
	}
	if (len > 0x3c) {
		srt7 = readSection(d, "SRT 7 srt8 ptrs", 7, true, false);
		d.charValue("");
		d.intValue("");
	}
	if (len > 0x44) {
		srt8 = readSection(d, "SRT 8 multi-byte chars", 8, true, false);
	}

	// Anything in the sub header that was not consumed above is shown as raw bytes.
	d.rawValue((int) (len - (reader.position() - start)), "remainder");
	d.print(outStream);
}

/**
 * Find the charset for a code page number, falling back to ISO-8859-1 if the
 * JVM does not know it.
 *
 * @param codepage The code page number from the table header; 65001 means utf-8.
 * @return A charset that is always usable for creating a decoder.
 */
private static Charset charsetFor(int codepage) {
	String cpname = (codepage == 65001) ? "utf-8" : "cp" + codepage;
	try {
		return Charset.forName(cpname);
	} catch (IllegalArgumentException e) {
		// Both IllegalCharsetNameException and UnsupportedCharsetException are
		// IllegalArgumentExceptions. ISO-8859-1 is available on every Java platform.
		System.err.println("Unsupported code page " + codepage + " (" + cpname
				+ "), displaying characters as ISO-8859-1: " + e);
		return Charset.forName("ISO-8859-1");
	}
}

/**
 * Display SRT 4, the main character table.
 *
 * The position of a character in this table gives its primary, secondary and tertiary
 * sorting weights, along with flags that mark letters and numbers. Entries flagged as
 * expansions refer to the expansion table instead: these are letters or symbols that
 * sort as two or more characters.
 *
 * Every entry is added to the character map for the final summary. Character values
 * are counted from 1.
 */
private void printCharacterTable() {
	Displayer disp = new Displayer(reader);
	disp.setTitle("SRT 4 Character table");

	int recordSize = characterTable.getRecordSize();
	disp.setSectStart(tableHeader.getStart());
	// The section start is relative to the start of the table header.
	reader.position(tableHeader.getStart() + characterTable.getStart());

	int charValue = 1;
	while (charValue <= characterTable.getNumberOfRecords()) {
		charmap.add(printCharPosition(disp, recordSize, charValue));

		// Print one record at a time; only the first one carries the title.
		disp.print(outStream);
		disp.setTitle(null);
		charValue++;
	}
}

/**
 * Now that we have all the characters read, print them out showing the sorting.
 *
 * Each primary weight starts a new line of the form {@code [primary] < chars # comment}.
 * Within a line " ; " separates characters that differ in the secondary weight and
 * "," those that differ only in the tertiary weight. The comment lists the same
 * characters as U+xxxx values. Characters that are expansions are left out.
 *
 * The comment is written when its line is finished, so it has to be flushed once more
 * after the loop or the last line would lose its comment.
 */
private void printCharMap() {
	Displayer d = new Displayer(reader);
	d.setTitle("------- Summary of ordering --------");

	Formatter chars = new Formatter();
	Formatter comment = new Formatter();
	CharPosition last = null;
	for (CharPosition cp : charmap) {
		if (cp.expands)
			continue;

		if (last == null || cp.first != last.first) {
			if (last != null) {
				// Finish the previous line with its comment before starting a new one.
				chars.format(" # %s\n", comment);
				comment = new Formatter();
			}
			chars.format("[%d] < ", cp.first);
		} else if (cp.second != last.second) {
			chars.format(" ; ");
			comment.format(" ; ");
		} else if (cp.third != last.third) {
			chars.format(",");
			comment.format(",");
		}
		last = cp;
		chars.format("%c", toUnicode(cp.val));
		comment.format("U+%04x", cp.val);
	}

	// The final line still needs its comment.
	if (last != null)
		chars.format(" # %s", comment);

	d.item().addText(chars.toString());
	d.print(outStream);
}

/**
 * Read one character table record and add a description of it to the display.
 *
 * A record is a flags byte followed by the sort position information. Bit 0 of the
 * flags marks a letter and bit 1 a number. When either of bits 4 and 5 is set the
 * record is an expansion and the position field is an index into the expansion table.
 *
 * @param d The display that the record is added to.
 * @param reclen The full record length, including the flags byte.
 * @param charValue The character that the record belongs to.
 * @return The position that was read, with {@code expands} set for an expansion.
 */
private CharPosition printCharPosition(Displayer d, int reclen, int charValue) {
	DisplayItem item = d.item();

	int flagByte = reader.get() & 0xff;
	item.setBytes((byte) flagByte);

	// The rest of the record is the position information.
	CharPosition position = readCharPosition(item, reclen - 1, charValue);

	StringBuilder text = new StringBuilder();
	new Formatter(text).format("0x%02x (%c) ", charValue, toUnicode(charValue));

	if ((flagByte & 0x1) != 0)
		text.append("Letter ");
	if ((flagByte & 0x2) != 0)
		text.append("Number ");

	boolean isExpansion = (flagByte & 0x30) != 0;
	if (isExpansion) {
		// It sorts as two or more characters (eg ß sorts near ss).
		// The primary position is really an index into srt5.
		position.expands = true;
		expansion(text, position.first, (flagByte >> 4) & 0x3);
	} else {
		text.append(position);
	}

	item.addText(text.toString());
	return position;
}

/**
 * Read the sort weights of one character. How they are packed depends on posLength:
 * with 2 bytes the primary weight has 8 bits and the secondary and tertiary 4 bits
 * each; with 4 bytes the primary has 16 bits and the other two 8 bits each.
 * For any other length nothing is read and all the weights are left at zero.
 *
 * @param item The display item - any bytes read are added to this.
 * @param posLength The length of the position information (not the record length, just the
 * part of it that encodes the positions).
 * @param charValue The character that this applies to.
 * @return A {@link CharPosition} structure containing the sort position weights.
 */
private CharPosition readCharPosition(DisplayItem item, int posLength, int charValue) {
	// The constructor records the character value.
	CharPosition pos = new CharPosition(charValue);

	switch (posLength) {
	case 2: {
		int packed = reader.getChar();
		item.setBytes((char) packed);

		pos.first = packed & 0xff;
		pos.second = (packed >> 8) & 0xf;
		pos.third = (packed >> 12) & 0xf;
		break;
	}
	case 4: {
		int packed = reader.getInt();
		item.setBytes(packed);

		pos.first = packed & 0xffff;
		pos.second = (packed >> 16) & 0xff;
		pos.third = (packed >> 24) & 0xff;
		break;
	}
	default:
		break;
	}
	return pos;
}

/**
 * Some characters sort as if they were two separate characters (eg ß sorts like 'ss').
 *
 * The positions of the characters that it expands to are appended to the buffer,
 * separated by " & ". An index that falls outside the expansion table, at either end,
 * is reported in the text instead of causing an exception, since the index comes
 * straight from the file.
 *
 * @param sb Sort order descriptions are added to this buffer.
 * @param pos Index into the expansions area, counted from 1.
 * @param n One less than the number of characters in the expansion; entries
 * pos to pos+n are shown.
 */
private void expansion(StringBuilder sb, int pos, int n) {
	sb.append("Expansion: ");
	for (int i = 0; i <= n; i++) {
		int index = pos + i - 1;
		if (index < 0 || index >= expansions.size()) {
			sb.append(String.format("error: pos=%d n=%d, readpos=%x", pos, n,
					reader.position()));
			return;
		}

		sb.append(expansions.get(index));
		if (i != n) {
			sb.append(" & ");
		}
	}
}

/**
 * Convert a character value in the code page of the file to a unicode code point.
 *
 * For a unicode file the value is returned as it is. Otherwise the value is turned
 * into bytes and decoded with the decoder for the code page. A value that does not fit
 * in one byte is written out most significant byte first, on the assumption that the
 * high part of a multi-byte value is its lead byte; previously only the low byte
 * was decoded.
 *
 * @param c The character value in the code page of the file.
 * @return The unicode code point, or '?' if the value cannot be decoded.
 */
private int toUnicode(int c) {
	if (isUnicode)
		return c;

	// Use as many bytes as the value needs, never fewer than one.
	int nbytes = 1;
	while (nbytes < 4 && (c >>> (8 * nbytes)) != 0)
		nbytes++;

	ByteBuffer b = ByteBuffer.allocate(nbytes);
	for (int shift = 8 * (nbytes - 1); shift >= 0; shift -= 8)
		b.put((byte) (c >>> shift));
	b.flip();

	try {
		CharBuffer chars = decoder.decode(b);
		if (!chars.hasRemaining())
			return '?';
		// A code point rather than a char, so a surrogate pair is not cut in half.
		return Character.codePointAt(chars, 0);
	} catch (CharacterCodingException e) {
		return '?';
	}
}

/**
 * Display SRT 5, the expansion table.
 *
 * Some characters sort as though they were two (or more) characters. This table is a
 * list of sort positions that are referred to from the main character table, so there
 * is no particular pattern to its entries. Each entry is remembered, in file order,
 * so that it can be looked up when the character table is printed.
 */
private void printSrt5() {
	Displayer disp = new Displayer(reader);
	disp.setTitle("SRT 5 (expansions)");

	int recordSize = srt5.getRecordSize();
	// The section start is relative to the start of the table header.
	reader.position(tableHeader.getStart() + srt5.getStart());

	for (int n = 0; n < srt5.getNumberOfRecords(); n++) {
		DisplayItem item = disp.item();

		// These records have no flags byte, the whole record is the position.
		CharPosition entry = readCharPosition(item, recordSize, 0);
		expansions.add(entry);

		item.addText(entry.toString());
	}
	disp.print(outStream);
}

/**
 * This is used for multi-byte character sets.
 *
 * It is a list of pointers into srt8.
 *
 * Some slots are filled with 0xffffffff so you
 * probably look up the high bytes in this table to get the block where you
 * look up the low byte or something similar.
 *
 * The section only exists when the table sub header is long enough. When it was not
 * read this method does nothing, and when srt8 is missing or has no usable record
 * size the pointers are displayed but not followed.
 */
private void printSrt7() {
	if (srt7 == null)
		return;

	Displayer d = new Displayer(reader);
	d.setTitle("SRT 7 (pointers to srt8 indexed by top part of char)");

	reader.position(tableHeader.getStart() + srt7.getStart());

	// Without a positive record size a pointer cannot be turned into an index.
	int srt8RecordSize = (srt8 == null) ? 0 : srt8.getRecordSize();

	int block = 1;
	for (int i = 0; i < srt7.getNumberOfRecords(); i++) {

		DisplayItem item = d.intItem();
		int val = item.getValue();
		item.addText("%4x: Pointer to srt8 %x", block, val);

		// 0xffffffff marks a slot without a block of characters.
		if (val != 0xffffffff && srt8RecordSize > 0)
			addChars(block << 8, val / srt8RecordSize);
		d.print(outStream);
		d.setTitle(null);
		block++;
	}
}

/**
 * Add the characters from a part of srt8 to the character map.
 *
 * Retrieve the character position information of up to 256 entries from srt8 and
 * add each one to the character map under its real character value.
 *
 * A copy of each entry is added rather than the entry itself. The same srt8 entry may
 * be reached from more than one srt7 slot, and changing the value of an object that is
 * already in the sorted character map would change a character that has been added
 * before. Indexes that fall outside the entries read from srt8 are skipped.
 *
 * @param block The character value of the first character of the block; the caller
 * passes the srt7 block number shifted up by 8 bits.
 * @param n The index into srt8 where this block of characters starts.
 */
private void addChars(int block, int n) {
	for (int i = 0; i < 256; i++) {
		int index = n + i;
		if (index < 0 || index >= multi.size())
			continue;
		CharPosition entry = multi.get(index);

		// Copy the weights under the real character value and add that to the map.
		CharPosition cp = new CharPosition(block + i);
		cp.first = entry.first;
		cp.second = entry.second;
		cp.third = entry.third;
		cp.expands = entry.expands;
		charmap.add(cp);
	}
}

/**
 * Display SRT 8, the character table for multi-byte characters.
 *
 * The records have the same layout as those of the main character table. Each one is
 * remembered, in file order, so that the blocks that srt7 points to can be found later.
 *
 * The section only exists when the table sub header is long enough; when it was not
 * read this method does nothing.
 */
private void printSrt8() {
	if (srt8 == null)
		return;

	Displayer d = new Displayer(reader);
	d.setTitle("SRT 8 (character table for multibyte characters)");

	int reclen = srt8.getRecordSize();
	reader.position(tableHeader.getStart() + srt8.getStart());
	d.setSectStart(reader.position());
	for (int i = 0; i < srt8.getNumberOfRecords(); i++) {
		// The real character value is not known yet, so the record number stands in for it.
		CharPosition cp = printCharPosition(d, reclen, i);
		multi.add(cp);

		d.print(outStream);
		d.setTitle(null);
	}
}

/**
 * Run the display on the file named by the first argument.
 *
 * @param args The command line; a usage message is printed and the program
 * exits with status 1 if there is no file name.
 */
public static void main(String[] args) {
	if (args.length == 0) {
		System.err.println("Usage: srtdisplay <filename>");
		System.exit(1);
	}

	String filename = args[0];
	CommonDisplay srt = new SrtDisplay();
	srt.display(filename, "SRT");
}

/**
 * The sort position of one character: its value and its primary, secondary and
 * tertiary weights.
 *
 * The ordering is by primary, then secondary, then tertiary weight. The character
 * value is the final tie breaker, so that two different characters with identical
 * weights are still different as far as a sorted set is concerned. Without it a
 * {@link java.util.TreeSet} silently drops every character whose weights equal
 * those of one that was added before.
 *
 * The fields are mutable. An instance must not be modified while it is held in a
 * sorted collection.
 */
private class CharPosition implements Comparable<CharPosition> {
	private int val;
	private int first;
	private int second;
	private int third;
	private boolean expands;

	/**
	 * @param charValue The character that the position belongs to. All the
	 * weights start out as zero.
	 */
	public CharPosition(int charValue) {
		this.val = charValue;
	}

	/**
	 * Compare by primary, secondary and tertiary weight, then by character value.
	 *
	 * @param other The position to compare with.
	 * @return Negative, zero or positive as this position sorts before, the same
	 * as, or after the other one.
	 */
	@Override
	public int compareTo(CharPosition other) {
		int result = Integer.compare(first, other.first);
		if (result == 0)
			result = Integer.compare(second, other.second);
		if (result == 0)
			result = Integer.compare(third, other.third);
		if (result == 0)
			result = Integer.compare(val, other.val);
		return result;
	}

	/**
	 * @return The three weights in the form {@code prim=1,sec=2,tert=3}; the
	 * character value is not included.
	 */
	@Override
	public String toString() {
		return "prim=" + first + ",sec=" + second + ",tert=" + third;
	}
}
}

// Subversion Repositories display
//
// (root)/u/steve/main/src/test/display/SrtDisplay.java - Rev 378