// Rev 420 | Blame | Compare with Previous | Last modification | View Log | RSS feed
/*
* Copyright (c) 2009, Chris Miller
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
package uk.me.parabola.splitter.geo;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import uk.me.parabola.splitter.Convert;
import uk.me.parabola.splitter.Utils;
/**
* Loads in city information from a GeoNames file. See
* http://download.geonames.org/export/dump/readme.txt for details of the file format.
*
* @author Chris Miller
*/
public class CityLoader {
    /** Splits one GeoNames record into its tab-separated columns. */
    private static final Pattern TAB_DELIMITED_SPLIT_PATTERN = Pattern.compile("\\t");

    /**
     * Limit handed to {@link Pattern#split(CharSequence, int)}. The highest column read
     * below is {@link #POPULATION_INDEX}; anything past the limit is left unsplit in the
     * last array element.
     */
    private static final int MAX_SPLIT_FIELDS = 16;

    // Zero-based column positions of the fields this loader reads from each record.
    private static final int GEONAME_ID_INDEX = 0;
    private static final int NAME_INDEX = 1;
    private static final int ASCII_NAME_INDEX = 2;
    private static final int COUNTRY_CODE_INDEX = 8;
    private static final int LAT_INDEX = 4;
    private static final int LON_INDEX = 5;
    private static final int POPULATION_INDEX = 14;

    /** When true the ASCII name column is used for the city name, otherwise the name column. */
    private final boolean useAsciiNames;

    /**
     * Creates a loader.
     *
     * @param useAsciiNames {@code true} to take city names from the ASCII name column,
     *                      {@code false} to take them from the name column
     */
    public CityLoader(boolean useAsciiNames) {
        this.useAsciiNames = useAsciiNames;
    }

    /**
     * Opens the given GeoNames file via {@code Utils.openFile} and loads all cities from it.
     * An {@link IOException} does not abort processing: a warning that includes the cause is
     * printed to standard output instead.
     *
     * @param geoNamesFile name of the GeoNames file to read
     * @return the cities that could be parsed, or {@code null} if the file could not be
     *         opened or read (callers must check for {@code null})
     */
    public List<City> load(String geoNamesFile) {
        List<City> result = null;
        try (BufferedReader r = new BufferedReader(Utils.openFile(geoNamesFile, true))) {
            result = load(r);
        } catch (IOException e) {
            // Deliberately non-fatal, but report the reason so the failure can be diagnosed.
            System.out.println("Warning: Could not read geonames file " + geoNamesFile
                    + " (" + e + "), processing continues");
        }
        return result;
    }

    /**
     * Reads GeoNames records line by line from the reader and converts each one to a
     * {@code City}. A line that cannot be parsed is reported on standard error together
     * with its line number and is then skipped; it does not stop the load.
     *
     * @param reader source of tab-separated GeoNames records, one per line; the reader is
     *               not closed by this method
     * @return the successfully parsed cities, in file order (possibly empty, never {@code null})
     * @throws IOException if reading from {@code reader} fails
     */
    public List<City> load(BufferedReader reader) throws IOException {
        List<City> cities = new ArrayList<>(1000);
        String line;
        int lineNumber = 0;
        while ((line = reader.readLine()) != null) {
            lineNumber++;
            try {
                cities.add(parseCity(line));
            } catch (Exception e) {
                // Best effort: any malformed record (too few columns, bad number, ...) is
                // logged and skipped so one bad line cannot abort the whole file.
                System.err.format("Unable to parse GeoNames data at line %d%nReason:%s%nData: %s%n",
                        lineNumber, e.toString(), line);
            }
        }
        return cities;
    }

    /**
     * Converts a single tab-separated GeoNames record into a {@code City}.
     *
     * @param line one record from the GeoNames file
     * @return the city described by the record
     * @throws RuntimeException if a required column is missing or a numeric column cannot
     *                          be parsed
     */
    private City parseCity(String line) {
        String[] split = TAB_DELIMITED_SPLIT_PATTERN.split(line, MAX_SPLIT_FIELDS);
        int geoNameId = Integer.parseInt(split[GEONAME_ID_INDEX]);
        String name;
        if (useAsciiNames) {
            name = detach(split[ASCII_NAME_INDEX]);
        } else {
            name = detach(split[NAME_INDEX]);
        }
        // Country codes repeat across many records, so share one instance per code.
        String countryCode = detach(split[COUNTRY_CODE_INDEX]).intern();
        int population = Integer.parseInt(split[POPULATION_INDEX]);
        int lat = Utils.toMapUnit(Convert.parseDouble(split[LAT_INDEX]));
        int lon = Utils.toMapUnit(Convert.parseDouble(split[LON_INDEX]));
        return new City(geoNameId, countryCode, name, lat, lon, population);
    }

    /**
     * Returns a copy of the field backed by its own character array. This prevents a memory
     * leak on JVMs where a substring shares the character array of the full line it was cut
     * from, which would keep every line alive for as long as its city is retained.
     */
    private static String detach(String field) {
        return new String(field.toCharArray());
    }
}