解析获取Xml Encoding字符集Charset
/* |
* Given a URI string, open it, read its contents into a String |
* and return the String |
* |
*@param uri the URI to open |
*@return the content at the URI or null if any error occurs |
*/ |
private String getRDFfromURI (String uri) throws getRDFException |
{ |
/* add something like this code here, to allow reading from a file: |
(if we really want to allow this!) |
File ff = new File(uri); |
in = new FileInputStream(ff); |
*/ |
URL url = null; |
try { |
url = new URL(uri); |
} catch (MalformedURLException e) { |
throw new getRDFException("Malformed URI."); |
} |
URLConnection con = null; |
try { |
con = url.openConnection(); |
con.setRequestProperty("Accept", "application/rdf+xml"); |
con.connect(); |
} catch (Exception e) { |
throw new getRDFException("Unable to open connection."); |
} |
String contentT = con.getContentType(); |
String HTTPcharset = null; |
if (contentT != null) { |
ContentType contentType = null; |
try { |
contentType = new ContentType(con.getContentType()); |
} catch (javax.mail.internet.ParseException e) { |
throw new getRDFException("Unparsable content type."); |
} |
HTTPcharset = contentType.getParameter("charset"); |
} |
// need buffer for lookahead for encoding detection |
BufferedInputStream bis = null; |
try { |
bis = new BufferedInputStream(con.getInputStream()); |
} catch (IOException e) { |
throw new getRDFException("Cannot open stream."); |
} |
bis.mark(200); // mark start so that we can get back to it |
String s = ""; |
try { // read start of file as bytes |
int c; |
int numRead = 0; |
while ((c = bis.read()) != -1) { |
s += (char)c; |
if (numRead++ >= 195) break; |
} |
} catch (IOException e) { |
throw new getRDFException("IOException while starting reading."); |
} |
if (s.equals("")) |
// Nothing was returned |
throw new getRDFException("Empty document, ignored."); |
// A server could return content but not the RDF/XML that |
// we need. Check the beginning of s and if it looks like |
// a generic HTML message, return an error. |
if (s.startsWith("<!DOCTYPE")) |
throw new getRDFException("Document looks like HTML, ignored."); |
String APPFcharset = null; // 'charset' according to XML APP. F |
int ignoreBytes = 0; |
if (s.startsWith("\u00FE\u00FF")) { |
APPFcharset = "UTF-16BE"; |
ignoreBytes = 2; |
} |
else if (s.startsWith("\u00FF\u00FE")) { |
APPFcharset = "UTF-16LE"; |
ignoreBytes = 2; |
} |
else if (s.startsWith("\u00EF\u00BB\u00BF")) { |
APPFcharset = "UTF-8"; |
ignoreBytes = 3; |
} |
else if (s.startsWith("\u0000<\u0000?")) { |
APPFcharset = "UTF-16BE"; |
} |
else if (s.startsWith("<\u0000?\u0000")) { |
APPFcharset = "UTF-16LE"; |
} |
else if (s.startsWith("<?xml")) { |
APPFcharset = "iso-8859-1"; //to not loose any bytes |
} |
else if (s.startsWith("\u004C\u006F\u00A7\u0094")) { |
APPFcharset = "CP037"; // EBCDIC |
} |
else { |
APPFcharset = "iso-8859-1"; //to not loose any bytes |
} |
// convert start of xml input according to APPFcharset |
String xmlstart = null; |
try { |
// System.err.println("---------------------------"); |
// System.err.println("ignoreBytes="+ignoreBytes); |
// System.err.println("s="+s); |
// System.err.println("APPFcharset="+APPFcharset); |
// if (APPFcharset!=null){xmlstart = new String(s.substring(ignoreBytes).getBytes("iso-8859-1"), APPFcharset);} |
// else {xmlstart=new String(s.substring(ignoreBytes).getBytes("iso-8859-1"));APPFcharset = "UTF-8";} |
xmlstart = new String(s.substring(ignoreBytes).getBytes("iso-8859-1"), APPFcharset); |
} catch (UnsupportedEncodingException e) { |
throw new getRDFException("Unsupported encoding '"+APPFcharset+"'."); |
} |
RE r; |
try { |
r = new RE("<\\?xml[ \\t\\n\\r]+version[ \\t\\n\\r]?=[ \\t\\n\\r]?(['\"])([a-zA-Z0-9_:]|\\.|-)+\\1[ \\t\\n\\r]+encoding[ \\t\\n\\r]?=[ \\t\\n\\r]?(['\"])([A-Za-z]([A-Za-z0-9._]|-)*)\\3"); |
} catch (RESyntaxException res) { |
throw new getRDFException("Wrong regular expression syntax."); |
} |
// r.setMatchFlags(MATCH_NORMAL | MATCH_SINGLELINE); |
String XMLcharset = null; |
if (r.match(xmlstart) && r.getParenStart(0)==0) |
XMLcharset = r.getParen(4); |
if (HTTPcharset != null) |
HTTPcharset = HTTPcharset.toUpperCase(); |
if (XMLcharset != null) |
XMLcharset = XMLcharset.toUpperCase(); |
String finalCharset = null; |
if (HTTPcharset != null) { |
if (XMLcharset != null && !HTTPcharset.equals(XMLcharset)) |
throw new getRDFException("Charset conflict: Content-Type: " |
+ contentT+ ". XML encoding: " + XMLcharset + "."); |
finalCharset = HTTPcharset; |
} |
else if (XMLcharset != null) |
finalCharset = XMLcharset; |
if ((finalCharset != null && finalCharset.equals("UTF-16")) || |
(finalCharset == null && APPFcharset.startsWith("UTF-16"))) |
if (ignoreBytes == 2) |
finalCharset = APPFcharset; // use correct endianness |
else |
throw new getRDFException("Illegal XML: UTF-16 without BOM."); |
if (finalCharset == null) |
finalCharset = "UTF-8"; |
try { |
bis.reset(); // move back to start of stream |
bis.skip(ignoreBytes); // skip BOM |
} catch (IOException e) { |
throw new getRDFException("IOException while resetting stream."); |
} |
InputStreamReader isr = null; |
try { |
isr = new InputStreamReader(bis, finalCharset); |
} catch (UnsupportedEncodingException e) { |
throw new getRDFException("Unsupported encoding '"+finalCharset+"'."); |
} |
StringBuffer sb=new StringBuffer(""); |
int bytenum=0; |
try {// read whole file as characters |
int c; |
while ((c = isr.read()) != -1) { |
sb.append((char)c); |
bytenum++; |
} |
} |
catch (IOException e){ |
throw new getRDFException("Undecodable data when reading URI at byte "+bytenum+" using encoding '"+finalCharset+"'."+" Please check encoding and encoding declaration of your document."); |
} |
// todo: fix encoding parameter in xml pseudo-PI |
return sb.toString(); |
} |
同时入选IMDB和豆瓣电影100强的经典电影
2009-05-19 添加1. 肖申克的救赎 The Shawshank Redemption
导演 : 弗兰克·德拉邦特 Frank Darabont
|
2009-05-19 添加2. 教父 The Godfather
导演 : 弗朗西斯·福特·科波拉 Francis Ford Coppola
|
2009-05-19 添加3. 教父2 The Godfather: Part Ⅱ
导演 : Francis Ford Coppola
|
2009-05-19 添加4. 低俗小说 Pulp Fiction
导演 : 昆汀·塔伦蒂诺 Quentin Tarantino
|
2009-05-19 添加5. 辛德勒的名单 Schindler's List
导演 : Steven Spielberg
|
2009-05-19 添加6. 飞越疯人院 One Flew Over the Cuckoo's Nest
导演 : 米洛斯·福尔曼 Miloš Forman
|
2009-05-19 添加7. 七武士 七人の侍
导演 : 黑泽明
|
2009-05-19 添加8. 指环王3:王者无敌 The Lord of the Rings: The Return of the King
导演 : 彼得·杰克逊 Peter Jackson
|
2009-05-19 添加9. 上帝之城 Cidade de Deus
导演 : Fernando Meirelles/Kátia Lund
|
2009-05-19 添加10. 指环王1:魔戒再现 The Lord of the Rings: The Fellowship of the Ring
导演 : 彼得·杰克逊 Peter Jackson
|
2009-05-19 添加11. 搏击俱乐部 Fight Club
导演 : 大卫·芬奇 David Fincher
|
2009-05-19 添加12. 这个杀手不太冷 Léon
导演 : 吕克·贝松 Luc Besson
|
2009-05-19 添加13. 美丽人生 La vita è bella
导演 : 罗伯托·贝尼尼 Roberto Benigni
|
2009-05-19 添加14. 天堂电影院 Nuovo Cinema Paradiso
导演 : 朱塞佩·托纳多雷 Giuseppe Tornatore
|
2009-06-07 添加15. 千与千寻 千と千尋の神隠し
导演 : 宫崎骏 Hayao Miyazaki
|
2009-06-07 添加16. 钢琴家 The Pianist
导演 : Roman Polanski
|
2009-06-07 添加17. 指环王2:双塔奇兵 The Lord of the Rings: The Two Towers
导演 : 彼得·杰克逊 Peter Jackson
|
2009-06-07 添加18. 美国往事 Once Upon a Time in America
导演 : Sergio Leone
|
2009-06-07 添加19. 阿甘正传 Forrest Gump
导演 : Robert Zemeckis
|
2009-06-07 添加20. 天使爱美丽 Le fabuleux destin d'Amélie Poulain
导演 : Jean-Pierre Jeunet
|
2009-06-07 添加21. 梦之安魂曲 Requiem for a Dream
导演 : 达伦·阿伦诺夫斯基 Darren Aronofsky
|
2010-02-21 添加22. 机器人总动员 WALL·E
导演 : 安德鲁·斯坦顿 Andrew Stanton
|
2010-02-21 添加23. 窃听风暴 Das Leben der Anderen
导演 : 弗洛里安·亨克尔·冯·多纳斯马 Florian Henckel von Donnersmarck
|
2010-02-21 添加24. 飞屋环游记 Up
导演 : 彼特·道格特 Pete Docter
|
2010-02-21 添加25. 雨中曲 Singin' in the Rain
导演 : Stanley Donen/Gene Kelly |
Dynamic DataSource Routing | SpringSource Team Blog
Anyways, here's the code for my Catalog:
As you can see, the Catalog simply returns a list of Item objects. The Item just contains name and price properties:
Now, in order to demonstrate multiple DataSources, I created an enum for different Customer types (representing "levels" of membership I guess), and I created three different databases – so that each type of customer would get a distinct item list (I did mention that this would be a contrived example, didn't I?). The important thing is that all of the databases are equivalent in terms of the schema. That way the Catalog's query will work against any of them – just returning different results. In this case, it's just the "item" table with 2 columns: name and price. And… here is the enum:
It's time to create some bean definitions. Since I have 3 datasources where everything is the same except for the port number, I created a parent bean so that the shared properties can be inherited. Then, I added the 3 bean definitions to represent the per-CustomerType DataSources:
Notice that I added a PropertyPlaceholderConfigurer so that I could externalize the port numbers in a "db.properties" file, like so:
Now things start to get interesting. I need to supply the "routing" DataSource to my Catalog so that it can dynamically get connections from the 3 different databases at runtime based on the current customer's type. As I mentioned, the AbstractRoutingDataSource can be rather simple to implement. Here is my implementation:
…and the CustomerContextHolder simply provides access to a thread-bound CustomerType. In reality, the 'context' would likely hold more information about the customer. Also note that if you are using Acegi, then you could retrieve some information from the userDetails. For this example, it's just the customer "type":
Finally, I just need to configure the catalog and routing DataSource beans. As you can see, the "real" DataSource references are provided in a Map. If you provide Strings, they can be resolved as JNDI names (or any custom resolution strategy can be provided – see the JavaDoc). Also, I've simply set the 'bronzeDataSource' as the default:
Of course I'd like to see this working, so I've created a simple test (extending one of Spring's integration test support classes). I added 3 items to the "gold" database, 2 items to the "silver" database, and only 1 item to the "bronze" database. This is the test:
…and rather than simply taking a screenshot of the green bar, you'll notice I've provided some console output – the results!:
As you can see, the configuration is simple. Better still, the data-access code is not concerned with looking up different DataSources. For more information, consult the JavaDoc for AbstractRoutingDataSource.