| /* 
 | 
        
            | * Given a URI string, open it, read its contents into a String 
 | 
        
            | * and return the String 
 | 
        
            | * 
 | 
        
            | *@param uri the URI to open 
 | 
        
            | *@return the content at the URI or null if any error occurs 
 | 
        
            | */ | 
        
            | private String getRDFfromURI (String uri) throws getRDFException | 
        
            | { | 
        
            | /* add something like this code here, to allow reading from a file: 
 | 
        
            | (if we really want to allow this!) 
 | 
        
            | File ff = new File(uri); 
 | 
        
            | in = new FileInputStream(ff); 
 | 
        
            | */ | 
        
            | URL url = null; | 
        
            | try { | 
        
            | url = new URL(uri); | 
        
            | } catch (MalformedURLException e) { | 
        
            | throw new getRDFException("Malformed URI."); | 
        
            | } | 
        
            |  | 
        
            | URLConnection con = null; | 
        
            | try { | 
        
            | con = url.openConnection(); | 
        
            | con.setRequestProperty("Accept", "application/rdf+xml"); | 
        
            | con.connect(); | 
        
            | } catch (Exception e) { | 
        
            | throw new getRDFException("Unable to open connection."); | 
        
            | } | 
        
            | String contentT = con.getContentType(); | 
        
            | String HTTPcharset = null; | 
        
            | if (contentT != null) { | 
        
            | ContentType contentType = null; | 
        
            | try { | 
        
            | contentType = new ContentType(con.getContentType()); | 
        
            | } catch (javax.mail.internet.ParseException e) { | 
        
            | throw new getRDFException("Unparsable content type."); | 
        
            | } | 
        
            | HTTPcharset = contentType.getParameter("charset"); | 
        
            | } | 
        
            |  | 
        
            | // need buffer for lookahead for encoding detection | 
        
            | BufferedInputStream bis = null; | 
        
            | try { | 
        
            | bis = new BufferedInputStream(con.getInputStream()); | 
        
            | } catch (IOException e) { | 
        
            | throw new getRDFException("Cannot open stream."); | 
        
            | } | 
        
            | bis.mark(200); // mark start so that we can get back to it | 
        
            | String s = ""; | 
        
            |  | 
        
            | try { // read start of file as bytes | 
        
            | int c; | 
        
            | int numRead = 0; | 
        
            | while ((c = bis.read()) != -1) { | 
        
            | s += (char)c; | 
        
            | if (numRead++ >= 195) break; | 
        
            | } | 
        
            | } catch (IOException e) { | 
        
            | throw new getRDFException("IOException while starting reading."); | 
        
            | } | 
        
            |  | 
        
            | if (s.equals("")) | 
        
            | // Nothing was returned | 
        
            | throw new getRDFException("Empty document, ignored."); | 
        
            |  | 
        
            | // A server could return content but not the RDF/XML that | 
        
            | // we need.  Check the beginning of s and if it looks like | 
        
            | // a generic HTML message, return an error. | 
        
            | if (s.startsWith("<!DOCTYPE")) | 
        
            | throw new getRDFException("Document looks like HTML, ignored."); | 
        
            |  | 
        
            | String APPFcharset = null; // 'charset' according to XML APP. F | 
        
            | int ignoreBytes = 0; | 
        
            | if (s.startsWith("\u00FE\u00FF")) { | 
        
            | APPFcharset = "UTF-16BE"; | 
        
            | ignoreBytes = 2; | 
        
            | } | 
        
            | else if (s.startsWith("\u00FF\u00FE")) { | 
        
            | APPFcharset = "UTF-16LE"; | 
        
            | ignoreBytes = 2; | 
        
            | } | 
        
            | else if (s.startsWith("\u00EF\u00BB\u00BF")) { | 
        
            | APPFcharset = "UTF-8"; | 
        
            | ignoreBytes = 3; | 
        
            | } | 
        
            | else if (s.startsWith("\u0000<\u0000?")) { | 
        
            | APPFcharset = "UTF-16BE"; | 
        
            | } | 
        
            | else if (s.startsWith("<\u0000?\u0000")) { | 
        
            | APPFcharset = "UTF-16LE"; | 
        
            | } | 
        
            | else if (s.startsWith("<?xml")) { | 
        
            | APPFcharset = "iso-8859-1"; //to not loose any bytes | 
        
            | } | 
        
            | else if (s.startsWith("\u004C\u006F\u00A7\u0094")) { | 
        
            | APPFcharset = "CP037"; // EBCDIC | 
        
            | } | 
        
            | else { | 
        
            | APPFcharset = "iso-8859-1"; //to not loose any bytes | 
        
            | } | 
        
            |  | 
        
            | // convert start of xml input according to APPFcharset | 
        
            | String xmlstart = null; | 
        
            | try { | 
        
            | //          System.err.println("---------------------------"); | 
        
            | //          System.err.println("ignoreBytes="+ignoreBytes); | 
        
            | //          System.err.println("s="+s); | 
        
            | //          System.err.println("APPFcharset="+APPFcharset); | 
        
            | //          if (APPFcharset!=null){xmlstart = new String(s.substring(ignoreBytes).getBytes("iso-8859-1"), APPFcharset);} | 
        
            | //          else {xmlstart=new String(s.substring(ignoreBytes).getBytes("iso-8859-1"));APPFcharset = "UTF-8";} | 
        
            | xmlstart = new String(s.substring(ignoreBytes).getBytes("iso-8859-1"), APPFcharset); | 
        
            | } catch (UnsupportedEncodingException e) { | 
        
            | throw new getRDFException("Unsupported encoding '"+APPFcharset+"'."); | 
        
            | } | 
        
            | RE r; | 
        
            | try { | 
        
            | r = new RE("<\\?xml[ \\t\\n\\r]+version[ \\t\\n\\r]?=[ \\t\\n\\r]?(['\"])([a-zA-Z0-9_:]|\\.|-)+\\1[ \\t\\n\\r]+encoding[ \\t\\n\\r]?=[ \\t\\n\\r]?(['\"])([A-Za-z]([A-Za-z0-9._]|-)*)\\3"); | 
        
            | } catch (RESyntaxException res) { | 
        
            | throw new getRDFException("Wrong regular expression syntax."); | 
        
            | } | 
        
            | // r.setMatchFlags(MATCH_NORMAL | MATCH_SINGLELINE); | 
        
            | String XMLcharset = null; | 
        
            | if (r.match(xmlstart) && r.getParenStart(0)==0) | 
        
            | XMLcharset = r.getParen(4); | 
        
            | if (HTTPcharset != null) | 
        
            | HTTPcharset = HTTPcharset.toUpperCase(); | 
        
            | if (XMLcharset != null) | 
        
            | XMLcharset = XMLcharset.toUpperCase(); | 
        
            |  | 
        
            | String finalCharset = null; | 
        
            | if (HTTPcharset != null) { | 
        
            | if (XMLcharset != null && !HTTPcharset.equals(XMLcharset)) | 
        
            | throw new getRDFException("Charset conflict: Content-Type: " | 
        
            | + contentT+ ". XML encoding: " + XMLcharset + "."); | 
        
            | finalCharset = HTTPcharset; | 
        
            | } | 
        
            | else if (XMLcharset != null) | 
        
            | finalCharset = XMLcharset; | 
        
            | if ((finalCharset != null && finalCharset.equals("UTF-16")) || | 
        
            | (finalCharset == null && APPFcharset.startsWith("UTF-16"))) | 
        
            | if (ignoreBytes == 2) | 
        
            | finalCharset = APPFcharset; // use correct endianness | 
        
            | else | 
        
            | throw new getRDFException("Illegal XML: UTF-16 without BOM."); | 
        
            | if (finalCharset == null) | 
        
            | finalCharset = "UTF-8"; | 
        
            |  | 
        
            | try { | 
        
            | bis.reset();                 // move back to start of stream | 
        
            | bis.skip(ignoreBytes);       // skip BOM | 
        
            | } catch (IOException e) { | 
        
            | throw new getRDFException("IOException while resetting stream."); | 
        
            | } | 
        
            |  | 
        
            | InputStreamReader isr = null; | 
        
            | try { | 
        
            | isr = new InputStreamReader(bis, finalCharset); | 
        
            | } catch (UnsupportedEncodingException e) { | 
        
            | throw new getRDFException("Unsupported encoding '"+finalCharset+"'."); | 
        
            | } | 
        
            | StringBuffer sb=new StringBuffer(""); | 
        
            | int bytenum=0; | 
        
            | try {// read whole file as characters | 
        
            | int c; | 
        
            | while ((c = isr.read()) != -1) { | 
        
            | sb.append((char)c); | 
        
            | bytenum++; | 
        
            | } | 
        
            | } | 
        
            | catch (IOException e){ | 
        
            | throw new getRDFException("Undecodable data when reading URI at byte "+bytenum+" using encoding '"+finalCharset+"'."+" Please check encoding and encoding declaration of your document."); | 
        
            | } | 
        
            | // todo: fix encoding parameter in xml pseudo-PI | 
        
            | return sb.toString(); | 
        
            | } |