/*
Paros and its related class files.
Paros is an HTTP/HTTPS proxy for assessing web application security.
Copyright (C) 2003-2004 www.proofsecure.com

This program is free software; you can redistribute it and/or
modify it under the terms of the Clarified Artistic License
as published by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Clarified Artistic License for more details.

You should have received a copy of the Clarified Artistic License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

package com.proofsecure.paros.spider;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.*;
import java.util.Date;
import java.util.EmptyStackException;
import java.util.Enumeration;
import java.util.Stack;
import java.util.Vector;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLPeerUnverifiedException;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocket;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLDocument;
import javax.swing.text.html.HTMLEditorKit;

import com.proofsecure.paros.network.HttpRequestHeader;
/*
Todo list:
- Cookie support = ok, not yet tested
- ThreadPool
- Connection Timeout
- setDefaultRequestProperty(..) = ok
- parse form javax.swing.text.html.parser  = ok, but much slower
- getContentType to handle diff. content, and recognize images <img src="xxx"

*/

public class Spider extends Thread {
	// Intended worker-thread count; not referenced anywhere else in this file.
	static int SPIDER_THREAD_COUNT = 6;
	// Background thread created by startCrawl(); null until then.
	protected Thread myThread = null;
	// Set by startCrawl(), cleared again at the end of wrapup().
	protected boolean isStarted = false;

	// Site model built from the start URL in setStartURL(); used to resolve
	// discovered links and queried for its history in wrapup().
	protected Website mSite = null;
	// URL the crawl starts from; run() returns immediately while this is null.
	protected URL startURL = null;
	// Stop request flag, raised by setStop() and polled by the loop in run().
	protected boolean isStop = false;
	// Host, port and scheme flag of the start URL (see setStartURL()).
	protected String mHost = "";
	protected int mPort = 80;
	protected boolean mSecureFlag = false;
	// Swing HTML parser kit and the document it parses each response into
	// (a fresh document is created per response in getContent2()).
	protected HTMLEditorKit htmlKit = new HTMLEditorKit();
	protected HTMLDocument htmlDoc;

	// Connection of the request currently (or most recently) handled by
	// sendAndReceive(); hconn is the same object cast to HttpURLConnection.
	protected URLConnection conn = null;
	protected HttpURLConnection hconn = null;
	// Request header object rebuilt for every request in sendAndReceive().
	protected HttpRequestHeader reqHeader = null;

	// SSL context created by initSecureConnection(); null until that runs.
	protected SSLContext sc;

	// Accumulated "name=value; name=value" cookie string built by checkHeader().
	protected String cookie = null;
	// Extra HeaderProperty entries added to every request.
	protected Vector headerProperty = new Vector();
	// Pending / done / rejected URL bookkeeping.
	protected UrlStack links = new UrlStack();
	// Command-line help text printed by main() and setStartURL().
	protected static final String USAGE =
		"java paros.spider.Spider URL [proxy_name:proxy_port]\r\n"
			+ "e.g. java paros.spider.Spider http://localhost:8080/ \r\n "
			+ "     java paros.spider.Spider http://localhost:8080/ proxy.abc.com:8080\r\n ";

	/**
	 * Creates an idle spider. A start URL must be supplied through
	 * {@link #setStartURL(URL)} before the crawl is started.
	 */
	public Spider() {
		super();
	}

	/**
	 * Initialisation hook. This implementation does nothing.
	 *
	 * @return always <code>true</code>
	 */
	public boolean init() {
		return true;
	}

	/**
	 * Command-line entry point: crawls the site given by the first argument,
	 * optionally through a proxy given as <code>host:port</code> in the
	 * second argument. Prints {@link #USAGE} and returns when the arguments
	 * are missing or the proxy argument is malformed.
	 *
	 * @param args <code>URL [proxy_name:proxy_port]</code>
	 * @throws Exception if the URL argument cannot be parsed
	 */
	public static void main(String[] args) throws Exception {

		if (args.length < 1) {
			System.out.println(USAGE);
			return;
		}

		// Validate the proxy argument completely before doing any work:
		// "host:" or ":8080" contain a colon but do not yield a usable
		// host/port pair ("host:".split(":") has a single element).
		String[] proxyArg = null;
		if (args.length == 2) {
			proxyArg = args[1].split(":");
			if (args[1].indexOf(":") == -1
				|| proxyArg.length < 2
				|| proxyArg[0].length() == 0
				|| proxyArg[1].length() == 0) {
				System.out.println(USAGE);
				return;
			}
		}

		Spider s = new Spider();

		URL url = new URL(args[0]);
		// "http://host" has an empty file part; crawl its root document.
		if (url.getFile().equals(""))
			url = new URL(url.toString() + "/");

		s.setStartURL(url);

		if (proxyArg != null) {
			s.setProxy(proxyArg[0], proxyArg[1]);
		}

		s.start();
	}

	/**
	 * Records whether the crawl target is reached over HTTPS.
	 *
	 * @param s <code>true</code> for https, <code>false</code> for http
	 */
	public void setSecure(boolean s) {
		this.mSecureFlag = s;
	}

	/**
	 * Sets the URL the crawl starts from and derives host, port, scheme flag
	 * and crawl root from it.
	 *
	 * <p>The start URL and site model are stored first; if the protocol is
	 * neither http nor https the usage text is printed and the remaining
	 * settings are left untouched.</p>
	 *
	 * @param url start URL (http or https)
	 */
	public void setStartURL(URL url) {
		startURL = url;
		mSite = new Website(url.toString());

		String proto = url.getProtocol();

		if (!proto.equalsIgnoreCase("http")
			&& !proto.equalsIgnoreCase("https")) {
			System.out.println(USAGE);
			return;
		}
		if (proto.equalsIgnoreCase("https"))
			this.setSecure(true);

		// getPort() returns -1 if the URL did not define a port
		if (url.getPort() == -1) {
			if (!mSecureFlag)
				this.setPort(80);
			else
				this.setPort(443);

		} else
			this.setPort(url.getPort());
		this.setHost(url.getHost());

		// Use the path only. getFile() also carries the query string, and a
		// '/' inside the query would otherwise be mistaken for a directory
		// separator when the root directory is cut out below.
		String path = url.getPath();
		if (!path.endsWith("/")) {
			// keep the directory part, dropping the document name
			path = path.substring(0, path.lastIndexOf("/") + 1);
		}
		this.setRoot(path);
	}

	/**
	 * Asks the crawl loop in {@link #run()} to stop before it takes the next
	 * URL from the queue.
	 */
	public void setStop() {
		this.isStop = true;
	}

	/**
	 * Sets the host name of the site being crawled.
	 *
	 * @param h host name or address
	 */
	public void setHost(String h) {
		this.mHost = h;
	}

	/**
	 * Sets the port of the site being crawled.
	 *
	 * @param p TCP port number
	 */
	public void setPort(int p) {
		this.mPort = p;
	}

	/**
	 * Hands the crawl root to the link stack, which also queues it as a URL
	 * to visit.
	 *
	 * @param s root path of the crawl
	 */
	public void setRoot(String s) {
		this.links.setRoot(s);
	}

	/**
	 * Routes both http and https connections of this JVM through the given
	 * proxy by setting the standard proxy system properties.
	 *
	 * @param proxyhost proxy host name or address
	 * @param proxyport proxy port, as a string
	 */
	public void setProxy(String proxyhost, String proxyport) {
		final java.util.Properties sys = System.getProperties();

		// https first, then http
		sys.setProperty("https.proxyHost", proxyhost);
		sys.setProperty("https.proxyPort", proxyport);

		sys.setProperty("http.proxyHost", proxyhost);
		sys.setProperty("http.proxyPort", proxyport);
	}

	/**
	 * Starts the crawl on a new daemon thread. Does nothing if a crawl
	 * started through this method is still marked as running.
	 */
	public void startCrawl() {
		if (isStarted) {
			return;
		}

		// Raise the flag BEFORE the thread starts. run() finishes with
		// wrapup(), which clears isStarted; if the flag were set after
		// start(), a quickly finishing crawl could clear it first and this
		// method would then leave it stuck at true.
		isStarted = true;
		boolean launched = false;
		try {
			myThread = new Thread(this);
			myThread.setDaemon(true);
			myThread.start();
			launched = true;
		} finally {
			if (!launched) {
				// thread could not be started; allow a later retry
				isStarted = false;
			}
		}
	}

	/**
	 * Crawl loop: repeatedly takes a URL from the link stack, fetches it and
	 * queues the links discovered in the response, until the stack is empty
	 * or a stop was requested. Always finishes with {@link #wrapup()} once a
	 * start URL has been set; returns immediately when none is set.
	 */
	public void run() {
		if (startURL == null)
			return;
		if (mSecureFlag)
			initSecureConnection();

		try {
			while (links.size() > 0 && !isStop) {
				String url = (String) (links.pop());
				if (url == null) {
					// pop() yields null when every entry left on the stack
					// had already been crawled: nothing more to fetch.
					break;
				}

				// vector of URLs discovered in the response, or null
				Vector res = sendAndReceive(url);

				if (res != null && res.size() > 0)
					links.push(res); // add new links for the next round
			}

		} catch (EmptyStackException es) {
			logError(es.getMessage());
		} catch (Exception ex) {
			logError(ex.getMessage());
		}

		// last step
		wrapup();
	}

	/**
	 * Registers an additional request header that is sent with every
	 * request from now on.
	 *
	 * @param h header name/value pair
	 */
	protected void addHeaderProperty(HeaderProperty h) {
		this.headerProperty.addElement(h);
	}

	/**
	 * Applies the spider's request headers to both the live connection and
	 * the request-header object: the fixed browser-like headers, the
	 * accumulated cookie (if any) and every registered extra header.
	 *
	 * @param conn connection about to be opened
	 * @param req  request header object mirroring what is sent
	 */
	protected void setHeaderProperty(URLConnection conn, HttpRequestHeader req) {
		final String acceptAll = "*/*";
		final String language = "en-us";
		final String userAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";

		conn.setRequestProperty("Accept", acceptAll);
		conn.setRequestProperty("Accept-Language", language);
		conn.setRequestProperty("User-Agent", userAgent);

		req.setHeader("Accept", acceptAll);
		req.setHeader("Accept-Language", language);
		req.setHeader("User-Agent", userAgent);

		if (cookie != null) {
			conn.setRequestProperty("Cookie", cookie);
			req.setHeader("Cookie", cookie);
		}

		if (headerProperty == null) {
			return;
		}
		for (int idx = 0; idx < headerProperty.size(); idx++) {
			final HeaderProperty extra =
				(HeaderProperty) headerProperty.elementAt(idx);
			conn.setRequestProperty(extra.name, extra.value);
			req.setHeader(extra.name, extra.value);
		}
	}

	/**
	 * Final step of a crawl: reports the history size, closes the last
	 * connection, reports the crawled and rejected URLs and marks the
	 * spider as no longer started.
	 */
	protected void wrapup() {
		logError("History size: " + mSite.getHistory().size());

		if (hconn != null) {
			hconn.disconnect();
		}

		final StringBuffer summary = new StringBuffer();
		summary.append("\r\n");
		summary.append("Total ");
		summary.append(links.getUrlDone().size());
		summary.append(" link(s) crawled.\r\n");
		logError(summary.toString());

		final String rejected = links.listUrlRejected();
		logError(rejected);

		isStarted = false;
	}

	/**
	 * Scans the response headers of <code>conn</code> and folds every
	 * <code>Set-Cookie</code> header into the {@link #cookie} string that is
	 * sent with subsequent requests. Only the leading
	 * <code>name=value</code> part of each header is kept; attributes after
	 * the first ';' are dropped.
	 *
	 * @param conn connection whose response headers are inspected
	 */
	protected void checkHeader(URLConnection conn) {
		for (int i = 0;; i++) {
			String headerName = conn.getHeaderFieldKey(i);
			String headerValue = conn.getHeaderField(i);
			if (headerName == null && headerValue == null) {
				// No more headers
				break;
			}
			if (headerName == null || headerValue == null) {
				// A null name is the status line; a null value has nothing
				// to parse.
				continue;
			}
			if (!headerName.equalsIgnoreCase("Set-cookie")) {
				continue;
			}

			// A cookie without attributes has no ';' at all, so do not
			// assume one is present.
			int end = headerValue.indexOf(";");
			String pair =
				(end == -1 ? headerValue : headerValue.substring(0, end)).trim();
			if (pair.length() == 0) {
				continue;
			}
			cookie = mergeCookie(cookie, pair);
		}
	}

	/**
	 * Returns <code>jar</code> with <code>pair</code> added; an existing
	 * cookie of the same name is replaced in place so that a cookie the
	 * server re-issues does not pile up as duplicates.
	 */
	private static String mergeCookie(String jar, String pair) {
		String name = cookieName(pair);
		StringBuffer merged = new StringBuffer();
		boolean replaced = false;

		if (jar != null) {
			String[] parts = jar.split(";");
			for (int k = 0; k < parts.length; k++) {
				String part = parts[k].trim();
				if (part.length() == 0) {
					continue;
				}
				if (cookieName(part).equals(name)) {
					if (replaced) {
						continue; // drop further stale duplicates
					}
					part = pair;
					replaced = true;
				}
				if (merged.length() > 0) {
					merged.append("; ");
				}
				merged.append(part);
			}
		}
		if (!replaced) {
			if (merged.length() > 0) {
				merged.append("; ");
			}
			merged.append(pair);
		}
		return merged.toString();
	}

	/** Returns the cookie name, i.e. the text before the first '='. */
	private static String cookieName(String pair) {
		int eq = pair.indexOf('=');
		return (eq == -1 ? pair : pair.substring(0, eq)).trim();
	}

	/**
	 * Hook for logging each successfully crawled link (e.g. to a GUI).
	 * Intended for subclasses to override; this implementation does
	 * nothing. It is called from sendAndReceive() for 2xx responses.
	 *
	 * @param ur        URL that was fetched
	 * @param reqHeader request header built for that fetch
	 * @throws Exception declared so that overriding methods may throw
	 */
	protected void log(URL ur, HttpRequestHeader reqHeader) throws Exception {
	}

	/**
	 * Fetches one URL and returns the links it leads to.
	 *
	 * <ul>
	 * <li>Relative URLs are resolved against the start URL.</li>
	 * <li>For a redirect (301, 302, 303, 307) the Location target is
	 *     returned and the body is not read.</li>
	 * <li>For 2xx responses the {@link #log} hook is called.</li>
	 * <li>Cookies from the response are collected, and for text/html
	 *     content the anchors found in the body are resolved and
	 *     returned.</li>
	 * </ul>
	 * Failures are reported through {@link #logError}; I/O failures
	 * additionally record the URL as rejected.
	 *
	 * @param url absolute or site-relative URL to fetch
	 * @return vector of discovered links, or <code>null</code> if there are
	 *         none or the fetch failed
	 */
	private Vector sendAndReceive(String url) {
		String respCode = null;
		InputStream body = null;

		try {
			URI aur = new URI(url);
			if (!aur.isAbsolute()) {
				aur = new URI(startURL.toExternalForm()).resolve(aur);
			}
			URL ur = aur.toURL();

			conn = ur.openConnection();
			hconn = (HttpURLConnection) conn;

			if (conn instanceof HttpsURLConnection) {
				// The SSL context is only prepared up front when the start
				// URL itself is https; create it on demand for https links
				// met during an http crawl.
				if (sc == null) {
					initSecureConnection();
				}
				HttpsURLConnection sconn = (HttpsURLConnection) hconn;
				// accept any host name: the spider deliberately talks to
				// sites with self-signed or mismatching certificates
				sconn.setHostnameVerifier(new HostnameVerifier() {
					public boolean verify(String urlHost, SSLSession ssls) {
						return true;
					}
				});
				if (sc != null) {
					sconn.setSSLSocketFactory(sc.getSocketFactory());
				}
			}

			// Redirects are handled here so that each hop is crawled and
			// checked like any other link. NOTE: setFollowRedirects is a
			// JVM-wide switch; it is kept because other code may rely on it.
			HttpURLConnection.setFollowRedirects(false);
			hconn.setInstanceFollowRedirects(false);

			reqHeader = new HttpRequestHeader();
			reqHeader.setMessage("GET " + ur.toString() + " HTTP/1.0\r\nConnection:\r\nContent-length: 0\r\n\r\n");

			reqHeader.setSecure(mSecureFlag);
			setHeaderProperty(hconn, reqHeader);

			hconn.connect();

			// header 0 is the status line; kept for the rejection message
			if (conn.getHeaderField(0) != null)
				respCode = conn.getHeaderField(0);

			int code = hconn.getResponseCode();

			if (code == 301 || code == 302 || code == 303 || code == 307) {
				String location = hconn.getHeaderField("Location");
				if (location == null) {
					// redirect without a target: nothing to follow
					return null;
				}
				Vector target = new Vector();
				target.add(location);
				return checkUriPrefix(target, aur.toString(), true);
			} else if (code >= 200 && code < 300) {
				// saved to TreeView for Paros
				log(ur, reqHeader);
			}

			checkHeader(conn);

			String contentType = hconn.getContentType();
			if (contentType != null
				&& contentType.toLowerCase().indexOf("text") == -1
				&& contentType.toLowerCase().indexOf("html") == -1) {
				// not text/html: no links to extract
				return null;
			}

			Object content = hconn.getContent();
			if (content instanceof InputStream) {
				body = (InputStream) content;

				Vector found = getContent2(body);
				Vector resolved = null;
				if (found != null && found.size() > 0) {
					resolved = checkUriPrefix(found, aur.toString(), false);
				}
				return resolved;
			}
		} catch (Exception fn1) {
			logError(fn1.getMessage());
			// FileNotFoundException (e.g. a 404) is an IOException too
			if (fn1 instanceof IOException) {
				links.pushUrlRejected(
					url + " (rejected with reply \"" + respCode + "\")");
			}
		} finally {
			if (body != null) {
				try {
					body.close();
				} catch (IOException ignored) {
					// the response has been processed; nothing to recover
				}
			}
		}
		return null;

	}

	/**
	 * Reports a message from the spider. This implementation prints it to
	 * standard output.
	 *
	 * @param s message text (printed as "null" when null)
	 */
	public void logError(String s) {
		System.out.println(s);
	}

	/**
	 * Extracts attribute values that follow <code>pattern</code> in the
	 * given markup by plain text search (case-insensitive).
	 *
	 * <p>Both the quoted form (<code>pattern"value"</code>) and the unquoted
	 * form (<code>pattern value</code> ended by a space or '&gt;') are
	 * recognised.</p>
	 *
	 * @param s       markup to search
	 * @param pattern upper-case pattern that precedes the value,
	 *                e.g. <code>"A HREF="</code>
	 * @return vector of extracted values, or <code>null</code> if none
	 */
	private Vector getLinks(String s, String pattern) {
		Vector v = new Vector();
		int i, j;
		int base = 0; // offset of abody's first character within s
		int plen = pattern.length();
		// Fixed locale: the default one may upper-case letters differently
		// (e.g. Turkish dotted I) and the pattern would no longer match.
		// NOTE(review): offsets assume upper-casing keeps the text length.
		String abody = s.toUpperCase(java.util.Locale.ENGLISH);
		while ((i = abody.indexOf(pattern)) != -1) {
			int valueStart = i + plen;
			// for pattern <a href="..." /a>
			// (guard the index: the pattern may be the very end of the text)
			if (valueStart < abody.length()
				&& abody.charAt(valueStart) == '\"'
				&& (j = abody.indexOf("\"", valueStart + 1)) != -1) {
				v.add(s.substring(base + valueStart + 1, base + j));
			} else {
				// for pattern <a href=... /a>: the value ends at the first
				// space or '>' after it
				int a = abody.indexOf(" ", valueStart + 1);
				int b = abody.indexOf(">", valueStart + 1);
				if (a == -1 && b == -1) {
					break;
				}
				if (a != -1 && (b == -1 || a < b))
					j = a;
				else
					j = b;
				v.add(s.substring(base + valueStart, base + j));
			}
			// continue the search behind the value just consumed
			abody = abody.substring(j);
			base += j;
		}
		if (v.size() == 0)
			v = null;
		return v;
	}

	/**
	 * Reads the stream to its end and returns the bytes decoded as
	 * ISO-8859-1 text. Reading is best effort: on an I/O error the text
	 * read so far is returned.
	 *
	 * @param in stream to drain (not closed here)
	 * @return decoded content, possibly empty
	 */
	private String getContent(BufferedInputStream in) {
		final byte[] chunk = new byte[1024];
		final StringBuffer text = new StringBuffer();
		try {
			int count;
			// a non-positive count ends the loop
			while ((count = in.read(chunk)) > 0) {
				String piece;
				try {
					piece = new String(chunk, 0, count, "8859_1");
				} catch (Exception e) {
					// fall back to the platform charset
					piece = new String(chunk, 0, count);
				}
				text.append(piece);
			}
		} catch (IOException e) {
			// deliberately ignored: return what has been read so far
		}

		return text.toString();
	}

	/**
	 * Parses the stream as HTML with the Swing HTML parser and collects the
	 * <code>href</code> attribute of every anchor (<code>&lt;a&gt;</code>)
	 * tag.
	 *
	 * <p>Parse failures are printed and result in whatever was collected so
	 * far (normally nothing). The stream is not closed here.</p>
	 *
	 * @param in response body
	 * @return vector of href values, or <code>null</code> if none were found
	 */
	public Vector getContent2(InputStream in) {
		InputStreamReader isr = new InputStreamReader(in);
		BufferedReader br = new BufferedReader(isr);
		Vector res = new Vector();

		htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
		// Do not let a <meta> charset directive abort the parse.
		// Boolean.TRUE replaces the deprecated Boolean constructor.
		htmlDoc.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

		try {
			htmlKit.read(br, htmlDoc, 0);
			HTMLDocument.Iterator iterator = htmlDoc.getIterator(HTML.Tag.A);

			while (iterator.isValid()) {
				AttributeSet as = iterator.getAttributes();
				if (as != null) {
					Object href = as.getAttribute(HTML.Attribute.HREF);
					if (href != null)
						res.add(href);
				}
				iterator.next();
			}

		} catch (IOException ie) {
			ie.printStackTrace();
		} catch (javax.swing.text.BadLocationException ie) {
			ie.printStackTrace();
		}

		if (res.size() == 0)
			return null;
		else
			return res;

	}

	/**
	 * Resolves the given links against <code>uri</code> through the site
	 * model. Links the site model classified as belonging to other sites
	 * are recorded as rejected and then cleared from the model.
	 *
	 * @param v          links as found in the page or Location header
	 * @param uri        URI of the page the links came from
	 * @param isRedirect not used by this implementation
	 * @return the links as resolved by the site model
	 * @throws URISyntaxException declared for URI conversion failures
	 */
	protected Vector checkUriPrefix(Vector v, String uri, boolean isRedirect)
		throws URISyntaxException {
		final Vector resolved = mSite.resolveURI(v, Website.StringtoURI(uri));

		// move the off-site links from the site model to the rejected list
		links.pushUrlRejected(mSite.getOtherLink());
		mSite.clearOtherLink();

		return resolved;
	}

	/**
	 * String-based link filter: turns the given links into site-relative
	 * paths and drops everything that does not belong to the crawled site.
	 *
	 * <ul>
	 * <li>Non-HTTP schemes (mailto, ftp, ...) are skipped.</li>
	 * <li>Absolute http/https links are kept (as their path) only if the
	 *     host resolves to the spider's host and the port matches.</li>
	 * <li>Other links are skipped for redirects; otherwise absolute paths
	 *     are kept as they are and relative ones are prefixed with the
	 *     directory of <code>uri</code>.</li>
	 * </ul>
	 * A <code>null</code> element ends processing after the input has been
	 * listed.
	 *
	 * @param v          links (Strings) to filter
	 * @param uri        path of the page the links came from
	 * @param isRedirect whether the links come from a Location header
	 * @return vector of site-relative paths (possibly empty)
	 */
	protected Vector checkUriPrefix2(
		Vector v,
		String uri,
		boolean isRedirect) {
		Vector result = new Vector();
		for (Enumeration e = v.elements(); e.hasMoreElements();) {
			String t1 = ((String) (e.nextElement()));
			if (t1 == null) {
				list(v);
				break;
			}
			String t = t1.toLowerCase();

			if (t.startsWith("mailto:")
				|| t.startsWith("ftp:")
				|| t.startsWith("irc:")
				|| t.startsWith("news:")
				|| t.startsWith("gopher:")
				|| t.startsWith("file:")
				|| t.startsWith("nfs:")) {
				continue;
			}

			if (t.startsWith("http://") || t.startsWith("https://")) {
				int plen = t.startsWith("http://") ? 7 : 8;

				// All offsets are absolute positions in t (and t1), so the
				// host, port and path cuts always refer to the same base.
				int slash = t.indexOf("/", plen);
				int colon = t.indexOf(":", plen);
				// only a ':' inside the authority part separates a port
				boolean hasPort =
					colon != -1 && (slash == -1 || colon < slash);

				if (hasPort) {
					String host = t.substring(plen, colon);
					if (compareAddress(host, mHost) && slash != -1) {
						int p = -1;
						try {
							p = Integer.parseInt(t.substring(colon + 1, slash));
						} catch (Exception e1) {
							e1.printStackTrace();
						}
						if (p != -1 && p == mPort) {
							result.add(t1.substring(slash));
						}
					}
				} else if (slash != -1) {
					String host = t.substring(plen, slash);
					if (compareAddress(host, mHost)) {
						// a link without port is only on this site if the
						// spider itself uses the scheme's default port
						if ((mSecureFlag == true && mPort == 443)
							|| (mSecureFlag == false && mPort == 80))
							result.add(t1.substring(slash));
						else
							links.pushUrlRejected(
								t1
									+ ":"
									+ mPort
									+ " (port different from that of spider)");
					}
				}
				continue;
			}

			if (isRedirect)
				continue;

			if (t.startsWith("/")) {
				result.add(t1);
				continue;
			}

			// relative link: prefix it with the directory of the page
			int q;
			if ((q = uri.lastIndexOf("/")) != -1) {
				if (q == 0)
					result.add("/" + t1);
				else
					result.add(uri.substring(0, q) + "/" + t1);
				continue;
			}
		}

		return result;

	}

	/**
	 * Tells whether two host names (or literal addresses) resolve to the
	 * same address. A lookup failure is printed and counts as "different".
	 *
	 * @param a first host
	 * @param b second host
	 * @return <code>true</code> if both resolve to equal addresses
	 */
	protected boolean compareAddress(String a, String b) {
		boolean same = false;
		try {
			final InetAddress first = InetAddress.getByName(a);
			final InetAddress second = InetAddress.getByName(b);
			same = first.equals(second);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return same;
	}

	/**
	 * Debug helper: prints every element of the vector (expected to hold
	 * Strings) to standard output, one per line.
	 *
	 * @param v vector of Strings to print
	 */
	public void list(Vector v) {
		for (int idx = 0; idx < v.size(); idx++) {
			System.out.println("list: " + (String) v.elementAt(idx));
		}
	}

	/**
	 * Prepares HTTPS support: creates an SSL context whose trust manager
	 * accepts every certificate chain, stores it in {@link #sc} and installs
	 * its socket factory as the JVM-wide default for
	 * <code>HttpsURLConnection</code>.
	 *
	 * <p>Certificate validation is switched off on purpose so that sites
	 * with self-signed or otherwise untrusted certificates can be crawled.
	 * Failures are printed to standard output.</p>
	 */
	public void initSecureConnection() {

		// Trust manager that does not validate certificate chains
		TrustManager[] trustAllCerts =
			{
				new X509TrustManager() {
					public java.security.cert.X509Certificate[] getAcceptedIssuers() {
						// the interface contract requires a non-null array
						return new java.security.cert.X509Certificate[0];
					}

					public void checkClientTrusted(
						java.security.cert.X509Certificate[] certs,
						String authType) {
						// accept everything
					}

					public void checkServerTrusted(
						java.security.cert.X509Certificate[] certs,
						String authType) {
						// accept everything
					}
				}
			};

		// Install the all-trusting trust manager
		try {
			sc = SSLContext.getInstance("SSL");
			java.security.SecureRandom x = new java.security.SecureRandom();
			sc.init(null, trustAllCerts, x);

			HttpsURLConnection.setDefaultSSLSocketFactory(
				sc.getSocketFactory());

		} catch (Exception e) {
			System.out.println(e.getMessage());
		}
	}

	/**
	 * Probes whether the given port speaks SSL by attempting a handshake
	 * with the default HTTPS socket factory, and prints the outcome together
	 * with the elapsed time in milliseconds. Connects directly; proxies are
	 * not supported.
	 *
	 * @param hostName host to probe
	 * @param hostPort port to probe
	 */
	public static void connectS(String hostName, int hostPort) {
		long lastTime = System.currentTimeMillis();
		SSLSocket socket = null;
		try {
			SSLSocketFactory factory =
				HttpsURLConnection.getDefaultSSLSocketFactory();
			socket = (SSLSocket) factory.createSocket(hostName, hostPort);
			socket.setSoTimeout(500);
			// Connect to the server
			socket.startHandshake();

			// Fails with SSLPeerUnverifiedException if the peer presented
			// no certificate chain
			socket.getSession().getPeerCertificates();

			System.out.println(hostPort + " is https port");
		} catch (SSLPeerUnverifiedException e) {
			System.out.println(hostPort + " " + e.getMessage());
		} catch (IOException e) {
			System.out.println(hostPort + " IOException: " + e.getMessage());
		} finally {
			// close the socket on every path, including failed handshakes
			if (socket != null) {
				try {
					socket.close();
				} catch (IOException ignored) {
					// probe is finished; a failing close changes nothing
				}
			}
		}
		System.out.println(System.currentTimeMillis() - lastTime);
	}

}

/**
 * Stack of URLs waiting to be crawled, together with the lists of URLs
 * already handed out ("done") and URLs that were rejected.
 *
 * <p>URLs are kept as Strings. A URL is only queued if it is not already
 * queued, done or rejected.</p>
 */
class UrlStack extends Stack {
	/** URLs already returned by {@link #pop()}. */
	private Vector UrlDone = new Vector();
	/** Rejected URLs (Strings, some with an explanatory suffix). */
	private Vector UrlRejected = new Vector();
	/** Crawl root; only consulted by {@link #push2(String)}. */
	private String root = "/";

	/**
	 * Sets the crawl root and queues it as a URL to visit.
	 *
	 * @param s root path
	 */
	public void setRoot(String s) {
		root = s;
		this.add(s);
	}

	/**
	 * Takes the next URL that has not been handed out before and records it
	 * as done.
	 *
	 * @return the URL, or <code>null</code> if the stack runs empty
	 */
	public synchronized Object pop() {
		Object obj;

		try {
			// skip entries that were already crawled
			do {
				obj = super.pop();
			} while (UrlDone.indexOf((String) obj) != -1);

			UrlDone.add((String) obj);
		} catch (EmptyStackException ese) {
			return null;
		}

		return obj;
	}

	/**
	 * @return the live list of URLs handed out so far
	 */
	public Vector getUrlDone() {
		return UrlDone;
	}

	/**
	 * Records a URL as rejected and removes it from the pending stack.
	 *
	 * @param s URL (or descriptive text) to record
	 * @return <code>true</code> if it was newly recorded
	 */
	public synchronized boolean pushUrlRejected(String s) {
		remove(s);
		if (UrlRejected.indexOf(s) == -1) {
			UrlRejected.add(s);
			return true;
		} else
			return false;
	}

	/**
	 * Records every element of the vector as rejected and removes it from
	 * the pending stack. <code>URI</code> elements are recorded in their
	 * String form.
	 *
	 * @param s vector of Strings and/or URIs; <code>null</code> is ignored
	 */
	public synchronized void pushUrlRejected(Vector s) {
		if (s == null) {
			return;
		}
		for (int i = 0; i < s.size(); i++) {
			Object item = s.elementAt(i);
			// Both lists hold Strings, so a URI must be converted BEFORE it
			// is looked up; comparing the URI object itself never matches
			// and would record the same link again on every call.
			Object entry = (item instanceof URI) ? item.toString() : item;
			super.remove(entry);
			if (UrlRejected.indexOf(entry) == -1) {
				UrlRejected.add(entry);
			}
		}
	}

	/**
	 * Alternative to {@link #push(String)} that normalises the path,
	 * URL-encodes the query and only accepts URLs under the crawl root.
	 *
	 * @param s URL to queue; <code>null</code> is refused
	 * @return <code>true</code> if the URL was queued
	 */
	public synchronized boolean push2(String s) {
		if (s == null) {
			return false;
		}
		String a;
		try {
			String query = null;

			// drop the #fragment, it is irrelevant for fetching
			int index = s.indexOf("#");
			if (index != -1) {
				s = s.substring(0, index);
			}

			// split off and encode the query so that it cannot break URI
			// parsing of the path
			index = s.indexOf("?");
			if (index != -1) {
				query = URLEncoder.encode(s.substring(index + 1), "8859_1");
				s = s.substring(0, index);
			}
			a =
				(new java.net.URI(s)).normalize().getPath()
					+ (query != null ? "?" + query : "");

		} catch (UnsupportedEncodingException ie) {
			System.out.println("url error (unspported encoding)" + ie);
			return false;
		} catch (URISyntaxException ie) {
			System.out.println("url error " + s);
			ie.printStackTrace();
			return false;
		}
		if (!a.startsWith(root)) {
			UrlRejected.add(a + " (not under spider path)");
			return false;
		}

		if (UrlDone.indexOf(a) != -1
			|| UrlRejected.indexOf(a) != -1) // already processed or rejected
			return false;

		if (this.indexOf(a) != -1) // already queued
			return false;

		return super.add(a);

	}

	/**
	 * Queues a URL unless it is syntactically invalid or already queued,
	 * done or rejected. A <code>#fragment</code> is stripped first.
	 *
	 * @param s URL to queue; <code>null</code> is refused
	 * @return <code>true</code> if the URL was queued
	 */
	public synchronized boolean push(String s) {
		if (s == null) {
			return false;
		}

		// drop the #fragment, it is irrelevant for fetching
		int index = s.indexOf("#");
		if (index != -1) {
			s = s.substring(0, index);
		}

		String a;
		try {
			a = new java.net.URI(s).toString();
		} catch (URISyntaxException ie) {
			System.out.println("url error " + s);
			ie.printStackTrace();
			return false;
		}

		if (UrlDone.indexOf(a) != -1
			|| UrlRejected.indexOf(a) != -1) // already processed or rejected
			return false;

		if (this.indexOf(a) != -1) // already queued
			return false;

		return super.add(a);

	}

	/**
	 * Queues every URL (String) of the vector through
	 * {@link #push(String)}.
	 *
	 * @param s vector of Strings; <code>null</code> is treated as empty
	 * @return always <code>true</code>
	 */
	public synchronized boolean push(Vector s) {
		if (s != null) {
			for (Enumeration e = s.elements(); e.hasMoreElements();) {
				push((String) (e.nextElement()));
			}
		}
		return true;
	}

	/**
	 * Removes a URL from the pending stack.
	 *
	 * @param s URL to remove
	 * @return <code>true</code> if it was queued
	 */
	public synchronized boolean remove(String s) {
		return super.remove(s);
	}

	/** Prints every pending URL to standard output. */
	public void list() {
		for (Enumeration e = super.elements(); e.hasMoreElements();) {
			System.out.println("list: " + (String) (e.nextElement()));
		}

	}

	/** Prints every crawled URL to standard output. */
	public void listUrlDone() {
		System.out.println("\r\nURLs crawled: ");

		for (Enumeration e = UrlDone.elements(); e.hasMoreElements();) {
			System.out.println("list: " + (String) (e.nextElement()));
		}
	}

	/**
	 * Formats the rejected URLs as a multi-line report.
	 *
	 * @return the report text, ending in a line break
	 */
	public String listUrlRejected() {
		if (UrlRejected.size() == 0) {
			return "URLs rejected: " + "NONE\r\n";
		}

		// a buffer avoids re-copying the whole report for every entry
		StringBuffer out = new StringBuffer("URLs rejected: ");
		out.append("\r\n");
		for (Enumeration e = UrlRejected.elements(); e.hasMoreElements();) {
			out.append(" - ").append(e.nextElement()).append("\r\n");
		}
		return out.toString();
	}

}

/**
 * Plain name/value pair describing one extra HTTP request header.
 */
class HeaderProperty {
	/** Header name, e.g. "Referer". */
	String name;
	/** Header value. */
	String value;

	/**
	 * @param n header name
	 * @param v header value
	 */
	HeaderProperty(String n, String v) {
		this.name = n;
		this.value = v;
	}
}
