/*********************************************************************
 *         World Wide Web Page Download Using Lynx in Java
 *===================================================================
 *  Name:  Wen-Chen Hu
 *  Dated: October 1, 1999
 *********************************************************************
 *                   The Purpose of this Project
 *********************************************************************
 *  Demonstrate how to use the Java language to download Web pages.
 *********************************************************************
 *                     How to Use This Program
 *********************************************************************
 *    > javac lynxj.java
 *    > java  lynxj  url  number
 *
 *    1. lynxj : this class name
 *    2. url   : a seed URL
 *    3. number: number of Web pages downloaded
 *
 *  For example,
 *    > java  lynxj  http://www.eng.auburn.edu/~wenchen/  50
 *
 *  The external "lynx" program must be on the PATH; pages are fetched
 *  by running "lynx -dump <url>".
 *********************************************************************/

import java.io.*;
import java.lang.*;

/*******************************************************************
 *                         Main Program                            *
 *******************************************************************/

/**
 * Command-line driver: downloads a page the requested number of times
 * into a temporary work file named {@code work} in the current directory.
 */
public class lynxj {

    /**
     * Runs the download loop.
     *
     * @param args {@code args[0]} is the seed URL; {@code args[1]} is the
     *             number of Web pages to download
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws NumberFormatException    if {@code args[1]} is not an integer
     */
    public static void main (String args[]) {
        // Fail with a usage message instead of a bare
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("usage: java lynxj url number");
        }

        // args[0]: a seed URL
        // args[1]: number of Web pages downloaded
        String url = args[0];

        // Parse the count once; it does not change between iterations.
        int pages = Integer.parseInt(args[1]);

        downLoadHTML t = new downLoadHTML();
        for (int i = 0; i < pages; i++) {
            t.loading(url, "work");   // work is a temporary workarea
            // start text processing on the work,
            // enter the extracted information into database, and
            // assign the url with a new URL.
        }
    }
}

/**
 * Downloads the rendered text of a Web page by running the external
 * {@code lynx -dump} command and saving its standard output to a file.
 */
class downLoadHTML {

    /** Size in bytes of the copy buffer between lynx and the work file. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Runs {@code lynx -dump url} and writes everything it prints on
     * standard output to the file {@code work}, replacing any previous
     * content of that file.
     *
     * <p>Failures (lynx cannot be started, I/O errors, interruption) are
     * reported on {@code System.err} and not rethrown. A non-zero lynx
     * exit status is reported on {@code System.err} as well.
     *
     * @param url  the URL handed to lynx as a single argument
     * @param work path of the file that receives the lynx output
     */
    void loading (String url, String work) {
        // The work file is opened (and truncated) first so that stale
        // content from an earlier page never survives a failed download.
        try (OutputStream toFile = new FileOutputStream(work)) {
            // An explicit argument list keeps the URL as ONE argument even
            // if it contains whitespace; Runtime.exec(String) would split it.
            // NOTE(review): a URL starting with '-' would still be read by
            // lynx as an option; validate upstream if URLs are untrusted.
            ProcessBuilder builder = new ProcessBuilder("lynx", "-dump", url);

            // Send lynx's stderr to our stderr so the child can never block
            // on a full, unread error pipe.
            builder.redirectError(ProcessBuilder.Redirect.INHERIT);

            Process lynx = builder.start();
            try (InputStream page = lynx.getInputStream()) {
                byte[] buffer = new byte[BUFFER_SIZE];
                int bytesRead;
                while ((bytesRead = page.read(buffer)) != -1) {
                    toFile.write(buffer, 0, bytesRead);
                }

                // Reap the child and surface a failed fetch.
                int status = lynx.waitFor();
                if (status != 0) {
                    System.err.println("lynx exited with status " + status
                            + " for " + url);
                }
            } finally {
                // No-op after a normal exit; stops lynx if copying failed.
                lynx.destroy();
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
            System.err.println(e);
        } catch (Exception e) {
            System.err.println(e);
        }
    }
}