/********************************************************************
 *        World Wide Web Page Download Using Lynx in C++            *
 *==================================================================*
 *   Name:  Wen-Chen Hu                                             *
 *   Dated: October 1, 1999                                         *
 ********************************************************************

 ********************************************************************
 *                  The Purpose of this Project                     *
 ********************************************************************

   Demonstrate how to use the text browser lynx to download Web pages.

 ********************************************************************
 *              How to Use This Program (spider)                    *
 ********************************************************************

   > g++ -o lynxc lynxc.cc
   > lynxc  url  number

   1. lynxc : this program name
   2. url   : a seed URL
   3. number: number of Web pages downloaded

   For example,

   > lynxc  http://www.eng.auburn.edu/~wenchen/  50

   The program exits with a non-zero status when the arguments are
   missing or invalid, or when any lynx invocation fails.

 ********************************************************************
 ********************************************************************/

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    URL_MAX_LEN     = 2048,                 /* longest URL accepted, excluding the NUL */
    COMMAND_MAX_LEN = 4 * URL_MAX_LEN + 64  /* worst case: every URL byte is a quote   */
};

/* Name of the temporary work area that receives the lynx output. */
static const char WORK_FILE[] = "work";

/*
 * Build the shell command "lynx -dump '<url>' > work" in cmd.
 *
 * The URL is wrapped in single quotes so that characters such as '&', '?',
 * ';' or '$' are passed to lynx literally instead of being interpreted by
 * the shell.  A single quote inside the URL is emitted as '\'' (close the
 * quoted string, escaped quote, reopen the quoted string).
 *
 * cmd      : destination buffer
 * cmd_size : size of cmd in bytes
 * url      : NUL-terminated URL to download
 *
 * Returns 0 on success, -1 if the command does not fit in cmd.
 */
static int build_dump_command(char *cmd, size_t cmd_size, const char *url)
{
    int n = snprintf(cmd, cmd_size, "lynx -dump '");
    if (n < 0 || (size_t)n >= cmd_size) {
        return -1;
    }
    size_t pos = (size_t)n;

    for (const char *p = url; *p != '\0'; p++) {
        if (*p == '\'') {
            /* 4 bytes for the escape plus room for the terminating NUL. */
            if (pos + 4 >= cmd_size) {
                return -1;
            }
            memcpy(cmd + pos, "'\\''", 4);
            pos += 4;
        } else {
            if (pos + 1 >= cmd_size) {
                return -1;
            }
            cmd[pos++] = *p;
        }
    }

    n = snprintf(cmd + pos, cmd_size - pos, "' > %s", WORK_FILE);
    if (n < 0 || (size_t)n >= cmd_size - pos) {
        return -1;
    }
    return 0;
}

/*******************************************************************
 *                          Main Program                           *
 *******************************************************************/
/*
 * argv[1]: the seed URL
 * argv[2]: number of Web pages to download (non-negative decimal integer)
 *
 * Returns EXIT_SUCCESS when every download command succeeded, otherwise
 * EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    char command[COMMAND_MAX_LEN];
    char url[URL_MAX_LEN + 1];

    /* Both arguments are required; extra arguments are ignored as before. */
    if (argc < 3 || argv[1][0] == '\0') {
        fprintf(stderr, "usage: %s url number\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "lynxc");
        return EXIT_FAILURE;
    }

    /* Parse the page count once, rejecting garbage that atoi() would hide. */
    char *end = NULL;
    errno = 0;
    long count = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' || errno == ERANGE || count < 0) {
        fprintf(stderr, "%s: invalid number of pages: %s\n", argv[0], argv[2]);
        return EXIT_FAILURE;
    }

    /* Bounded copy of the seed URL; refuse to silently truncate it. */
    int n = snprintf(url, sizeof url, "%s", argv[1]);
    if (n < 0 || (size_t)n >= sizeof url) {
        fprintf(stderr, "%s: URL longer than %d characters\n",
                argv[0], (int)URL_MAX_LEN);
        return EXIT_FAILURE;
    }

    int exit_code = EXIT_SUCCESS;

    for (long i = 0; i < count; i++) {
        if (build_dump_command(command, sizeof command, url) != 0) {
            fprintf(stderr, "%s: command too long for URL: %s\n", argv[0], url);
            return EXIT_FAILURE;
        }

        /* Execute a host operating system command; work is a temporary
         * workarea that is overwritten on every iteration. */
        int status = system(command);
        if (status != 0) {
            fprintf(stderr, "%s: download failed (status %d): %s\n",
                    argv[0], status, url);
            exit_code = EXIT_FAILURE;
        }

        // Start text processing on the work.

        // Enter the extracted information into database.

        // Assign url with a new URL.
    }

    return exit_code;
}