<?php
# SQL> CREATE TABLE keywords (
# 2 kwID INT AUTO_INCREMENT PRIMARY KEY,
# 3 keyword VARCHAR(64) );
#
# SQL> CREATE TABLE url_title (
# 2 urlID INT AUTO_INCREMENT PRIMARY KEY,
# 3 url VARCHAR(128),
# 4 title VARCHAR(128) );
#
# SQL> CREATE TABLE www_index (
# 2 kwID INT,
# 3 urlID INT,
# 4 PRIMARY KEY ( kwID, urlID ),
# 5 FOREIGN KEY ( kwID ) REFERENCES keywords ( kwID ),
# 6 FOREIGN KEY ( urlID ) REFERENCES url_title ( urlID ) );
# Load the database password. password.php contains only one line: $password="your-pw";
# require (rather than include) stops the script right here when the file is
# missing, instead of attempting a connection with an undefined password.
require 'password.php';
# Both command-line arguments are mandatory: the keyword to index and the
# URL of the page to fetch.
if ( !isset( $argv[1], $argv[2] ) || $argv[1] === '' || $argv[2] === '' )
    die( "Usage: php " . basename( __FILE__ ) . " <keyword> <URL>\n" );
$keyword = $argv[1];
$URL = $argv[2];
$username = "your-id@undcsmysql";
$database = "your-db";
$host = "undcsmysql.mysql.database.azure.com";
# Depending on the active mysqli report mode, a failed connection either
# throws mysqli_sql_exception or only sets connect_error; handle both so the
# same message is printed either way.
try {
    $conn = new mysqli( $host, $username, $password, $database );
} catch ( mysqli_sql_exception $e ) {
    die( 'Could not connect: ' . $e->getMessage( ) );
}
if ( $conn->connect_error )
    die( 'Could not connect: ' . $conn->connect_error );
# Dump the source code to the file result.txt.
# The URL comes straight from the command line, so it is quoted with
# escapeshellarg(); hand-written single quotes can be broken out of by a URL
# that itself contains a quote character.
$cmd = "lynx -dump -source " . escapeshellarg( (string) $URL ) . " > result.txt";
# NOTE(review): the directory ../2/ is made world-writable while lynx runs and
# is reset to 755 afterwards. This is kept as-is because the deployment may
# rely on it, but it looks unnecessary when the script owner runs lynx --
# confirm and remove if possible.
system( "chmod 777 result.txt ../2/" );
$status = 0;
system( $cmd, $status );
system( "chmod 755 ../2/" );
# Keep going on failure (the steps below cope with an empty result.txt), but
# make the problem visible instead of ignoring it.
if ( $status !== 0 )
    trigger_error( "lynx exited with status $status while fetching the page", E_USER_WARNING );
# Find the page title by using a regular expression.
# The title defaults to an empty string when result.txt cannot be read or
# the page has no <title> element.
$html = file_get_contents( "result.txt" );
$title = '';
# i: tag names are case-insensitive in HTML; s: the title text may span
# several lines; [^>]* tolerates attributes on the opening tag.
$pattern = '/<title\b[^>]*>(.*?)<\/title>/is';
if ( $html !== false && preg_match( $pattern, $html, $matches ) === 1 )
    $title = trim( strip_tags( $matches[1] ) );
# Check whether the page contains the keyword.
# The search is case-sensitive and line by line, so a keyword split across
# two lines of the page source is not detected.
$needle = (string) $keyword;
$handle = fopen( "result.txt", "r" ) or
    exit( "Unable to open file!" );
$found = false;
# An empty keyword can never be "found"; searching for it would raise an
# error in substr_count() on PHP 8.
if ( $needle !== '' ) {
    # fgets() returns false at end of file, which ends the loop without
    # handing a non-string to the search.
    while ( ( $line = fgets( $handle ) ) !== false ) {
        if ( strpos( $line, $needle ) !== false ) {
            $found = true;
            break;
        }
    }
}
fclose( $handle );
# Find the ID of the input keyword from the keywords table, adding the
# keyword first when it is not stored yet. The result is left in $kwID.
# The keyword is user input, so it is bound as a statement parameter and
# never spliced into the SQL that is executed.
$kw_text = (string) $keyword;
# Trace output only: the statement with the escaped value filled in.
$sql = "SELECT kwID FROM keywords WHERE keyword='" . $conn->real_escape_string( $kw_text ) . "';";
echo( $sql . "\n\n" );
$stmt = $conn->prepare( "SELECT kwID FROM keywords WHERE keyword=?;" );
if ( $stmt === false )
    die( 'Keyword lookup failed: ' . $conn->error );
$stmt->bind_param( 's', $kw_text );
if ( !$stmt->execute( ) )
    die( 'Keyword lookup failed: ' . $stmt->error );
$stmt->bind_result( $kw_row_id );
$kwID = null;
# The keyword column has no UNIQUE constraint; when duplicates exist the
# last row returned wins.
while ( $stmt->fetch( ) )
    $kwID = $kw_row_id;
$stmt->close( );
if ( $kwID === null ) {
    $sql = "INSERT INTO keywords( keyword ) VALUES ( '" . $conn->real_escape_string( $kw_text ) . "' );";
    echo( $sql . "\n\n" );
    $stmt = $conn->prepare( "INSERT INTO keywords( keyword ) VALUES ( ? );" );
    if ( $stmt === false )
        die( 'Keyword insert failed: ' . $conn->error );
    $stmt->bind_param( 's', $kw_text );
    if ( !$stmt->execute( ) )
        die( 'Keyword insert failed: ' . $stmt->error );
    # The AUTO_INCREMENT value of the row just inserted; no second SELECT
    # is needed to learn the new ID.
    $kwID = $conn->insert_id;
    $stmt->close( );
}
# Find the ID of the input URL from the url_title table, adding the URL and
# its page title first when the URL is not stored yet. The result is left in
# $urlID.
# Both the URL (user input) and the title (taken from the fetched page) are
# untrusted, so they are bound as statement parameters and never spliced
# into the SQL that is executed.
$url_text = (string) $URL;
$title_text = isset( $title ) ? (string) $title : '';
# Trace output only: the statement with the escaped value filled in.
$sql = "SELECT urlID FROM url_title WHERE url='" . $conn->real_escape_string( $url_text ) . "';";
echo( $sql . "\n\n" );
$stmt = $conn->prepare( "SELECT urlID FROM url_title WHERE url=?;" );
if ( $stmt === false )
    die( 'URL lookup failed: ' . $conn->error );
$stmt->bind_param( 's', $url_text );
if ( !$stmt->execute( ) )
    die( 'URL lookup failed: ' . $stmt->error );
$stmt->bind_result( $url_row_id );
$urlID = null;
# The url column has no UNIQUE constraint; when duplicates exist the last
# row returned wins.
while ( $stmt->fetch( ) )
    $urlID = $url_row_id;
$stmt->close( );
if ( $urlID === null ) {
    $sql = "INSERT INTO url_title( url, title ) VALUES ( '"
        . $conn->real_escape_string( $url_text ) . "', '"
        . $conn->real_escape_string( $title_text ) . "' );";
    echo( $sql . "\n\n" );
    $stmt = $conn->prepare( "INSERT INTO url_title( url, title ) VALUES ( ?, ? );" );
    if ( $stmt === false )
        die( 'URL insert failed: ' . $conn->error );
    $stmt->bind_param( 'ss', $url_text, $title_text );
    if ( !$stmt->execute( ) )
        die( 'URL insert failed: ' . $stmt->error );
    # The AUTO_INCREMENT value of the row just inserted; no second SELECT
    # is needed to learn the new ID.
    $urlID = $conn->insert_id;
    $stmt->close( );
}
# Update the inverted list if the keyword is found.
if ( $found ) {
    # Both IDs must have been resolved by the lookups above.
    if ( !isset( $kwID, $urlID ) )
        die( 'Cannot update www_index: the keyword ID or URL ID is missing.' );
    # Casting to int guarantees that only numbers reach the SQL text.
    $kw_id = (int) $kwID;
    $url_id = (int) $urlID;
    # ( kwID, urlID ) is the primary key of www_index. Running the script
    # again for a pair that is already indexed must be a no-op rather than a
    # duplicate-key error, hence the ON DUPLICATE KEY clause.
    $sql = "INSERT INTO www_index VALUES ( '$kw_id', '$url_id' ) ON DUPLICATE KEY UPDATE kwID = kwID;";
    echo( $sql . "\n\n" );
    if ( $conn->query( $sql ) === false )
        die( 'Index update failed: ' . $conn->error );
}
$conn->close( );
?>
|