代码:
#!/usr/bin/php -q
<?php
/**
 * tweet_gremlin: polling daemon.
 *
 * On every pass it reads the handles stored in the `twitter_handles` table,
 * queries the Twitter search API for each one, and stores every tweet that
 * mentions at least one keyword of interest in the `tweets` table. Progress
 * is appended to tweet_gremlin.log (a relative path, so it is resolved
 * against the current working directory of the process).
 *
 * The loop runs forever; it only stops when the database cannot be reached.
 *
 * NOTE(review): the v1 REST/search endpoints used below are believed to have
 * been retired by Twitter -- confirm before relying on this script.
 */

# Make mysqli report failures through return values on every PHP version, so
# the explicit checks below are what decides how an error is handled.
mysqli_report(MYSQLI_REPORT_OFF);

/**
 * Append one timestamped line to the log file.
 *
 * @param string $message Text to log; the timestamp, separator and newline are added here.
 * @return void
 */
function tg_log($message)
{
    file_put_contents('tweet_gremlin.log', date('r') . ' ' . $message . PHP_EOL, FILE_APPEND);
}

/**
 * Fetch a URL and decode its JSON body.
 *
 * @param string $url Address to request.
 * @return array|null Decoded associative array, or null when the request or the decoding fails.
 */
function tg_fetch_json($url)
{
    $body = file_get_contents($url);
    if ($body === false) {
        return null;
    }
    $decoded = json_decode($body, true);
    return is_array($decoded) ? $decoded : null;
}

/**
 * Tell whether a tweet mentions at least one keyword (case-insensitive).
 *
 * @param string $text    Raw tweet text.
 * @param array  $needles Plain-text keywords.
 * @return bool
 */
function tg_is_relevant($text, array $needles)
{
    foreach ($needles as $needle) {
        if (stripos($text, $needle) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Read one field of a decoded tweet as a string.
 *
 * @param array  $tweet Decoded tweet.
 * @param string $key   Field name.
 * @return string The field value, or '' when the field is absent.
 */
function tg_field(array $tweet, $key)
{
    return isset($tweet[$key]) ? (string) $tweet[$key] : '';
}

# *** IMPORTANT ***
# Twitter limits API calls to 150/hr, so regardless of the number of handles we
# can only make one request every 24 seconds (3600/150). It therefore takes an
# hour to run through 150 handles once; with fewer handles each one is updated
# several times an hour.
$wait = (int) round(3600 / 150);

# Keywords that make a tweet worth storing.
$needles = array('hack', 'tango down', 'dump', 'breach', 'data');

while (true) {
    # Check how many API calls remain
    $rate_limit = tg_fetch_json('http://api.twitter.com/1/account/rate_limit_status.json');

    # Establish the database connection
    $mysqli = mysqli_connect('localhost', 'twitterd', 'password', 'twitterd');
    if (!$mysqli) {
        tg_log('Cannot connect to the database');
        break;
    }

    # Grab the Twitter handles from the database. The list is rebuilt from
    # scratch on every pass so handles do not pile up as duplicates.
    $handles = array();
    $get_handles = mysqli_query($mysqli, "SELECT `handle` FROM twitter_handles");
    if ($get_handles === false) {
        tg_log('Cannot read the handle list: ' . mysqli_error($mysqli));
        mysqli_close($mysqli);
        sleep(600);
        continue;
    }
    while ($row = mysqli_fetch_array($get_handles, MYSQLI_NUM)) {
        $handles[] = $row[0];
    }
    mysqli_free_result($get_handles);

    # Nothing to monitor: wait instead of spinning on the API and the database.
    if (count($handles) < 1) {
        tg_log('No handles to process');
        mysqli_close($mysqli);
        sleep($wait);
        continue;
    }

    # Check the remaining API calls. An unreadable status is treated like an
    # exhausted quota: back off and try again later.
    if ($rate_limit === null || !isset($rate_limit['remaining_hits'])) {
        tg_log('Rate limit status unavailable');
        mysqli_close($mysqli);
        sleep(600);
        continue;
    }
    if ($rate_limit['remaining_hits'] < count($handles)) {
        tg_log('Rate limit reached');
        mysqli_close($mysqli); # do not hold the connection open while backing off
        sleep(600);
        continue;
    }

    # One prepared statement is reused for every insert of this pass; the
    # values are sent separately from the SQL text, so no escaping is needed.
    $insert = mysqli_prepare(
        $mysqli,
        'INSERT INTO tweets (`id`,`created_at`,`from_user_id`,`profile_image`,`from_user`,`from_user_name`,`text`) VALUES (?,?,?,?,?,?,?)'
    );
    if ($insert === false) {
        tg_log('Cannot prepare the insert statement: ' . mysqli_error($mysqli));
        mysqli_close($mysqli);
        sleep(600);
        continue;
    }
    # Parameters are bound by reference: assigning these variables inside the
    # loop is what changes the values sent by the next execute.
    $id = $created_at = $from_user_id = $profile_image_url = $from_user = $from_user_name = $text = '';
    mysqli_stmt_bind_param(
        $insert,
        'sssssss',
        $id,
        $created_at,
        $from_user_id,
        $profile_image_url,
        $from_user,
        $from_user_name,
        $text
    );

    # Loop through the handles, make an API call for each, and insert the tweets.
    foreach ($handles as $value) {
        tg_log('Processing ' . $value . '\'s data...');

        # Get the handle's timeline; only the "results" index holds the tweets.
        $response = tg_fetch_json(
            'http://search.twitter.com/search.json?q=from:' . urlencode($value) . '&rpp=100&include_entities=1'
        );
        $user_data = ($response !== null && isset($response['results']) && is_array($response['results']))
            ? $response['results']
            : array();

        if (count($user_data) < 1) {
            tg_log('No data for ' . $value);
        }

        foreach ($user_data as $tweet) {
            if (!is_array($tweet)) {
                continue;
            }

            $id = tg_field($tweet, 'id');
            $created_at = tg_field($tweet, 'created_at');
            $from_user_id = tg_field($tweet, 'from_user_id');
            $profile_image_url = tg_field($tweet, 'profile_image_url');
            $from_user = tg_field($tweet, 'from_user');
            $from_user_name = tg_field($tweet, 'from_user_name');
            $text = tg_field($tweet, 'text');

            # Check the tweet relevance once, so a tweet that matches several
            # keywords is still logged and inserted a single time.
            if (!tg_is_relevant($text, $needles)) {
                tg_log('No relevant tweet data in tweet #' . $id);
                continue;
            }

            tg_log('Processing tweet #' . $id);
            if (!mysqli_stmt_execute($insert)) {
                tg_log('Could not insert tweet #' . $id . ': ' . mysqli_stmt_error($insert));
            }
        }

        tg_log('Sleeping for ' . $wait . ' seconds before processing the next handle');
        sleep($wait);
    }

    mysqli_stmt_close($insert);
    mysqli_close($mysqli);
}
?>
如果我像这样在命令行上运行它:
./tweet_gremlin.php
它运行良好。但如果我这样运行它:
./tweet_gremlin.php &
它什么也不做。我不知道为什么。
编辑:我不得不删掉几乎所有的代码,因为网站不允许我提交全部内容。