我想看看哪种算法最有效,所以我运行了一些基准测试。我对所有 7 个测试用例进行了 100k 次迭代。(在单核 Ubuntu 虚拟机中运行)
@ppsreejith和@Kristian Antonsen的代码的结果被省略了,因为他们的代码在我尝试运行它时崩溃了。@PhpMyCoder的代码运行,只要我在对象构造后没有将格式设置为 48 长度。因此测试结果不完整。(固定的)
基准测试结果
$ php justify.bench.php
盖伦(justify1):5.1464750766754
nickb(justify2): 3.8629620075226
保罗·贝甘蒂诺(justify3):4.3705048561096
用户 381521(justify5):8.5988481044769
vlzvl(justify7): 6.6795041561127
亚历山大(justify8):6.7060301303864
哦哈尔(justify9):2.9896130561829
PhpMyCoder:6.1514630317688(已修复!)
证明.bench.php
<?php
$tests = array(
'hello world there ok then',
'hello',
'ok then',
'this string is almost certainly longer than 48 I think',
'two words',
'three ok words',
'1 2 3 4 5 6 7 8 9'
);
$testers = array(
'Galen' => 'justify1',
'nickb' => 'justify2',
'Paolo Bergantino' => 'justify3',
// 'Kristian Antonsen' => 'justify4',
'user381521' => 'justify5',
// 'ppsreejith' => 'justify6',
'vlzvl' => 'justify7',
'Alexander' => 'justify8',
'ohaal' => 'justify9'
);
// ppsreejith and Kristian Antonsen's code crashed and burned when I tried to run it
// PhpMyCoder is a special case, but his code also crashed when doing $jus->format(48);
foreach ($testers as $tester => $func) {
$b=microtime(true);
for($i=0;$i<100000;$i++)
foreach ($tests as $test)
$func($test,48);
$a=microtime(true);
echo $tester.'('.$func.'): '.($a-$b)."\n";
}
echo "\n";
// Fixed!
$jus = new Justifier($tests);
$b=microtime(true);
for($i=0;$i<100000;$i++) {
$jus->format(54);
}
$a=microtime(true);
echo 'PhpMyCoder: '.($a-$b)." (Fixed!)\n";
// ALGORITHMS BELOW
// Galen
function justify1( $str_in, $desired_length=48 ) {
if ( strlen( $str_in ) > $desired_length ) {
$str_in = current( explode( "\n", wordwrap( $str_in, $desired_length ) ) );
}
$string_length = strlen( $str_in );
$spaces_count = substr_count( $str_in, ' ' );
$needed_spaces_count = $desired_length - $string_length + $spaces_count;
if ( $spaces_count === 0 ) {
return str_pad( $str_in, $desired_length, ' ', STR_PAD_BOTH );
}
$spaces_per_space = ceil( $needed_spaces_count / $spaces_count );
$spaced_string = preg_replace( '~\s+~', str_repeat( ' ', $spaces_per_space ), $str_in );
return preg_replace_callback(
sprintf( '~\s{%s}~', $spaces_per_space ),
function ( $m ) use( $spaces_per_space ) {
return str_repeat( ' ', $spaces_per_space-1 );
},
$spaced_string,
strlen( $spaced_string ) - $desired_length
);
}
// nickb
function justify2($str_in, $desired_length, $char = '_') {
// Some common vars and simple error checking / sanitation
$return = '';
$str_in = trim( $str_in);
$desired_length = intval( $desired_length);
// If we've got invalid input, we're done
if( $desired_length <= 0)
return $str_in;
// If the input string is greater than the length, we need to truncate it WITHOUT splitting words
if( strlen( $str_in) > $desired_length) {
$str = wordwrap($str_in, $desired_length);
$str = explode("\n", $str);
$str_in = $str[0];
}
$words = explode( ' ', $str_in);
$num_words = count( $words);
// If there's only one word, it's a simple edge case
if( $num_words == 1) {
$length = ($desired_length - strlen( $words[0])) / 2;
$return .= str_repeat( $char, floor( $length)) . $words[0] . str_repeat( $char, ceil( $length));
} else {
$word_length = strlen( implode( '', $words));
// Calculate the number of spaces to distribute over the words
$num_words--; // We're going to eliminate the last word
$spaces = floor( ($desired_length - $word_length) / $num_words);
$remainder = $desired_length - $word_length - ($num_words * $spaces);
$last = array_pop( $words);
foreach( $words as $word) {
// If we didn't get an even number of spaces to distribute, just tack it on to the front
$spaces_to_add = $spaces;
if( $remainder > 0) {
$spaces_to_add++;
$remainder--;
}
$return .= $word . str_repeat( $char, $spaces_to_add);
}
$return .= $last;
}
return $return;
}
// Paolo Bergantino
function justify3($str, $to_len) {
$str = trim($str);
$strlen = strlen($str);
if($str == '') return '';
if($strlen >= $to_len) {
return substr($str, 0, $to_len);
}
$words = explode(' ', $str);
$word_count = count($words);
$space_count = $word_count - 1;
if($word_count == 1) {
return str_pad($str, $to_len, ' ', STR_PAD_BOTH);
}
$space = $to_len - $strlen + $space_count;
$per_space = $space/$space_count;
if(is_int($per_space)) {
return implode($words, str_pad('', $per_space, ' '));
}
$new_str = '';
$spacing = floor($per_space);
$new_str .= $words[0] . str_pad('', $spacing);
foreach($words as $x => $word) {
if($x == $word_count - 1 || $x == 0) continue;
if($x < $word_count - 1) {
$diff = $to_len - strlen($new_str) - (strlen(implode('', array_slice($words, $x))));
$new_str .= $word . str_pad('', floor($diff/($space_count - $x)), ' ');
}
}
$new_str .= $words[$x];
return $new_str;
}
// Kristian Antonsen
function justify4($str_in, $desired_length)
{
foreach ($str_in as &$line) {
$words = explode(' ', $line);
$word_count = count($words) - 1;
$spaces_to_fill = $desired_length - strlen($line) + $word_count;
if (count($words) == 1) {
$line = str_repeat('_', ceil($spaces_to_fill/2)) . $line
. str_repeat('_', floor($spaces_to_fill/2));
continue;
}
$next_space = floor($spaces_to_fill/$word_count);
$leftover_space = $spaces_to_fill % $word_count;
$line = array_shift($words);
foreach($words as $word) {
$extra_space = ($leftover_space) ? ceil($leftover_space / $word_count) : 0;
$leftover_space -= $extra_space;
$line .= str_repeat('_', $next_space + $extra_space) . $word;
}
}
return $str_in;
}
// user381521
function justify5 ($str, $len)
{
// split by whitespace, remove empty strings
$words = array_diff (preg_split ('/\s+/', $str), array (""));
// just space if no words
if (count ($words) == 0)
return str_repeat (" ", $len);
// add empty strings if only one element
if (count ($words) == 1)
$words = array ("", $words[0], "");
// get number of words and spaces
$wordcount = count ($words);
$numspaces = $wordcount - 1;
// get number of non-space characters
$numchars = array_sum (array_map ("strlen", $words));
// get number of characters remaining for space
$remaining = $len - $numchars;
// return if too little spaces remaining
if ($remaining <= $numspaces)
return substr (implode (" ", $words), 0, $len);
// get number of spaces per space
$spaces_per_space = $remaining / $numspaces;
$spaces_leftover = $remaining % $numspaces;
// make array for spaces, spread out leftover spaces
$spaces = array_fill (0, $numspaces, $spaces_per_space);
while ($spaces_leftover--)
$spaces[$numspaces - $spaces_leftover - 1]++;
$spaces[] = 0; // make count ($words) == count ($spaces)
// join it all together
$result = array ();
foreach ($words as $k => $v)
array_push ($result, $v, str_repeat (" ", $spaces[$k]));
return implode ($result);
}
// ppsreejith
function justify6($str, $to_len) {
$str = trim($str);
$strlen = strlen($str);
if($str == '') return '';
if($strlen >= $to_len) {
return substr($str, 0, $to_len);
}
$words = explode(' ', $str);
$word_count = count($words);
$space_count = $word_count - 1;
if($word_count == 1) {
return str_pad($str, $to_len, ' ', STR_PAD_BOTH);
}
$space = $to_len - $strlen + $space_count;
$per_space = floor($space/$space_count);
$spaces = str_pad('', $per_space, ' ');
$curr_word = implode($words, $spaces);
while(strlen($curr_word) < $to_len){
$curr_word = substr($curr_word,0,preg_match("[! ][".$spaces."][! ]",$curr_word)." ".preg_match("[! ][".$spaces."][! ]",$curr_word));
}
return $curr_word;
}
// vlzvl
function justify7($str_in, $desired_length)
{
$str_in = preg_replace("!\s+!"," ",$str_in); // get rid of multiple spaces
$words = explode(" ",$str_in); // break words
$num_words = sizeof($words); // num words
if ($num_words==1) {
return str_pad($str_in,$desired_length,"_",STR_PAD_BOTH);
}
else {
$num_chars = 0; $lenwords = array();
for($x=0;$x<$num_words;$x++) { $num_chars += $lenwords[$x] = strlen($words[$x]); }
$each_div = round(($desired_length - $num_chars) / ($num_words-1));
for($x=0,$sum=0;$x<$num_words;$x++) { $sum += ($lenwords[$x] + ($x<$num_words-1 ? $each_div : 0)); }
$space_to_addcut = ($desired_length - $sum);
for($x=0;$x<$num_words-1;$x++) {
$words[$x] .= str_repeat("_",$each_div+($each_div>1? ($space_to_addcut<0?-1:($space_to_addcut>0?1:0)) :0));
if ($each_div>1) { $space_to_addcut += ($space_to_addcut<0 ? 1 : ($space_to_addcut>0?-1:0) ); }
}
return substr(implode($words),0,$desired_length);
}
}
// Alexander
function justify8($str, $length) {
$words = explode(' ', $str);
if(count($words)==1) $words = array("", $str, "");
$spaces = $length - array_sum(array_map("strlen", $words));
$add = (int)($spaces / (count($words) - 1));
$left = $spaces % (count($words) - 1);
$spaced = implode(str_repeat("_", $add + 1), array_slice($words, 0, $left + 1));
$spaced .= str_repeat("_", max(1, $add));
$spaced .= implode(str_repeat("_", max(1, $add)), array_slice($words, $left + 1));
return substr($spaced, 0, $length);
}
// ohaal
function justify9($s,$m){$s=trim($s);$l=strlen($s);if($l>=$m){$s=explode("\n",wordwrap($s,$m));$s=$s[0];$l=strlen($s);}$c=substr_count($s,' ');if($c===0)return str_pad($s,$m,' ',STR_PAD_BOTH);$a=($m-$l+$c)/$c;$h=floor($a);$i=($a-$h)*$c;$w=explode(' ',$s,$i+1);$w[$i]=str_replace(' ',str_repeat(' ',$h),$w[$i]);return implode(str_repeat(' ',ceil($a)),$w);}
// PhpMyCoder
class Justifier {
private $text;
public function __construct($text) {
if(!is_string($text) && !is_array($text)) {
throw new InvalidArgumentException('Expected a string or an array of strings, instead received type: ' . gettype($text));
}
if(is_array($text)) {
// String arrays must be converted to JustifierLine arrays
$this->text = array_map(function($line) {
return JustifierLine::fromText($line);
}, $text);
} else {
// Single line of text input
$this->text = $text;
}
}
public function format($width = NULL) {
// Strings have to be broken into an array and then jusitifed
if(is_string($this->text)) {
if($width == null) {
throw new InvalidArgumentException('A width must be provided for separation when an un-split string is provided');
}
if($width <= 0) {
throw new InvalidArgumentException('Expected a positive, non-zero width, instead received width of ' . $width);
}
// Break up a JustifierLine of all text until each piece is smaller or equal to $width
$lines = array(JustifierLine::fromText($this->text));
$count = 0;
$newLine = $lines[0]->breakAtColumn($width);
while($newLine !== null) {
$lines[] = $newLine;
$newLine = $lines[++$count]->breakAtColumn($width);
}
} else {
$lines = $this->text;
// Allow for fluid width (uses longest line with single space)
if($width == NULL) {
$width = -1;
foreach($lines as $line) {
// Width of line = Sum of the lengths of the words and the spaces (number of words - 1)
$newWidth = $line->calculateWordsLength() + $line->countWords() - 1;
if($newWidth > $width) { // Looking for the longest line
$width = $newWidth;
}
}
}
}
// Justify each element of array
//$output = array_map(function($line) use ($width) {
// return $this->justify($line, $width);
//}, $lines);
$output = array();
foreach($lines as $line) {
$output[] = $this->justify($line, $width);
}
// If a single-line is passed in, a single line is returned
if(count($output)) {
return $output[0];
}
return $output;
}
private function justify(JustifierLine $line, $width) {
// Retrieve already calculated line information
$words = $line->extractWords();
$spaces = $line->countWords() - 1;
$wordLens = $line->findWordLengths();
$wordsLen = $line->calculateWordsLength();
$minWidth = $wordsLen + $spaces;
$output = '';
if($minWidth > $width) {
throw new LengthException('A minimum width of ' . $minWidth . ' was required, but a width of ' . $width . ' was given instead');
}
// No spaces means only one word (center align)
if($spaces == 0) {
return str_pad($words[0], $width, ' ', STR_PAD_BOTH);
}
for(;$spaces > 0; $spaces--) {
// Add next word to output and subtract its length from counters
$output .= array_shift($words);
$length = array_shift($wordLens);
$wordsLen -= $length;
$width -= $length;
if($spaces == 1) { // Last Iteration
return $output . str_repeat(' ', $width - $wordsLen) . $words[0];
}
// Magic padding is really just simple math
$padding = floor(($width - $wordsLen) / $spaces);
$output .= str_repeat(' ', $padding);
$width -= $padding;
}
}
}
class JustifierLine {
private $words;
private $numWords;
private $wordLengths;
private $wordsLength;
public static function fromText($text) {
// Split words into an array
preg_match_all('/[^ ]+/', $text, $matches, PREG_PATTERN_ORDER);
$words = $matches[0];
// Count words
$numWords = count($words);
// Find the length of each word
$wordLengths = array_map('strlen', $words);
//And Finally, calculate the total length of all words
$wordsLength = array_reduce($wordLengths, function($result, $length) {
return $result + $length;
}, 0);
return new JustifierLine($words, $numWords, $wordLengths, $wordsLength);
}
private function __construct($words, $numWords, $wordLengths, $wordsLength) {
$this->words = $words;
$this->numWords = $numWords;
$this->wordLengths = $wordLengths;
$this->wordsLength = $wordsLength;
}
public function extractWords() { return $this->words; }
public function countWords() { return $this->numWords; }
public function findWordLengths() { return $this->wordLengths; }
public function calculateWordsLength() { return $this->wordsLength; }
public function breakAtColumn($column) {
// Avoid extraneous processing if we can determine no breaking can be done
if($column >= ($this->wordsLength + $this->numWords - 1)) {
return null;
}
$width = 0;
$wordsLength = 0;
for($i = 0; $i < $this->numWords; $i++) {
// Add width of next word
$width += $this->wordLengths[$i];
// If the line is overflowing past required $width
if($width > $column) {
// Remove overflow at end & create a new object with the overflow
$words = array_splice($this->words, $i);
$numWords = $this->numWords - $i;
$this->numWords = $i;
$wordLengths = array_splice($this->wordLengths, $i);
$tempWordsLength = $wordsLength;
$wordsLength = $this->wordsLength - $wordsLength;
$this->wordsLength = $tempWordsLength;
return new JustifierLine($words, $numWords, $wordLengths, $wordsLength);
}
$width++; // Assuming smallest spacing to fit
// We also have to keep track of the total $wordsLength
$wordsLength += $this->wordLengths[$i];
}
return null;
}
}