|
1 <?php |
|
2 |
|
3 // A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3) |
|
4 // |
|
5 // Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org> |
|
6 // You may copy this code freely under the conditions of the GPL. |
|
7 // |
|
8 |
|
// Switch for the internal sanity checks: every check in this file is written
// as "USE_ASSERTS && assert(...)", so it only runs when assert() exists.
define('USE_ASSERTS', function_exists('assert'));
|
10 |
|
/**
 * Base class for one edit operation of a diff.
 *
 * An operation pairs a run of lines from the original sequence ($orig) with
 * a run of lines from the closing sequence ($closing). Subclasses in this
 * file set a side to false when the operation has no lines on that side.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _DiffOp {
    /** Operation name; subclasses set 'copy', 'delete', 'add' or 'change'. */
    var $type;
    /** Lines taken from the original sequence, or false. */
    var $orig;
    /** Lines taken from the closing sequence, or false. */
    var $closing;

    /**
     * Return the inverse operation. Must be overridden; the base
     * implementation only raises a user-level error.
     */
    function reverse() {
        trigger_error('pure virtual', E_USER_ERROR);
    }

    /**
     * @return int Number of original-side lines (0 when the side is empty/false).
     */
    function norig() {
        if (!$this->orig) {
            return 0;
        }
        return sizeof($this->orig);
    }

    /**
     * @return int Number of closing-side lines (0 when the side is empty/false).
     */
    function nclosing() {
        if (!$this->closing) {
            return 0;
        }
        return sizeof($this->closing);
    }
}
|
33 |
|
/**
 * Edit operation for a run of lines present in both sequences.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _DiffOp_Copy extends _DiffOp {
    var $type = 'copy';

    /**
     * @param array $orig    Lines as they appear in the original sequence.
     * @param mixed $closing Lines as they appear in the closing sequence;
     *                       anything that is not an array means "same as $orig".
     */
    function __construct($orig, $closing = false) {
        if (!is_array($closing))
            $closing = $orig;
        $this->orig = $orig;
        $this->closing = $closing;
    }

    /**
     * PHP 4 style constructor name, kept so code that calls it explicitly
     * keeps working. PHP 8 no longer treats it as a constructor, which is
     * why the real initialisation lives in __construct().
     */
    function _DiffOp_Copy ($orig, $closing = false) {
        self::__construct($orig, $closing);
    }

    /**
     * @return _DiffOp_Copy The same copy with both sides exchanged.
     */
    function reverse() {
        return new _DiffOp_Copy($this->closing, $this->orig);
    }
}
|
53 |
|
/**
 * Edit operation for lines that exist only in the original sequence.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _DiffOp_Delete extends _DiffOp {
    var $type = 'delete';

    /**
     * @param array $lines The removed lines; the closing side is set to false.
     */
    function __construct($lines) {
        $this->orig = $lines;
        $this->closing = false;
    }

    /**
     * PHP 4 style constructor name, kept for explicit callers. PHP 8 does
     * not run it on "new", so initialisation lives in __construct().
     */
    function _DiffOp_Delete ($lines) {
        self::__construct($lines);
    }

    /**
     * @return _DiffOp_Add An addition of the lines this operation deletes.
     */
    function reverse() {
        return new _DiffOp_Add($this->orig);
    }
}
|
71 |
|
/**
 * Edit operation for lines that exist only in the closing sequence.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _DiffOp_Add extends _DiffOp {
    var $type = 'add';

    /**
     * @param array $lines The inserted lines; the original side is set to false.
     */
    function __construct($lines) {
        $this->closing = $lines;
        $this->orig = false;
    }

    /**
     * PHP 4 style constructor name, kept for explicit callers. PHP 8 does
     * not run it on "new", so initialisation lives in __construct().
     */
    function _DiffOp_Add ($lines) {
        self::__construct($lines);
    }

    /**
     * @return _DiffOp_Delete A deletion of the lines this operation adds.
     */
    function reverse() {
        return new _DiffOp_Delete($this->closing);
    }
}
|
89 |
|
/**
 * Edit operation replacing a run of original lines with a run of closing lines.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _DiffOp_Change extends _DiffOp {
    var $type = 'change';

    /**
     * @param array $orig    Lines being replaced.
     * @param array $closing Lines replacing them.
     */
    function __construct($orig, $closing) {
        $this->orig = $orig;
        $this->closing = $closing;
    }

    /**
     * PHP 4 style constructor name, kept for explicit callers. PHP 8 does
     * not run it on "new", so initialisation lives in __construct().
     */
    function _DiffOp_Change ($orig, $closing) {
        self::__construct($orig, $closing);
    }

    /**
     * @return _DiffOp_Change The change with both sides exchanged.
     */
    function reverse() {
        return new _DiffOp_Change($this->closing, $this->orig);
    }
}
|
107 |
|
108 |
|
// Lines longer than this many bytes are cross-referenced by their MD5 digest
// instead of their full text (see _DiffEngine::_line_hash()).
define('MAX_XREF_LENGTH', 10000);

/**
 * Class used internally by Diff to actually compute the diffs.
 *
 * The algorithm used here is mostly lifted from the perl module
 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
 *   http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
 *
 * More ideas are taken from:
 *   http://www.ics.uci.edu/~eppstein/161/960229.html
 *
 * Some ideas (and a bit of code) are from analyze.c, from GNU
 * diffutils-2.7, which can be found at:
 *   ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
 *
 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
 * are my own.
 *
 * Line length limits for robustness added by Tim Starling, 2005-08-31
 *
 * @author Geoffrey T. Dairiki, Tim Starling
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _DiffEngine
{
    // Working state, reset at the start of every diff() call. Declared
    // explicitly so the engine does not rely on dynamic properties.

    /** Per-line "changed" flags for the from-sequence, indexed by line number. */
    var $xchanged = array();
    /** Per-line "changed" flags for the to-sequence, indexed by line number. */
    var $ychanged = array();
    /** From-lines that also occur in the to-sequence (candidates for the LCS). */
    var $xv = array();
    /** To-lines that also occur in the from-sequence (candidates for the LCS). */
    var $yv = array();
    /** Maps an index in $xv back to the line number in the from-sequence. */
    var $xind = array();
    /** Maps an index in $yv back to the line number in the to-sequence. */
    var $yind = array();
    /** Scratch data of _diag()/_lcs_pos(): y position stored for each LCS slot. */
    var $seq = array();
    /** Scratch data of _diag()/_lcs_pos(): y positions currently held in $seq. */
    var $in_seq = array();
    /** Scratch data of _diag()/_lcs_pos(): current LCS length. */
    var $lcs = 0;

    /**
     * Compute the edit script that turns $from_lines into $to_lines.
     *
     * @param array $from_lines Original sequence of strings.
     * @param array $to_lines   Closing sequence of strings.
     * @return array List of _DiffOp_Copy / _DiffOp_Delete / _DiffOp_Add /
     *               _DiffOp_Change objects, in order.
     */
    function diff ($from_lines, $to_lines) {
        $n_from = sizeof($from_lines);
        $n_to = sizeof($to_lines);

        $this->xchanged = $this->ychanged = array();
        $this->xv = $this->yv = array();
        $this->xind = $this->yind = array();
        $this->seq = array();
        $this->in_seq = array();
        $this->lcs = 0;

        // Skip leading common lines.
        for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
            if ($from_lines[$skip] !== $to_lines[$skip])
                break;
            $this->xchanged[$skip] = $this->ychanged[$skip] = false;
        }
        // Skip trailing common lines.
        $xi = $n_from; $yi = $n_to;
        for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
            if ($from_lines[$xi] !== $to_lines[$yi])
                break;
            $this->xchanged[$xi] = $this->ychanged[$yi] = false;
        }

        // Ignore lines which do not exist in both files.
        $xhash = array();
        $yhash = array();
        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
            $xhash[$this->_line_hash($from_lines[$xi])] = 1;
        }

        for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
            $line = $to_lines[$yi];
            if ( ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) )
                continue;
            $yhash[$this->_line_hash($line)] = 1;
            $this->yv[] = $line;
            $this->yind[] = $yi;
        }
        for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
            $line = $from_lines[$xi];
            if ( ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) )
                continue;
            $this->xv[] = $line;
            $this->xind[] = $xi;
        }

        // Find the LCS.
        $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));

        // Merge edits when possible
        $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
        $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

        // Compute the edit operations.
        $edits = array();
        $xi = $yi = 0;
        while ($xi < $n_from || $yi < $n_to) {
            USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
            USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

            // Skip matching "snake".
            $copy = array();
            while ( $xi < $n_from && $yi < $n_to
                    && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
                $copy[] = $from_lines[$xi++];
                ++$yi;
            }
            if ($copy)
                $edits[] = new _DiffOp_Copy($copy);

            // Find deletes & adds.
            $delete = array();
            while ($xi < $n_from && $this->xchanged[$xi])
                $delete[] = $from_lines[$xi++];

            $add = array();
            while ($yi < $n_to && $this->ychanged[$yi])
                $add[] = $to_lines[$yi++];

            if ($delete && $add)
                $edits[] = new _DiffOp_Change($delete, $add);
            elseif ($delete)
                $edits[] = new _DiffOp_Delete($delete);
            elseif ($add)
                $edits[] = new _DiffOp_Add($add);
        }
        return $edits;
    }

    /**
     * Returns the whole line if it's small enough, or the MD5 hash otherwise
     */
    function _line_hash( $line ) {
        if ( strlen( $line ) > MAX_XREF_LENGTH ) {
            return md5( $line );
        } else {
            return $line;
        }
    }

    /**
     * Decide whether two lines are the same text.
     *
     * A loose "==" must not be used here: PHP compares numeric-looking
     * strings by value, so "1" == "1.0" and "0" == "00" are true and
     * different lines would be reported as unchanged. Both operands are
     * cast to string so the result agrees with the array-key matching done
     * in _diag() even if a caller passes integers.
     */
    function _same_line($a, $b) {
        return (string)$a === (string)$b;
    }

    /* Divide the Largest Common Subsequence (LCS) of the sequences
     * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
     * sized segments.
     *
     * Returns (LCS, PTS).  LCS is the length of the LCS. PTS is an
     * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
     * sub sequences.  The first sub-sequence is contained in [X0, X1),
     * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on.  Note
     * that (X0, Y0) == (XOFF, YOFF) and
     * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
     *
     * This function assumes that the first lines of the specified portions
     * of the two files do not match, and likewise that the last lines do not
     * match.  The caller must trim matching lines from the beginning and end
     * of the portions it is going to specify.
     */
    function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) {
        $flip = false;

        if ($xlim - $xoff > $ylim - $yoff) {
            // Things seem faster (I'm not sure I understand why)
            // when the shortest sequence is in X.
            $flip = true;
            list ($xoff, $xlim, $yoff, $ylim)
                = array( $yoff, $ylim, $xoff, $xlim);
        }

        // For every distinct line of the Y side, the positions where it
        // occurs, stored in descending order.
        $ymatches = array();
        if ($flip)
            for ($i = $ylim - 1; $i >= $yoff; $i--)
                $ymatches[$this->xv[$i]][] = $i;
        else
            for ($i = $ylim - 1; $i >= $yoff; $i--)
                $ymatches[$this->yv[$i]][] = $i;

        $this->lcs = 0;
        $this->seq[0]= $yoff - 1;
        $this->in_seq = array();
        $ymids = array();
        $ymids[0] = array();

        $numer = $xlim - $xoff + $nchunks - 1;
        $x = $xoff;
        for ($chunk = 0; $chunk < $nchunks; $chunk++) {
            if ($chunk > 0)
                for ($i = 0; $i <= $this->lcs; $i++)
                    $ymids[$i][$chunk-1] = $this->seq[$i];

            $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
            for ( ; $x < $x1; $x++) {
                $line = $flip ? $this->yv[$x] : $this->xv[$x];
                if (empty($ymatches[$line]))
                    continue;
                $matches = $ymatches[$line];

                // each() no longer exists in PHP 8, so the list is walked
                // with one shared index: the first loop stops just after the
                // first position not yet in the sequence, and the second
                // loop carries on from there.
                $nmatches = sizeof($matches);
                $m = 0;
                for ( ; $m < $nmatches; $m++) {
                    $y = $matches[$m];
                    if (empty($this->in_seq[$y])) {
                        $k = $this->_lcs_pos($y);
                        USE_ASSERTS && assert($k > 0);
                        $ymids[$k] = $ymids[$k-1];
                        $m++;
                        break;
                    }
                }
                for ( ; $m < $nmatches; $m++) {
                    $y = $matches[$m];
                    if ($y > $this->seq[$k-1]) {
                        USE_ASSERTS && assert($y < $this->seq[$k]);
                        // Optimization: this is a common case:
                        //  next match is just replacing previous match.
                        $this->in_seq[$this->seq[$k]] = false;
                        $this->seq[$k] = $y;
                        $this->in_seq[$y] = 1;
                    } else if (empty($this->in_seq[$y])) {
                        $k = $this->_lcs_pos($y);
                        USE_ASSERTS && assert($k > 0);
                        $ymids[$k] = $ymids[$k-1];
                    }
                }
            }
        }

        $seps = array();
        $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
        $ymid = $ymids[$this->lcs];
        for ($n = 0; $n < $nchunks - 1; $n++) {
            $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
            $y1 = $ymid[$n] + 1;
            $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
        }
        $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

        return array($this->lcs, $seps);
    }

    /**
     * Insert y position $ypos into the sequence table $this->seq.
     *
     * Appends a new slot when $ypos is larger than the last entry; otherwise
     * binary-searches for the slot to overwrite. $this->in_seq is kept in
     * step with $this->seq.
     *
     * @param int $ypos
     * @return int The slot index (>= 1) that now holds $ypos.
     */
    function _lcs_pos ($ypos) {
        $end = $this->lcs;
        if ($end == 0 || $ypos > $this->seq[$end]) {
            $this->seq[++$this->lcs] = $ypos;
            $this->in_seq[$ypos] = 1;
            return $this->lcs;
        }

        $beg = 1;
        while ($beg < $end) {
            $mid = (int)(($beg + $end) / 2);
            if ( $ypos > $this->seq[$mid] )
                $beg = $mid + 1;
            else
                $end = $mid;
        }

        USE_ASSERTS && assert($ypos != $this->seq[$end]);

        $this->in_seq[$this->seq[$end]] = false;
        $this->seq[$end] = $ypos;
        $this->in_seq[$ypos] = 1;
        return $end;
    }

    /* Find LCS of two sequences.
     *
     * The results are recorded in the vectors $this->{x,y}changed[], by
     * storing a 1 in the element for each line that is an insertion
     * or deletion (ie. is not in the LCS).
     *
     * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
     *
     * Note that XLIM, YLIM are exclusive bounds.
     * All line numbers are origin-0 and discarded lines are not counted.
     */
    function _compareseq ($xoff, $xlim, $yoff, $ylim) {
        // Slide down the bottom initial diagonal.
        while ($xoff < $xlim && $yoff < $ylim
               && $this->_same_line($this->xv[$xoff], $this->yv[$yoff])) {
            ++$xoff;
            ++$yoff;
        }

        // Slide up the top initial diagonal.
        while ($xlim > $xoff && $ylim > $yoff
               && $this->_same_line($this->xv[$xlim - 1], $this->yv[$ylim - 1])) {
            --$xlim;
            --$ylim;
        }

        if ($xoff == $xlim || $yoff == $ylim)
            $lcs = 0;
        else {
            // This is ad hoc but seems to work well.
            //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
            //$nchunks = max(2,min(8,(int)$nchunks));
            $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
            list ($lcs, $seps)
                = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
        }

        if ($lcs == 0) {
            // X and Y sequences have no common subsequence:
            // mark all changed.
            while ($yoff < $ylim)
                $this->ychanged[$this->yind[$yoff++]] = 1;
            while ($xoff < $xlim)
                $this->xchanged[$this->xind[$xoff++]] = 1;
        } else {
            // Use the partitions to split this problem into subproblems:
            // each pair of neighbouring dividing points bounds one of them.
            $npoints = sizeof($seps);
            for ($n = 1; $n < $npoints; $n++) {
                $pt1 = $seps[$n - 1];
                $pt2 = $seps[$n];
                $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
            }
        }
    }

    /* Adjust inserts/deletes of identical lines to join changes
     * as much as possible.
     *
     * We do something when a run of changed lines include a
     * line at one end and has an excluded, identical line at the other.
     * We are free to choose which identical line is included.
     * `compareseq' usually chooses the one at the beginning,
     * but usually it is cleaner to consider the following identical line
     * to be the "change".
     *
     * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
     *
     * The sanity checks are real expressions rather than strings: PHP 8
     * no longer evaluates a string passed to assert(), so the string form
     * could never fail.
     */
    function _shift_boundaries ($lines, &$changed, $other_changed) {
        $i = 0;
        $j = 0;

        USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
        $len = sizeof($lines);
        $other_len = sizeof($other_changed);

        while (1) {
            /*
             * Scan forwards to find beginning of another run of changes.
             * Also keep track of the corresponding point in the other file.
             *
             * Throughout this code, $i and $j are adjusted together so that
             * the first $i elements of $changed and the first $j elements
             * of $other_changed both contain the same number of zeros
             * (unchanged lines).
             * Furthermore, $j is always kept so that $j == $other_len or
             * $other_changed[$j] == false.
             */
            while ($j < $other_len && $other_changed[$j])
                $j++;

            while ($i < $len && ! $changed[$i]) {
                USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
                $i++; $j++;
                while ($j < $other_len && $other_changed[$j])
                    $j++;
            }

            if ($i == $len)
                break;

            $start = $i;

            // Find the end of this run of changes.
            while (++$i < $len && $changed[$i])
                continue;

            do {
                /*
                 * Record the length of this run of changes, so that
                 * we can later determine whether the run has grown.
                 */
                $runlength = $i - $start;

                /*
                 * Move the changed region back, so long as the
                 * previous unchanged line matches the last changed one.
                 * This merges with previous changed regions.
                 */
                while ($start > 0 && $this->_same_line($lines[$start - 1], $lines[$i - 1])) {
                    $changed[--$start] = 1;
                    $changed[--$i] = false;
                    while ($start > 0 && $changed[$start - 1])
                        $start--;
                    USE_ASSERTS && assert($j > 0);
                    while ($other_changed[--$j])
                        continue;
                    USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
                }

                /*
                 * Set CORRESPONDING to the end of the changed run, at the last
                 * point where it corresponds to a changed run in the other file.
                 * CORRESPONDING == LEN means no such point has been found.
                 */
                $corresponding = $j < $other_len ? $i : $len;

                /*
                 * Move the changed region forward, so long as the
                 * first changed line matches the following unchanged one.
                 * This merges with following changed regions.
                 * Do this second, so that if there are no merges,
                 * the changed region is moved forward as far as possible.
                 */
                while ($i < $len && $this->_same_line($lines[$start], $lines[$i])) {
                    $changed[$start++] = false;
                    $changed[$i++] = 1;
                    while ($i < $len && $changed[$i])
                        $i++;

                    USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
                    $j++;
                    if ($j < $other_len && $other_changed[$j]) {
                        $corresponding = $i;
                        while ($j < $other_len && $other_changed[$j])
                            $j++;
                    }
                }
            } while ($runlength != $i - $start);

            /*
             * If possible, move the fully-merged run of changes
             * back to a corresponding run in the other file.
             */
            while ($corresponding < $i) {
                $changed[--$start] = 1;
                $changed[--$i] = 0;
                USE_ASSERTS && assert($j > 0);
                while ($other_changed[--$j])
                    continue;
                USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
            }
        }
    }
}
|
545 |
|
/**
 * Class representing a 'diff' between two sequences of strings.
 *
 * The diff is held in $edits as an ordered list of _DiffOp objects.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class Diff
{
    /** Ordered list of _DiffOp objects produced by _DiffEngine::diff(). */
    var $edits;

    /**
     * Constructor.
     * Computes diff between sequences of strings.
     *
     * @param $from_lines array An array of strings.
     *        (Typically these are lines from a file.)
     * @param $to_lines array An array of strings.
     */
    function __construct($from_lines, $to_lines) {
        $eng = new _DiffEngine;
        $this->edits = $eng->diff($from_lines, $to_lines);
        //$this->_check($from_lines, $to_lines);
    }

    /**
     * PHP 4 style constructor name. Subclasses in this file call it
     * explicitly as $this->Diff(...), so it stays available; PHP 8 does not
     * run it on "new", which is why __construct() exists. The call is made
     * through self:: so a subclass constructor is never re-entered.
     */
    function Diff($from_lines, $to_lines) {
        self::__construct($from_lines, $to_lines);
    }

    /**
     * Compute reversed Diff.
     *
     * SYNOPSIS:
     *
     *  $diff = new Diff($lines1, $lines2);
     *  $rev = $diff->reverse();
     * @return object A Diff object representing the inverse of the
     *          original diff.
     */
    function reverse () {
        // Objects are handles in PHP 5+, so "$rev = $this" would alias this
        // instance: emptying $rev->edits would empty our own list before
        // the loop ran. Work on a copy and read from the untouched original.
        $rev = clone $this;
        $rev->edits = array();
        foreach ($this->edits as $edit) {
            $rev->edits[] = $edit->reverse();
        }
        return $rev;
    }

    /**
     * Check for empty diff.
     *
     * @return bool True iff two sequences were identical.
     */
    function isEmpty () {
        foreach ($this->edits as $edit) {
            if ($edit->type != 'copy')
                return false;
        }
        return true;
    }

    /**
     * Compute the length of the Longest Common Subsequence (LCS).
     *
     * This is mostly for diagnostic purposes.
     *
     * @return int The length of the LCS.
     */
    function lcs () {
        $lcs = 0;
        foreach ($this->edits as $edit) {
            if ($edit->type == 'copy')
                $lcs += sizeof($edit->orig);
        }
        return $lcs;
    }

    /**
     * Get the original set of lines.
     *
     * This reconstructs the $from_lines parameter passed to the
     * constructor.
     *
     * @return array The original sequence of strings.
     */
    function orig() {
        $lines = array();

        foreach ($this->edits as $edit) {
            if ($edit->orig)
                array_splice($lines, sizeof($lines), 0, $edit->orig);
        }
        return $lines;
    }

    /**
     * Get the closing set of lines.
     *
     * This reconstructs the $to_lines parameter passed to the
     * constructor.
     *
     * @return array The sequence of strings.
     */
    function closing() {
        $lines = array();

        foreach ($this->edits as $edit) {
            if ($edit->closing)
                array_splice($lines, sizeof($lines), 0, $edit->closing);
        }
        return $lines;
    }

    /**
     * Check a Diff for validity.
     *
     * This is here only for debugging purposes. Every failed check raises
     * a user-level error; a successful run ends with a user-level notice.
     */
    function _check ($from_lines, $to_lines) {
        if (serialize($from_lines) != serialize($this->orig()))
            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
        if (serialize($to_lines) != serialize($this->closing()))
            trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);

        $rev = $this->reverse();
        if (serialize($to_lines) != serialize($rev->orig()))
            trigger_error("Reversed original doesn't match", E_USER_ERROR);
        if (serialize($from_lines) != serialize($rev->closing()))
            trigger_error("Reversed closing doesn't match", E_USER_ERROR);


        // Two neighbouring edits of the same type could have been merged.
        $prevtype = 'none';
        foreach ($this->edits as $edit) {
            if ( $prevtype == $edit->type )
                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
            $prevtype = $edit->type;
        }

        $lcs = $this->lcs();
        trigger_error('Diff okay: LCS = '.$lcs, E_USER_NOTICE);
    }
}
|
686 |
|
/**
 * Diff computed on one pair of sequences but reported in terms of another.
 *
 * FIXME: bad name.
 * @package Enano
 * @subpackage DifferenceEngine
 */
class MappedDiff extends Diff
{
    /**
     * Constructor.
     *
     * Computes diff between sequences of strings.
     *
     * This can be used to compute things like
     * case-insensitive diffs, or diffs which ignore
     * changes in white-space.
     *
     * @param $from_lines array An array of strings.
     *  (Typically these are lines from a file.)
     *
     * @param $to_lines array An array of strings.
     *
     * @param $mapped_from_lines array This array should
     *  have the same number of elements as $from_lines.
     *  The elements in $mapped_from_lines and
     *  $mapped_to_lines are what is actually compared
     *  when computing the diff.
     *
     * @param $mapped_to_lines array This array should
     *  have the same number of elements as $to_lines.
     */
    function __construct($from_lines, $to_lines,
                         $mapped_from_lines, $mapped_to_lines) {
        assert(sizeof($from_lines) == sizeof($mapped_from_lines));
        assert(sizeof($to_lines) == sizeof($mapped_to_lines));

        // The parent is reached through its class-named method, which is
        // callable on every PHP version.
        $this->Diff($mapped_from_lines, $mapped_to_lines);

        // The edits now hold the mapped lines. Walk them in order and swap
        // in the matching slices of the real lines; $xi / $yi track how far
        // each real sequence has been consumed.
        $xi = $yi = 0;
        foreach ($this->edits as $edit) {
            if (is_array($edit->orig)) {
                $edit->orig = array_slice($from_lines, $xi, sizeof($edit->orig));
                $xi += sizeof($edit->orig);
            }

            if (is_array($edit->closing)) {
                $edit->closing = array_slice($to_lines, $yi, sizeof($edit->closing));
                $yi += sizeof($edit->closing);
            }
        }
    }

    /**
     * PHP 4 style constructor name. WordLevelDiff calls it explicitly, so
     * it stays available; PHP 8 does not run it on "new". The call goes
     * through self:: so a subclass constructor is never re-entered.
     */
    function MappedDiff($from_lines, $to_lines,
                        $mapped_from_lines, $mapped_to_lines) {
        self::__construct($from_lines, $to_lines,
                          $mapped_from_lines, $mapped_to_lines);
    }
}
|
745 |
|
/**
 * A class to format Diffs
 *
 * This class formats the diff in classic diff format.
 * It is intended that this class be customized via inheritance,
 * to obtain fancier outputs: every piece of output goes through one of the
 * small underscore-prefixed hook methods below.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class DiffFormatter
{
    /**
     * Number of leading context "lines" to preserve.
     *
     * This should be left at zero for this class, but subclasses
     * may want to set this to other values.
     */
    var $leading_context_lines = 0;

    /**
     * Number of trailing context "lines" to preserve.
     *
     * This should be left at zero for this class, but subclasses
     * may want to set this to other values.
     */
    var $trailing_context_lines = 0;

    /**
     * Format a diff.
     *
     * Consecutive non-copy edits, together with the configured amount of
     * surrounding context, are gathered into blocks and handed to _block().
     *
     * @param $diff object A Diff object.
     * @return string The formatted output.
     */
    function format($diff) {
        $lead = $this->leading_context_lines;
        $trail = $this->trailing_context_lines;

        // 1-based positions of the next line on each side.
        $xi = 1;
        $yi = 1;
        // Edits of the block being collected, or false when none is open.
        $block = false;
        // Lines of the most recent copy edit.
        $context = array();

        $this->_start_diff();

        foreach ($diff->edits as $edit) {
            if ($edit->type != 'copy') {
                if (! is_array($block)) {
                    // Open a block, starting with the tail of the last copy.
                    $context = array_slice($context, sizeof($context) - $lead);
                    $x0 = $xi - sizeof($context);
                    $y0 = $yi - sizeof($context);
                    $block = array();
                    if ($context)
                        $block[] = new _DiffOp_Copy($context);
                }
                $block[] = $edit;
            }
            else {
                if (is_array($block)) {
                    if (sizeof($edit->orig) > $lead + $trail) {
                        // Long unchanged run: emit the block, followed by
                        // its trailing context.
                        if ($trail) {
                            $context = array_slice($edit->orig, 0, $trail);
                            $block[] = new _DiffOp_Copy($context);
                        }
                        $this->_block($x0, $trail + $xi - $x0,
                                      $y0, $trail + $yi - $y0,
                                      $block);
                        $block = false;
                    }
                    else {
                        // Short unchanged run: keep it inside the block.
                        $block[] = $edit;
                    }
                }
                $context = $edit->orig;
            }

            if ($edit->orig)
                $xi += sizeof($edit->orig);
            if ($edit->closing)
                $yi += sizeof($edit->closing);
        }

        // Emit a block that is still open at the end of the diff.
        if (is_array($block))
            $this->_block($x0, $xi - $x0,
                          $y0, $yi - $y0,
                          $block);

        return $this->_end_diff();
    }

    /**
     * Output one block: its header, then every edit via the matching hook.
     *
     * @param int   $xbeg  First line of the block on the original side.
     * @param int   $xlen  Number of original-side lines in the block.
     * @param int   $ybeg  First line of the block on the closing side.
     * @param int   $ylen  Number of closing-side lines in the block.
     * @param array $edits The block's edit objects (taken by reference).
     */
    function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
        $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
        foreach ($edits as $edit) {
            switch ($edit->type) {
                case 'copy':
                    $this->_context($edit->orig);
                    break;
                case 'add':
                    $this->_added($edit->closing);
                    break;
                case 'delete':
                    $this->_deleted($edit->orig);
                    break;
                case 'change':
                    $this->_changed($edit->orig, $edit->closing);
                    break;
                default:
                    trigger_error('Unknown edit type', E_USER_ERROR);
            }
        }
        $this->_end_block();
    }

    /** Start capturing everything the hooks echo. */
    function _start_diff() {
        ob_start();
    }

    /**
     * Stop capturing and discard the buffer.
     *
     * @return string The output collected since _start_diff().
     */
    function _end_diff() {
        $captured = ob_get_contents();
        ob_end_clean();
        return $captured;
    }

    /**
     * Build a classic diff range header such as "3,5c3,4", "7d6" or "2a3".
     *
     * @return string
     */
    function _block_header($xbeg, $xlen, $ybeg, $ylen) {
        $xrange = $xbeg;
        if ($xlen > 1)
            $xrange .= "," . ($xbeg + $xlen - 1);

        $yrange = $ybeg;
        if ($ylen > 1)
            $yrange .= "," . ($ybeg + $ylen - 1);

        // c = both sides have lines, d = original side only, a = no original lines.
        if (!$xlen)
            $op = 'a';
        elseif ($ylen)
            $op = 'c';
        else
            $op = 'd';

        return $xrange . $op . $yrange;
    }

    /**
     * Emit the block header.
     * NOTE(review): no line break is written after the header here.
     */
    function _start_block($header) {
        echo $header;
    }

    /** Hook called after the last edit of a block; does nothing here. */
    function _end_block() {
    }

    /** Echo each line as "<prefix> <line>" followed by a newline. */
    function _lines($lines, $prefix = ' ') {
        foreach ($lines as $line)
            echo $prefix . ' ' . $line . "\n";
    }

    /** Output unchanged lines (default prefix). */
    function _context($lines) {
        $this->_lines($lines);
    }

    /** Output added lines, prefixed with ">". */
    function _added($lines) {
        $this->_lines($lines, '>');
    }

    /** Output deleted lines, prefixed with "<". */
    function _deleted($lines) {
        $this->_lines($lines, '<');
    }

    /** Output a change: deleted lines, a "---" separator, then added lines. */
    function _changed($orig, $closing) {
        $this->_deleted($orig);
        echo "---\n";
        $this->_added($closing);
    }
}
|
908 |
|
909 |
|
910 /** |
|
911 * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3 |
|
912 * |
|
913 */ |
|
914 |
|
// Non-breaking space used to keep otherwise empty diff lines visible. It is
// written as a numeric character reference because the lines it is added to
// are HTML fragments; a raw space (or a raw single-byte NBSP) in the literal
// does not reliably survive as a non-breaking space.
define('NBSP', '&#160;');
|
916 |
|
/**
 * Collects the words of one side of a word-level diff into HTML lines.
 *
 * Words arrive in groups that share a tag. Groups tagged 'mark' are wrapped
 * in a span with class "diffchange"; all text is passed through
 * htmlspecialchars().
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class _HWLDF_WordAccumulator {
    /** Finished lines. */
    var $_lines = array();
    /** HTML of the line currently being built. */
    var $_line = '';
    /** Raw (unescaped) text of the group currently being built. */
    var $_group = '';
    /** Tag of the current group ('' or 'mark'). */
    var $_tag = '';

    /**
     * Start with empty buffers.
     */
    function __construct() {
        $this->_lines = array();
        $this->_line = '';
        $this->_group = '';
        $this->_tag = '';
    }

    /**
     * PHP 4 style constructor name, kept for explicit callers. PHP 8 does
     * not run it on "new", which used to leave the buffers uninitialised.
     */
    function _HWLDF_WordAccumulator () {
        self::__construct();
    }

    /**
     * Append the pending group to the current line and start a new group.
     *
     * @param string $new_tag Tag of the group that starts now.
     */
    function _flushGroup ($new_tag) {
        if ($this->_group !== '') {
            if ($this->_tag == 'mark')
                $this->_line .= '<span class="diffchange">' .
                    htmlspecialchars ( $this->_group ) . '</span>';
            else
                $this->_line .= htmlspecialchars ( $this->_group );
        }
        $this->_group = '';
        $this->_tag = $new_tag;
    }

    /**
     * Finish the current line and start a new one.
     *
     * @param string $new_tag Tag of the group that starts now.
     */
    function _flushLine ($new_tag) {
        $this->_flushGroup($new_tag);
        if ($this->_line != '')
            array_push ( $this->_lines, $this->_line );
        else
            # make empty lines visible by inserting an NBSP
            array_push ( $this->_lines, NBSP );
        $this->_line = '';
    }

    /**
     * Add a run of words that share one tag.
     *
     * @param array  $words Words; a newline may only occur as the first
     *                      character of a word and starts a new line.
     * @param string $tag   '' for unchanged text, 'mark' for changed text.
     */
    function addWords ($words, $tag = '') {
        if ($tag != $this->_tag)
            $this->_flushGroup($tag);

        foreach ($words as $word) {
            // new-line should only come as first char of word.
            if ($word == '')
                continue;
            if ($word[0] == "\n") {
                $this->_flushLine($tag);
                $word = substr($word, 1);
            }
            assert(!strstr($word, "\n"));
            $this->_group .= $word;
        }
    }

    /**
     * Flush whatever is pending and return the collected lines.
     *
     * @return array HTML lines.
     */
    function getLines() {
        $this->_flushLine('~done');
        return $this->_lines;
    }
}
|
974 |
|
// Lines longer than this many bytes are not split into words; each such line
// is treated as a single word (see WordLevelDiff::_split()).
define('MAX_LINE_LENGTH', 10000);

/**
 * Word-by-word diff of two blocks of lines.
 *
 * Both inputs are split into words; the words are compared with any single
 * trailing whitespace character removed, while the edits report the words
 * as written.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class WordLevelDiff extends MappedDiff
{
    /**
     * @param array $orig_lines    Lines of the original text.
     * @param array $closing_lines Lines of the closing text.
     */
    function __construct($orig_lines, $closing_lines) {
        list ($orig_words, $orig_stripped) = $this->_split($orig_lines);
        list ($closing_words, $closing_stripped) = $this->_split($closing_lines);

        // The parent is reached through its class-named method, which is
        // callable on every PHP version.
        $this->MappedDiff($orig_words, $closing_words,
                          $orig_stripped, $closing_stripped);
    }

    /**
     * PHP 4 style constructor name, kept for explicit callers. PHP 8 does
     * not run it on "new", which is why __construct() exists.
     */
    function WordLevelDiff ($orig_lines, $closing_lines) {
        self::__construct($orig_lines, $closing_lines);
    }

    /**
     * Split lines into words.
     *
     * @param array $lines
     * @return array Two parallel lists: the words as written (each word may
     *               carry one trailing whitespace character) and the words
     *               without it. Lines after the first are preceded by a
     *               "\n" entry in both lists.
     */
    function _split($lines) {
        $words = array();
        $stripped = array();
        $first = true;
        foreach ( $lines as $line ) {
            if ( $first ) {
                $first = false;
            } else {
                $words[] = "\n";
                $stripped[] = "\n";
            }
            # If the line is too long, just pretend the entire line is one big word
            # This prevents resource exhaustion problems
            if ( strlen( $line ) > MAX_LINE_LENGTH ) {
                $words[] = $line;
                $stripped[] = $line;
            } else {
                // NOTE(review): "(?!< \n)" is a lookahead for "<\n"; a
                // lookbehind "(?<! \n)" may have been intended. Left as is
                // because changing it would change how words are split.
                if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
                    $line, $m))
                {
                    $words = array_merge( $words, $m[0] );
                    $stripped = array_merge( $stripped, $m[1] );
                }
            }
        }
        return array($words, $stripped);
    }

    /**
     * @return array HTML lines of the original text, changed words marked.
     */
    function orig () {
        $orig = new _HWLDF_WordAccumulator;

        foreach ($this->edits as $edit) {
            if ($edit->type == 'copy')
                $orig->addWords($edit->orig);
            elseif ($edit->orig)
                $orig->addWords($edit->orig, 'mark');
        }
        $lines = $orig->getLines();
        return $lines;
    }

    /**
     * @return array HTML lines of the closing text, changed words marked.
     */
    function closing () {
        $closing = new _HWLDF_WordAccumulator;

        foreach ($this->edits as $edit) {
            if ($edit->type == 'copy')
                $closing->addWords($edit->closing);
            elseif ($edit->closing)
                $closing->addWords($edit->closing, 'mark');
        }
        $lines = $closing->getLines();
        return $lines;
    }
}
|
1059 |
|
/**
 * Wikipedia Table style diff formatter.
 *
 * Writes the diff as HTML table rows with echo: the original text goes into
 * the left pair of cells of each row and the changed text into the right
 * pair.
 *
 * @package Enano
 * @subpackage DifferenceEngine
 */
class TableDiffFormatter extends DiffFormatter
{
	/**
	 * Set two lines of leading and two lines of trailing context.
	 *
	 * A real __construct() is required on PHP 8, where a method named after
	 * the class is no longer treated as a constructor and these settings
	 * would otherwise never be applied.
	 */
	function __construct() {
		$this->leading_context_lines = 2;
		$this->trailing_context_lines = 2;
	}

	/**
	 * Legacy PHP 4 style constructor, kept so existing explicit calls to
	 * TableDiffFormatter() keep working. Forwards to __construct().
	 */
	function TableDiffFormatter() {
		$this->__construct();
	}

	/**
	 * Build the header row of a diff block.
	 *
	 * Only the two start positions are used; they are embedded in
	 * "<!--LINE n-->" HTML comments. $xlen and $ylen are accepted but unused.
	 *
	 * @param mixed $xbeg Start position on the original side.
	 * @param mixed $xlen Unused.
	 * @param mixed $ybeg Start position on the changed side.
	 * @param mixed $ylen Unused.
	 * @return string HTML for the header row.
	 */
	function _block_header( $xbeg, $xlen, $ybeg, $ylen ) {
		$r = '<tr><td colspan="2" align="left"><strong><!--LINE '.$xbeg."--></strong></td>\n" .
			'<td colspan="2" align="left"><strong><!--LINE '.$ybeg."--></strong></td></tr>\n";
		return $r;
	}

	/**
	 * Output the block header.
	 *
	 * @param string $header Header HTML, echoed as is.
	 */
	function _start_block( $header ) {
		echo $header;
	}

	/**
	 * Nothing is emitted at the end of a block.
	 */
	function _end_block() {
	}

	/**
	 * Intentionally empty: this formatter does not produce generic
	 * prefixed lines.
	 */
	function _lines( $lines, $prefix=' ', $color='white' ) {
	}

	/**
	 * Cell pair for an added line.
	 * HTML-escape parameter before calling this.
	 *
	 * @param string $line Already escaped line content.
	 * @return string Two table cells.
	 */
	function addedLine( $line ) {
		return "<td>+</td><td class='diff-addedline'>{$line}</td>";
	}

	/**
	 * Cell pair for a deleted line.
	 * HTML-escape parameter before calling this.
	 *
	 * @param string $line Already escaped line content.
	 * @return string Two table cells.
	 */
	function deletedLine( $line ) {
		return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
	}

	/**
	 * Cell pair for an unchanged context line.
	 * HTML-escape parameter before calling this.
	 *
	 * @param string $line Already escaped line content.
	 * @return string Two table cells.
	 */
	function contextLine( $line ) {
		return "<td> </td><td class='diff-context'>{$line}</td>";
	}

	/**
	 * Placeholder spanning both cells of one side of a row.
	 *
	 * @return string One table cell with colspan 2.
	 */
	function emptyLine() {
		return '<td colspan="2"> </td>';
	}

	/**
	 * Output added lines: empty left side, escaped line on the right.
	 *
	 * @param array $lines Raw (unescaped) lines.
	 */
	function _added( $lines ) {
		foreach ($lines as $line) {
			echo '<tr>' . $this->emptyLine() .
				$this->addedLine( htmlspecialchars ( $line ) ) . "</tr>\n";
		}
	}

	/**
	 * Output deleted lines: escaped line on the left, empty right side.
	 *
	 * @param array $lines Raw (unescaped) lines.
	 */
	function _deleted($lines) {
		foreach ($lines as $line) {
			echo '<tr>' . $this->deletedLine( htmlspecialchars ( $line ) ) .
				$this->emptyLine() . "</tr>\n";
		}
	}

	/**
	 * Output context lines: the same escaped line on both sides.
	 *
	 * @param array $lines Raw (unescaped) lines.
	 */
	function _context( $lines ) {
		foreach ($lines as $line) {
			$cell = $this->contextLine( htmlspecialchars ( $line ) );
			echo '<tr>' . $cell . $cell . "</tr>\n";
		}
	}

	/**
	 * Output a changed block using a word-level diff of both sides.
	 *
	 * Deleted-side lines are paired row by row with added-side lines; added
	 * lines left over afterwards get an empty left side.
	 *
	 * @param array $orig    Lines of the original side.
	 * @param array $closing Lines of the changed side.
	 */
	function _changed( $orig, $closing ) {
		$diff = new WordLevelDiff( $orig, $closing );
		$del = $diff->orig();
		$add = $diff->closing();

		# Notice that WordLevelDiff returns HTML-escaped output.
		# Hence, we will be calling addedLine/deletedLine without HTML-escaping.

		// Compare against null rather than testing truthiness: a line that
		// is exactly "0" (or "") is falsy and used to end this loop early,
		// silently dropping every remaining deleted line.
		while ( ( $line = array_shift( $del ) ) !== null ) {
			$aline = array_shift( $add );
			echo '<tr>' . $this->deletedLine( $line ) .
				$this->addedLine( $aline ) . "</tr>\n";
		}
		foreach ($add as $line) {	# If any leftovers
			echo '<tr>' . $this->emptyLine() .
				$this->addedLine( $line ) . "</tr>\n";
		}
	}
}
|
1153 |