4988 |
kaklik |
1 |
<?php |
|
|
2 |
// WebSVN - Subversion repository viewing via the web using PHP |
|
|
3 |
// Copyright (C) 2004-2006 Tim Armes |
|
|
4 |
// |
|
|
5 |
// This program is free software; you can redistribute it and/or modify |
|
|
6 |
// it under the terms of the GNU General Public License as published by |
|
|
7 |
// the Free Software Foundation; either version 2 of the License, or |
|
|
8 |
// (at your option) any later version. |
|
|
9 |
// |
|
|
10 |
// This program is distributed in the hope that it will be useful, |
|
|
11 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
12 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
13 |
// GNU General Public License for more details. |
|
|
14 |
// |
|
|
15 |
// You should have received a copy of the GNU General Public License |
|
|
16 |
// along with this program; if not, write to the Free Software |
|
|
17 |
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
|
18 |
// |
|
|
19 |
// -- |
|
|
20 |
// |
|
|
21 |
// diff_util.php |
|
|
22 |
// |
|
|
23 |
// help diff_inc.php to make sensible changes from added and deleted diff lines |
|
|
24 |
// These lines are automatically paired and also inline diff is performed to show |
|
|
25 |
// insertions/deletions on one line. diff_inc.php must have all Horde_Text_Diff |
|
|
26 |
// requirements met (include_once statements). |
|
|
27 |
|
|
|
28 |
// Interface for diffing function |
|
|
29 |
class LineDiffInterface { |
|
|
30 |
// similarity 1 means that strings are very close to each other |
|
|
31 |
// 0 means totally different |
|
|
32 |
function lineSimilarity($text1, $text2) { |
|
|
33 |
assert(false); |
|
|
34 |
} |
|
|
35 |
|
|
|
36 |
// return array($left, $right) annotated with <ins> and <del> |
|
|
37 |
function inlineDiff($text1, $highlighted1, $text2, $highlighted2, $highlighted) { |
|
|
38 |
assert(false); |
|
|
39 |
} |
|
|
40 |
} |
|
|
41 |
|
|
|
42 |
// Default line diffing function |
|
|
43 |
class LineDiff extends LineDiffInterface { |
|
|
44 |
|
|
|
45 |
private bool $ignoreWhitespace; |
|
|
46 |
|
|
|
47 |
function __construct($ignoreWhitespace) { |
|
|
48 |
$this->ignoreWhitespace = $ignoreWhitespace; |
|
|
49 |
} |
|
|
50 |
|
|
|
51 |
// {{{ levenshtein2 |
|
|
52 |
// levenshtein edit distance, on small strings use php function |
|
|
53 |
// on large strings approximate distance using words |
|
|
54 |
// computed by dynamic programming |
|
|
55 |
function levenshtein2($str1, $str2) { |
|
|
56 |
if (strlen($str1) < 255 && strlen($str2) < 255) { |
|
|
57 |
return levenshtein($str1, $str2); |
|
|
58 |
} |
|
|
59 |
|
|
|
60 |
$l1 = explode(' ', $str1); |
|
|
61 |
$l2 = explode(' ', $str2); |
|
|
62 |
|
|
|
63 |
$n = count($l1); |
|
|
64 |
$m = count($l2); |
|
|
65 |
$d = array_fill(0, $n + 1, array_fill(0, $m + 1, 0)); |
|
|
66 |
|
|
|
67 |
for ($i = 1; $i < $n + 1; $i++) { |
|
|
68 |
$d[$i][0] = $i; |
|
|
69 |
} |
|
|
70 |
for ($j = 1; $j < $m + 1; $j++) { |
|
|
71 |
$d[0][$j] = $j; |
|
|
72 |
} |
|
|
73 |
|
|
|
74 |
for ($i = 1; $i < $n + 1; $i++) { |
|
|
75 |
for ($j = 1; $j < $m + 1; $j++) { |
|
|
76 |
$c = ($l1[$i - 1] == $l2[$j - 1]) ? 0 : strlen($l1[$i - 1]) + strlen($l2[$j - 1]); |
|
|
77 |
$d[$i][$j] = min($d[$i - 1][$j] + 1, $d[$i][$j - 1] + 1, $d[$i - 1][$j - 1] + $c); |
|
|
78 |
} |
|
|
79 |
} |
|
|
80 |
|
|
|
81 |
return $d[$n][$m]; |
|
|
82 |
} |
|
|
83 |
// }}} |
|
|
84 |
|
|
|
85 |
// {{{ lineSimilarity |
|
|
86 |
function lineSimilarity($text1, $text2) { |
|
|
87 |
$distance = $this->levenshtein2($text1, $text2); |
|
|
88 |
return max(0.0, 1.0 - $distance / (strlen($text1) + strlen($text2) + 4)); |
|
|
89 |
} |
|
|
90 |
// }}} |
|
|
91 |
|
|
|
92 |
// {{{ tokenize whole line into words |
|
|
93 |
// note that separators are returned as tokens of length 1 |
|
|
94 |
// and if $ignoreWhitespace is true, consecutive whitespaces are returned as one token |
|
|
95 |
function tokenize($string, $highlighted, $ignoreWhitespace) { |
|
|
96 |
$html = array('<' => '>', '&' => ';'); |
|
|
97 |
$whitespaces = array("\t","\n","\r",' '); |
|
|
98 |
$separators = array('.','-','+','*','/','<','>','?','(',')','&','/','{','}','[',']',':',';'); |
|
|
99 |
$data = array(); |
|
|
100 |
$segment = ''; |
|
|
101 |
$segmentIsWhitespace = true; |
|
|
102 |
$count = strlen($string); |
|
|
103 |
for ($i = 0; $i < $count; $i++) { |
|
|
104 |
$c = $string[$i]; |
|
|
105 |
if ($highlighted && array_key_exists($c, $html)) { |
|
|
106 |
if ($segment != '') { |
|
|
107 |
$data[] = $segment; |
|
|
108 |
} |
|
|
109 |
// consider html tags and entities as a single token |
|
|
110 |
$endchar = $html[$c]; |
|
|
111 |
$segment = $c; |
|
|
112 |
do { |
|
|
113 |
$i++; |
|
|
114 |
$c = $string[$i]; |
|
|
115 |
$segment .= $c; |
|
|
116 |
} while ($c != $endchar && $i < $count - 1); |
|
|
117 |
$data[] = $segment; |
|
|
118 |
$segment = ''; |
|
|
119 |
$segmentIsWhitespace = false; |
|
|
120 |
} else if (in_array($c, $separators) || (!$ignoreWhitespace && in_array($c, $whitespaces))) { |
|
|
121 |
// if it is separator or whitespace and we do not consider consecutive whitespaces |
|
|
122 |
if ($segment != '') { |
|
|
123 |
$data[] = $segment; |
|
|
124 |
} |
|
|
125 |
$data[] = $c; |
|
|
126 |
$segment = ''; |
|
|
127 |
$segmentIsWhitespace = true; |
|
|
128 |
} else if (in_array($c, $whitespaces)) { |
|
|
129 |
// if it is whitespace and we consider consecutive whitespaces as one token |
|
|
130 |
if (!$segmentIsWhitespace) { |
|
|
131 |
$data[] = $segment; |
|
|
132 |
$segment = ''; |
|
|
133 |
$segmentIsWhitespace = true; |
|
|
134 |
} |
|
|
135 |
$segment .= $c; |
|
|
136 |
} else { |
|
|
137 |
// no separator or whitespace |
|
|
138 |
if ($segmentIsWhitespace && $segment != '') { |
|
|
139 |
$data[] = $segment; |
|
|
140 |
$segment = ''; |
|
|
141 |
} |
|
|
142 |
$segment .= $c; |
|
|
143 |
$segmentIsWhitespace = false; |
|
|
144 |
} |
|
|
145 |
} |
|
|
146 |
if ($segment != '') { |
|
|
147 |
$data[] = $segment; |
|
|
148 |
} |
|
|
149 |
return $data; |
|
|
150 |
} |
|
|
151 |
// }}} |
|
|
152 |
|
|
|
153 |
// {{{ lineDiff |
|
|
154 |
function inlineDiff($text1, $highlighted1, $text2, $highlighted2, $highlighted) { |
|
|
155 |
$whitespaces = array(' ', "\t", "\n", "\r"); |
|
|
156 |
|
|
|
157 |
$do_diff = true; |
|
|
158 |
|
|
|
159 |
if ($text1 == '' || $text2 == '') { |
|
|
160 |
$do_diff = false; |
|
|
161 |
} |
|
|
162 |
|
|
|
163 |
if ($this->ignoreWhitespace && (str_replace($whitespaces, array(), $text1) == str_replace($whitespaces, array(), $text2))) { |
|
|
164 |
$do_diff = false; |
|
|
165 |
} |
|
|
166 |
|
|
|
167 |
// Exit gracefully if loading of Horde_Text_Diff failed |
|
|
168 |
if (!class_exists('Horde_Text_Diff') || !class_exists('Horde_Text_Diff_Mapped')) { |
|
|
169 |
$do_diff = false; |
|
|
170 |
} |
|
|
171 |
|
|
|
172 |
// Return highlighted lines without doing inline diff |
|
|
173 |
if (!$do_diff) { |
|
|
174 |
return array($highlighted1, $highlighted2); |
|
|
175 |
} |
|
|
176 |
|
|
|
177 |
$tokens1 = $this->tokenize($highlighted1, $highlighted, $this->ignoreWhitespace); |
|
|
178 |
$tokens2 = $this->tokenize($highlighted2, $highlighted, $this->ignoreWhitespace); |
|
|
179 |
|
|
|
180 |
if (!$this->ignoreWhitespace) { |
|
|
181 |
$diff = new Horde_Text_Diff('Native', array($tokens1, $tokens2)); |
|
|
182 |
} else { |
|
|
183 |
// we need to create mapped parts for MappedDiff |
|
|
184 |
$mapped1 = array(); |
|
|
185 |
foreach ($tokens1 as $token) { |
|
|
186 |
$mapped1[] = str_replace($whitespaces, array(), $token); |
|
|
187 |
} |
|
|
188 |
$mapped2 = array(); |
|
|
189 |
foreach ($tokens2 as $token) { |
|
|
190 |
$mapped2[] = str_replace($whitespaces, array(), $token); |
|
|
191 |
} |
|
|
192 |
$diff = new Horde_Text_Diff_Mapped('Native', array($tokens1, $tokens2, $mapped1, $mapped2)); |
|
|
193 |
} |
|
|
194 |
|
|
|
195 |
// now, get the diff and annotate text |
|
|
196 |
$edits = $diff->getDiff(); |
|
|
197 |
|
|
|
198 |
$line1 = ''; |
|
|
199 |
$line2 = ''; |
|
|
200 |
foreach ($edits as $edit) { |
|
|
201 |
if ($edit instanceof Horde_Text_Diff_Op_Copy) { |
|
|
202 |
$line1 .= implode('', $edit->orig); |
|
|
203 |
$line2 .= implode('', $edit->final); |
|
|
204 |
} else if ($edit instanceof Horde_Text_Diff_Op_Delete) { |
|
|
205 |
$line1 .= '<del>'.implode('', $edit->orig).'</del>'; |
|
|
206 |
} else if ($edit instanceof Horde_Text_Diff_Op_Add) { |
|
|
207 |
$line2 .= '<ins>'.implode('', $edit->final).'</ins>'; |
|
|
208 |
} else if ($edit instanceof Horde_Text_Diff_Op_Change) { |
|
|
209 |
$line1 .= '<del>'.implode('', $edit->orig).'</del>'; |
|
|
210 |
$line2 .= '<ins>'.implode('', $edit->final).'</ins>'; |
|
|
211 |
} else { |
|
|
212 |
assert(false); |
|
|
213 |
} |
|
|
214 |
} |
|
|
215 |
return array($line1, $line2); |
|
|
216 |
} |
|
|
217 |
// }}} |
|
|
218 |
} |
|
|
219 |
|
|
|
220 |
// Class for computing sensibly added/deleted block of lines. |
|
|
221 |
class SensibleLineChanges { |
|
|
222 |
var $_added = array(); |
|
|
223 |
var $_deleted = array(); |
|
|
224 |
var $_lineDiff = null; |
|
|
225 |
|
|
|
226 |
function __construct($lineDiff) { |
|
|
227 |
$this->_lineDiff = $lineDiff; |
|
|
228 |
} |
|
|
229 |
|
|
|
230 |
function addDeletedLine($text, $highlighted_text, $lineno) { |
|
|
231 |
$this->_deleted[] = array($text, $highlighted_text, $lineno); |
|
|
232 |
} |
|
|
233 |
|
|
|
234 |
function addAddedLine($text, $highlighted_text, $lineno) { |
|
|
235 |
$this->_added[] = array($text, $highlighted_text, $lineno); |
|
|
236 |
} |
|
|
237 |
|
|
|
238 |
// this function computes simple match - first min(deleted,added) lines are marked as changed |
|
|
239 |
// it is intended to be run instead of _computeBestMatching if the diff is too big |
|
|
240 |
function _computeFastMatching($n, $m) { |
|
|
241 |
$result = array(); |
|
|
242 |
$q = 0; |
|
|
243 |
while ($q < $n && $q < $m) { |
|
|
244 |
$result[] = array($this->_deleted[$q], $this->_added[$q]); |
|
|
245 |
$q++; |
|
|
246 |
} |
|
|
247 |
while ($q < $n) { |
|
|
248 |
$result[] = array($this->_deleted[$q], null); |
|
|
249 |
$q++; |
|
|
250 |
} |
|
|
251 |
while ($q < $m) { |
|
|
252 |
$result[] = array(null, $this->_added[$q]); |
|
|
253 |
$q++; |
|
|
254 |
} |
|
|
255 |
return $result; |
|
|
256 |
} |
|
|
257 |
|
|
|
258 |
// {{{ _computeBestMatching |
|
|
259 |
// dynamically compute best matching |
|
|
260 |
// note that this is O(n*m) * O(line similarity) |
|
|
261 |
function _computeBestMatching() { |
|
|
262 |
$n = count($this->_deleted); |
|
|
263 |
$m = count($this->_added); |
|
|
264 |
|
|
|
265 |
// if the computation will be slow, just run fast algorithm |
|
|
266 |
if ($n * $m > 10000) { |
|
|
267 |
return $this->_computeFastMatching($n, $m); |
|
|
268 |
} |
|
|
269 |
|
|
|
270 |
// dyn[$i][$j] holds best sum of similarities we can obtain if we match |
|
|
271 |
// first $i deleted lines and first $j added lines |
|
|
272 |
$dyn = array_fill(0, $n + 1, array_fill(0, $m + 1, 0.0)); |
|
|
273 |
// backlinks, so we can reconstruct best layout easily |
|
|
274 |
$back = array_fill(0, $n + 1, array_fill(0, $m + 1, -1)); |
|
|
275 |
|
|
|
276 |
// if there is no similarity, prefer adding/deleting lines |
|
|
277 |
$value_del = 0.1; |
|
|
278 |
$value_add = 0.1; |
|
|
279 |
|
|
|
280 |
// initialize arrays |
|
|
281 |
for ($i = 1; $i <= $n; $i++) { |
|
|
282 |
$back[$i][0] = 0; |
|
|
283 |
$dyn[$i][0] = $value_del * $i; |
|
|
284 |
} |
|
|
285 |
for ($j = 1; $j <= $m; $j++) { |
|
|
286 |
$back[0][$j] = 1; |
|
|
287 |
$dyn[0][$j] = $value_add * $j; |
|
|
288 |
} |
|
|
289 |
|
|
|
290 |
// main dynamic programming |
|
|
291 |
for ($i = 1; $i <= $n; $i++) { |
|
|
292 |
for ($j = 1; $j <= $m; $j++) { |
|
|
293 |
$best = - 1.0; |
|
|
294 |
$b = -1; |
|
|
295 |
if ($dyn[$i - 1][$j] + $value_del >= $best) { |
|
|
296 |
$b = 0; |
|
|
297 |
$best = $dyn[$i - 1][$j] + $value_del; |
|
|
298 |
} |
|
|
299 |
if ($dyn[$i][$j - 1] + $value_add >= $best) { |
|
|
300 |
$b = 1; |
|
|
301 |
$best = $dyn[$i][$j - 1] + $value_add; |
|
|
302 |
} |
|
|
303 |
$sim = $this->_lineDiff->lineSimilarity($this->_deleted[$i - 1][0], $this->_added[$j - 1][0]); |
|
|
304 |
if ($dyn[$i - 1][$j - 1] + $sim >= $best) { |
|
|
305 |
$b = 2; |
|
|
306 |
$best = $dyn[$i - 1][$j - 1] + $sim; |
|
|
307 |
} |
|
|
308 |
$back[$i][$j] = $b; |
|
|
309 |
$dyn[$i][$j] = $best; |
|
|
310 |
} |
|
|
311 |
} |
|
|
312 |
|
|
|
313 |
// compute layout for best result |
|
|
314 |
$i = $n; |
|
|
315 |
$j = $m; |
|
|
316 |
$result = array(); |
|
|
317 |
|
|
|
318 |
while ($i + $j >= 1) { |
|
|
319 |
switch($back[$i][$j]) { |
|
|
320 |
case 2: array_push($result, array($this->_deleted[$i - 1], $this->_added[$j - 1])); |
|
|
321 |
$i--; |
|
|
322 |
$j--; |
|
|
323 |
break; |
|
|
324 |
case 1: array_push($result, array(null, $this->_added[$j - 1])); |
|
|
325 |
$j--; |
|
|
326 |
break; |
|
|
327 |
case 0: array_push($result, array($this->_deleted[$i - 1], null)); |
|
|
328 |
$i--; |
|
|
329 |
break; |
|
|
330 |
default: |
|
|
331 |
assert(false); |
|
|
332 |
} |
|
|
333 |
} |
|
|
334 |
return array_reverse($result); |
|
|
335 |
} |
|
|
336 |
// }}} |
|
|
337 |
|
|
|
338 |
// {{{ addChangesToListing |
|
|
339 |
// add computed changes to the listing |
|
|
340 |
function addChangesToListing(&$listingHelper, $highlighted) { |
|
|
341 |
$matching = $this->_computeBestMatching(); |
|
|
342 |
foreach ($matching as $change) { |
|
|
343 |
if ($change[1] == null) { |
|
|
344 |
// deleted -- preserve original highlighted text |
|
|
345 |
$listingHelper->addDeletedLine($change[0][1], $change[0][2]); |
|
|
346 |
} else if ($change[0] == null) { |
|
|
347 |
// added -- preserve original highlighted text |
|
|
348 |
$listingHelper->addAddedLine($change[1][1], $change[1][2]); |
|
|
349 |
} else { |
|
|
350 |
// this is fully changed line, make inline diff |
|
|
351 |
$diff = $this->_lineDiff->inlineDiff($change[0][0], $change[0][1], $change[1][0], $change[1][1], $highlighted); |
|
|
352 |
$listingHelper->addChangedLine($diff[0], $change[0][2], $diff[1], $change[1][2]); |
|
|
353 |
} |
|
|
354 |
} |
|
|
355 |
$this->clear(); |
|
|
356 |
} |
|
|
357 |
// }}} |
|
|
358 |
|
|
|
359 |
function clear() { |
|
|
360 |
$this->_added = array(); |
|
|
361 |
$this->_deleted = array(); |
|
|
362 |
} |
|
|
363 |
} |
|
|
364 |
|