Rev Author Line No. Line
4988 kaklik 1 <?php
2 // WebSVN - Subversion repository viewing via the web using PHP
3 // Copyright (C) 2004-2006 Tim Armes
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 //
19 // --
20 //
21 // diff_util.php
22 //
23 // help diff_inc.php to make sensible changes from added and deleted diff lines
24 // These lines are automatically paired and also inline diff is performed to show
25 // insertions/deletions on one line. diff_inc.php must have all Horde_Text_Diff
26 // requirements met (include_once statements).
27  
28 // Interface for diffing function
29 class LineDiffInterface {
30 // similarity 1 means that strings are very close to each other
31 // 0 means totally different
32 function lineSimilarity($text1, $text2) {
33 assert(false);
34 }
35  
36 // return array($left, $right) annotated with <ins> and <del>
37 function inlineDiff($text1, $highlighted1, $text2, $highlighted2, $highlighted) {
38 assert(false);
39 }
40 }
41  
42 // Default line diffing function
43 class LineDiff extends LineDiffInterface {
44  
45 private bool $ignoreWhitespace;
46  
47 function __construct($ignoreWhitespace) {
48 $this->ignoreWhitespace = $ignoreWhitespace;
49 }
50  
51 // {{{ levenshtein2
52 // levenshtein edit distance, on small strings use php function
53 // on large strings approximate distance using words
54 // computed by dynamic programming
55 function levenshtein2($str1, $str2) {
56 if (strlen($str1) < 255 && strlen($str2) < 255) {
57 return levenshtein($str1, $str2);
58 }
59  
60 $l1 = explode(' ', $str1);
61 $l2 = explode(' ', $str2);
62  
63 $n = count($l1);
64 $m = count($l2);
65 $d = array_fill(0, $n + 1, array_fill(0, $m + 1, 0));
66  
67 for ($i = 1; $i < $n + 1; $i++) {
68 $d[$i][0] = $i;
69 }
70 for ($j = 1; $j < $m + 1; $j++) {
71 $d[0][$j] = $j;
72 }
73  
74 for ($i = 1; $i < $n + 1; $i++) {
75 for ($j = 1; $j < $m + 1; $j++) {
76 $c = ($l1[$i - 1] == $l2[$j - 1]) ? 0 : strlen($l1[$i - 1]) + strlen($l2[$j - 1]);
77 $d[$i][$j] = min($d[$i - 1][$j] + 1, $d[$i][$j - 1] + 1, $d[$i - 1][$j - 1] + $c);
78 }
79 }
80  
81 return $d[$n][$m];
82 }
83 // }}}
84  
85 // {{{ lineSimilarity
86 function lineSimilarity($text1, $text2) {
87 $distance = $this->levenshtein2($text1, $text2);
88 return max(0.0, 1.0 - $distance / (strlen($text1) + strlen($text2) + 4));
89 }
90 // }}}
91  
92 // {{{ tokenize whole line into words
93 // note that separators are returned as tokens of length 1
94 // and if $ignoreWhitespace is true, consecutive whitespaces are returned as one token
95 function tokenize($string, $highlighted, $ignoreWhitespace) {
96 $html = array('<' => '>', '&' => ';');
97 $whitespaces = array("\t","\n","\r",' ');
98 $separators = array('.','-','+','*','/','<','>','?','(',')','&','/','{','}','[',']',':',';');
99 $data = array();
100 $segment = '';
101 $segmentIsWhitespace = true;
102 $count = strlen($string);
103 for ($i = 0; $i < $count; $i++) {
104 $c = $string[$i];
105 if ($highlighted && array_key_exists($c, $html)) {
106 if ($segment != '') {
107 $data[] = $segment;
108 }
109 // consider html tags and entities as a single token
110 $endchar = $html[$c];
111 $segment = $c;
112 do {
113 $i++;
114 $c = $string[$i];
115 $segment .= $c;
116 } while ($c != $endchar && $i < $count - 1);
117 $data[] = $segment;
118 $segment = '';
119 $segmentIsWhitespace = false;
120 } else if (in_array($c, $separators) || (!$ignoreWhitespace && in_array($c, $whitespaces))) {
121 // if it is separator or whitespace and we do not consider consecutive whitespaces
122 if ($segment != '') {
123 $data[] = $segment;
124 }
125 $data[] = $c;
126 $segment = '';
127 $segmentIsWhitespace = true;
128 } else if (in_array($c, $whitespaces)) {
129 // if it is whitespace and we consider consecutive whitespaces as one token
130 if (!$segmentIsWhitespace) {
131 $data[] = $segment;
132 $segment = '';
133 $segmentIsWhitespace = true;
134 }
135 $segment .= $c;
136 } else {
137 // no separator or whitespace
138 if ($segmentIsWhitespace && $segment != '') {
139 $data[] = $segment;
140 $segment = '';
141 }
142 $segment .= $c;
143 $segmentIsWhitespace = false;
144 }
145 }
146 if ($segment != '') {
147 $data[] = $segment;
148 }
149 return $data;
150 }
151 // }}}
152  
153 // {{{ lineDiff
154 function inlineDiff($text1, $highlighted1, $text2, $highlighted2, $highlighted) {
155 $whitespaces = array(' ', "\t", "\n", "\r");
156  
157 $do_diff = true;
158  
159 if ($text1 == '' || $text2 == '') {
160 $do_diff = false;
161 }
162  
163 if ($this->ignoreWhitespace && (str_replace($whitespaces, array(), $text1) == str_replace($whitespaces, array(), $text2))) {
164 $do_diff = false;
165 }
166  
167 // Exit gracefully if loading of Horde_Text_Diff failed
168 if (!class_exists('Horde_Text_Diff') || !class_exists('Horde_Text_Diff_Mapped')) {
169 $do_diff = false;
170 }
171  
172 // Return highlighted lines without doing inline diff
173 if (!$do_diff) {
174 return array($highlighted1, $highlighted2);
175 }
176  
177 $tokens1 = $this->tokenize($highlighted1, $highlighted, $this->ignoreWhitespace);
178 $tokens2 = $this->tokenize($highlighted2, $highlighted, $this->ignoreWhitespace);
179  
180 if (!$this->ignoreWhitespace) {
181 $diff = new Horde_Text_Diff('Native', array($tokens1, $tokens2));
182 } else {
183 // we need to create mapped parts for MappedDiff
184 $mapped1 = array();
185 foreach ($tokens1 as $token) {
186 $mapped1[] = str_replace($whitespaces, array(), $token);
187 }
188 $mapped2 = array();
189 foreach ($tokens2 as $token) {
190 $mapped2[] = str_replace($whitespaces, array(), $token);
191 }
192 $diff = new Horde_Text_Diff_Mapped('Native', array($tokens1, $tokens2, $mapped1, $mapped2));
193 }
194  
195 // now, get the diff and annotate text
196 $edits = $diff->getDiff();
197  
198 $line1 = '';
199 $line2 = '';
200 foreach ($edits as $edit) {
201 if ($edit instanceof Horde_Text_Diff_Op_Copy) {
202 $line1 .= implode('', $edit->orig);
203 $line2 .= implode('', $edit->final);
204 } else if ($edit instanceof Horde_Text_Diff_Op_Delete) {
205 $line1 .= '<del>'.implode('', $edit->orig).'</del>';
206 } else if ($edit instanceof Horde_Text_Diff_Op_Add) {
207 $line2 .= '<ins>'.implode('', $edit->final).'</ins>';
208 } else if ($edit instanceof Horde_Text_Diff_Op_Change) {
209 $line1 .= '<del>'.implode('', $edit->orig).'</del>';
210 $line2 .= '<ins>'.implode('', $edit->final).'</ins>';
211 } else {
212 assert(false);
213 }
214 }
215 return array($line1, $line2);
216 }
217 // }}}
218 }
219  
220 // Class for computing sensibly added/deleted block of lines.
221 class SensibleLineChanges {
222 var $_added = array();
223 var $_deleted = array();
224 var $_lineDiff = null;
225  
226 function __construct($lineDiff) {
227 $this->_lineDiff = $lineDiff;
228 }
229  
230 function addDeletedLine($text, $highlighted_text, $lineno) {
231 $this->_deleted[] = array($text, $highlighted_text, $lineno);
232 }
233  
234 function addAddedLine($text, $highlighted_text, $lineno) {
235 $this->_added[] = array($text, $highlighted_text, $lineno);
236 }
237  
238 // this function computes simple match - first min(deleted,added) lines are marked as changed
239 // it is intended to be run instead of _computeBestMatching if the diff is too big
240 function _computeFastMatching($n, $m) {
241 $result = array();
242 $q = 0;
243 while ($q < $n && $q < $m) {
244 $result[] = array($this->_deleted[$q], $this->_added[$q]);
245 $q++;
246 }
247 while ($q < $n) {
248 $result[] = array($this->_deleted[$q], null);
249 $q++;
250 }
251 while ($q < $m) {
252 $result[] = array(null, $this->_added[$q]);
253 $q++;
254 }
255 return $result;
256 }
257  
258 // {{{ _computeBestMatching
259 // dynamically compute best matching
260 // note that this is O(n*m) * O(line similarity)
261 function _computeBestMatching() {
262 $n = count($this->_deleted);
263 $m = count($this->_added);
264  
265 // if the computation will be slow, just run fast algorithm
266 if ($n * $m > 10000) {
267 return $this->_computeFastMatching($n, $m);
268 }
269  
270 // dyn[$i][$j] holds best sum of similarities we can obtain if we match
271 // first $i deleted lines and first $j added lines
272 $dyn = array_fill(0, $n + 1, array_fill(0, $m + 1, 0.0));
273 // backlinks, so we can reconstruct best layout easily
274 $back = array_fill(0, $n + 1, array_fill(0, $m + 1, -1));
275  
276 // if there is no similarity, prefer adding/deleting lines
277 $value_del = 0.1;
278 $value_add = 0.1;
279  
280 // initialize arrays
281 for ($i = 1; $i <= $n; $i++) {
282 $back[$i][0] = 0;
283 $dyn[$i][0] = $value_del * $i;
284 }
285 for ($j = 1; $j <= $m; $j++) {
286 $back[0][$j] = 1;
287 $dyn[0][$j] = $value_add * $j;
288 }
289  
290 // main dynamic programming
291 for ($i = 1; $i <= $n; $i++) {
292 for ($j = 1; $j <= $m; $j++) {
293 $best = - 1.0;
294 $b = -1;
295 if ($dyn[$i - 1][$j] + $value_del >= $best) {
296 $b = 0;
297 $best = $dyn[$i - 1][$j] + $value_del;
298 }
299 if ($dyn[$i][$j - 1] + $value_add >= $best) {
300 $b = 1;
301 $best = $dyn[$i][$j - 1] + $value_add;
302 }
303 $sim = $this->_lineDiff->lineSimilarity($this->_deleted[$i - 1][0], $this->_added[$j - 1][0]);
304 if ($dyn[$i - 1][$j - 1] + $sim >= $best) {
305 $b = 2;
306 $best = $dyn[$i - 1][$j - 1] + $sim;
307 }
308 $back[$i][$j] = $b;
309 $dyn[$i][$j] = $best;
310 }
311 }
312  
313 // compute layout for best result
314 $i = $n;
315 $j = $m;
316 $result = array();
317  
318 while ($i + $j >= 1) {
319 switch($back[$i][$j]) {
320 case 2: array_push($result, array($this->_deleted[$i - 1], $this->_added[$j - 1]));
321 $i--;
322 $j--;
323 break;
324 case 1: array_push($result, array(null, $this->_added[$j - 1]));
325 $j--;
326 break;
327 case 0: array_push($result, array($this->_deleted[$i - 1], null));
328 $i--;
329 break;
330 default:
331 assert(false);
332 }
333 }
334 return array_reverse($result);
335 }
336 // }}}
337  
338 // {{{ addChangesToListing
339 // add computed changes to the listing
340 function addChangesToListing(&$listingHelper, $highlighted) {
341 $matching = $this->_computeBestMatching();
342 foreach ($matching as $change) {
343 if ($change[1] == null) {
344 // deleted -- preserve original highlighted text
345 $listingHelper->addDeletedLine($change[0][1], $change[0][2]);
346 } else if ($change[0] == null) {
347 // added -- preserve original highlighted text
348 $listingHelper->addAddedLine($change[1][1], $change[1][2]);
349 } else {
350 // this is fully changed line, make inline diff
351 $diff = $this->_lineDiff->inlineDiff($change[0][0], $change[0][1], $change[1][0], $change[1][1], $highlighted);
352 $listingHelper->addChangedLine($diff[0], $change[0][2], $diff[1], $change[1][2]);
353 }
354 }
355 $this->clear();
356 }
357 // }}}
358  
359 function clear() {
360 $this->_added = array();
361 $this->_deleted = array();
362 }
363 }
364