
Commit 1950c75

Author: bors-servo (committed)
Auto merge of #102 - servo:quad, r=jdm
Fix accidentally quadratic algorithm

`Tokenizer::source_location` takes a "position" counting UTF-8 bytes since the start of the stylesheet and returns a "location" made of a line number and a column number. To avoid counting lines from the start every time, it saves some results after each call.

Previously we would only save the position of the last known line break, so calling source_location() for many positions within a single long line would keep searching from that point for the next line break and therefore take O(n²) time. Very long lines can easily happen when a CSS minifier is used.

We now save the full (line, column) result from the last call.

See servo/servo#9897 (comment) and servo/servo#9897 (comment).

r? @jdm

This change is Reviewable: https://reviewable.io/reviews/servo/rust-cssparser/102
2 parents: 4ab1d41 + 375ad58
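As a rough illustration of the caching strategy described in the commit message, here is a minimal standalone sketch. It is not the crate's code: LineCounter, its location() method, and the plain (byte offset, line, column) cache tuple are hypothetical stand-ins for the Tokenizer's SourcePosition/SourceLocation cache. Like the fixed source_location(), each call resumes counting newlines from the previous answer, so a series of forward lookups within one long minified line does O(n) total work instead of O(n²).

// Hypothetical sketch of a full-location cache; not the cssparser implementation.
use std::cell::Cell;

struct LineCounter<'a> {
    input: &'a str,
    // Last answer: (byte offset, line, column); line and column are 1-based.
    cache: Cell<(usize, usize, usize)>,
}

impl<'a> LineCounter<'a> {
    fn new(input: &'a str) -> Self {
        LineCounter { input, cache: Cell::new((0, 1, 1)) }
    }

    /// Returns (line, column) for `target`, a byte offset into `input`,
    /// counting newlines only from the previously cached offset when possible.
    fn location(&self, target: usize) -> (usize, usize) {
        let (mut position, mut line, mut column) = self.cache.get();
        if target < position {
            // Only forward traversal is cached; start over for earlier offsets.
            position = 0;
            line = 1;
            column = 1;
        }
        let mut source = &self.input[position..target];
        while let Some(newline) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
            // Treat "\r\n" as a single line break.
            let offset = newline + if source[newline..].starts_with("\r\n") { 2 } else { 1 };
            source = &source[offset..];
            position += offset;
            line += 1;
            column = 1;
        }
        column += target - position;
        self.cache.set((target, line, column));
        (line, column)
    }
}

fn main() {
    // One long "minified" line followed by a second line.
    let css = ".a{color:red}.b{color:blue}\n.c{color:green}";
    let counter = LineCounter::new(css);
    assert_eq!(counter.location(0), (1, 1));
    assert_eq!(counter.location(14), (1, 15)); // resumes from the cached offset 0
    assert_eq!(counter.location(28), (2, 1));  // resumes from offset 14, crosses the newline
}

Caching the full location rather than only the last line-break position is what removes the quadratic re-scan: within a single line the cached column advances, so no byte is examined twice as long as lookups move forward.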

File tree

2 files changed: +25 -40 lines changed

Cargo.toml

Lines changed: 4 additions & 16 deletions
@@ -16,25 +16,13 @@ license = "MPL-2.0"
 rustc-serialize = "0.3"
 tempdir = "0.3"
 
-[dependencies.serde]
-version = ">=0.6.6, <0.8"
-optional = true
-
-[dependencies.serde_macros]
-version = ">=0.6.5, <0.8"
-optional = true
-
-[dependencies.heapsize]
-version = ">=0.1.1, <0.4.0"
-optional = true
-
-[dependencies.heapsize_plugin]
-version = "0.1.0"
-optional = true
-
 [dependencies]
 encoding = "0.2"
+heapsize = {version = ">=0.1.1, <0.4.0", optional = true}
+heapsize_plugin = {version = "0.1.0", optional = true}
 matches = "0.1"
+serde = {version = ">=0.6.6, <0.8", optional = true}
+serde_macros = {version = ">=0.6.5, <0.8", optional = true}
 
 [features]
 serde-serialization = [ "serde", "serde_macros" ]

src/tokenizer.rs

Lines changed: 21 additions & 24 deletions
@@ -210,7 +210,7 @@ pub struct Tokenizer<'a> {
     /// Counted in bytes, not code points. From 0.
     position: usize,
     /// Cache for `source_location()`
-    last_known_line_break: Cell<(usize, usize)>,
+    last_known_source_location: Cell<(SourcePosition, SourceLocation)>,
     var_functions: SeenStatus,
     viewport_percentages: SeenStatus,
 }
@@ -229,7 +229,8 @@ impl<'a> Tokenizer<'a> {
         Tokenizer {
             input: input,
             position: 0,
-            last_known_line_break: Cell::new((1, 0)),
+            last_known_source_location: Cell::new((SourcePosition(0),
+                                                   SourceLocation { line: 1, column: 1 })),
            var_functions: SeenStatus::DontCare,
            viewport_percentages: SeenStatus::DontCare,
        }
@@ -292,37 +293,33 @@ impl<'a> Tokenizer<'a> {
 
     pub fn source_location(&self, position: SourcePosition) -> SourceLocation {
         let target = position.0;
-        let mut line_number;
+        let mut location;
         let mut position;
-        let (last_known_line_number, position_after_last_known_newline) =
-            self.last_known_line_break.get();
-        if target >= position_after_last_known_newline {
-            position = position_after_last_known_newline;
-            line_number = last_known_line_number;
+        let (SourcePosition(last_known_position), last_known_location) =
+            self.last_known_source_location.get();
+        if target >= last_known_position {
+            position = last_known_position;
+            location = last_known_location;
         } else {
+            // For now we’re only traversing the source *forwards* to count newlines.
+            // So if the requested position is before the last known one,
+            // start over from the beginning.
             position = 0;
-            line_number = 1;
+            location = SourceLocation { line: 1, column: 1 };
         }
         let mut source = &self.input[position..target];
-        while let Some(newline_position) = source.find(&['\n', '\r', '\x0C'][..]) {
+        while let Some(newline_position) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
             let offset = newline_position +
-                if source[newline_position..].starts_with("\r\n") {
-                    2
-                } else {
-                    1
-                };
+                if source[newline_position..].starts_with("\r\n") { 2 } else { 1 };
             source = &source[offset..];
             position += offset;
-            line_number += 1;
+            location.line += 1;
+            location.column = 1;
         }
         debug_assert!(position <= target);
-        self.last_known_line_break.set((line_number, position));
-        SourceLocation {
-            line: line_number,
-            // `target == position` when `target` is at the beginning of the line,
-            // so add 1 so that the column numbers start at 1.
-            column: target - position + 1,
-        }
+        location.column += target - position;
+        self.last_known_source_location.set((SourcePosition(target), location));
+        location
     }
 
     #[inline]
@@ -385,7 +382,7 @@ pub struct SourceLocation {
     /// The line number, starting at 1 for the first line.
     pub line: usize,
 
-    /// The column number within a line, starting at 1 for the character of the line.
+    /// The column number within a line, starting at 1 for the first character of the line.
     pub column: usize,
 }
 