
Commit 1950c75

Author: bors-servo (committed)
Auto merge of #102 - servo:quad, r=jdm
Fix accidentally quadratic algorithm

`Tokenizer::source_location` takes a "position" counting UTF-8 bytes since the start of the stylesheet and returns a "location" made of a line number and a column number. To avoid counting lines from the start every time, it saves some results after each call.

Previously we would only save the position of the last known line break, so calling source_location() for many positions within a single long line would keep searching from that point for the next line break and therefore take O(n²) time. Very long lines can easily happen when a CSS minifier is used.

We now save the full (line, column) result from the last call.

See servo/servo#9897 (comment) and servo/servo#9897 (comment).

r? @jdm

This change is Reviewable: https://reviewable.io/reviews/servo/rust-cssparser/102
2 parents: 4ab1d41 + 375ad58
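As a rough illustration of the caching strategy described in the commit message, here is a minimal standalone sketch. It is not the crate's code: LineCounter, its location() method, and the plain (byte offset, line, column) cache tuple are hypothetical stand-ins for the Tokenizer's SourcePosition/SourceLocation cache. Like the fixed source_location(), each call resumes counting newlines from the previous answer, so a series of forward lookups within one long minified line does O(n) total work instead of O(n²).

// Hypothetical sketch of a full-location cache; not the cssparser implementation.
use std::cell::Cell;

struct LineCounter<'a> {
    input: &'a str,
    // Last answer: (byte offset, line, column); line and column are 1-based.
    cache: Cell<(usize, usize, usize)>,
}

impl<'a> LineCounter<'a> {
    fn new(input: &'a str) -> Self {
        LineCounter { input, cache: Cell::new((0, 1, 1)) }
    }

    /// Returns (line, column) for `target`, a byte offset into `input`,
    /// counting newlines only from the previously cached offset when possible.
    fn location(&self, target: usize) -> (usize, usize) {
        let (mut position, mut line, mut column) = self.cache.get();
        if target < position {
            // Only forward traversal is cached; start over for earlier offsets.
            position = 0;
            line = 1;
            column = 1;
        }
        let mut source = &self.input[position..target];
        while let Some(newline) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
            // Treat "\r\n" as a single line break.
            let offset = newline + if source[newline..].starts_with("\r\n") { 2 } else { 1 };
            source = &source[offset..];
            position += offset;
            line += 1;
            column = 1;
        }
        column += target - position;
        self.cache.set((target, line, column));
        (line, column)
    }
}

fn main() {
    // One long "minified" line followed by a second line.
    let css = ".a{color:red}.b{color:blue}\n.c{color:green}";
    let counter = LineCounter::new(css);
    assert_eq!(counter.location(0), (1, 1));
    assert_eq!(counter.location(14), (1, 15)); // resumes from the cached offset 0
    assert_eq!(counter.location(28), (2, 1));  // resumes from offset 14, crosses the newline
}

Caching the full location rather than only the last line-break position is what removes the quadratic re-scan: within a single line the cached column advances, so no byte is examined twice as long as lookups move forward.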

File tree

2 files changed: +25 -40 lines changed

Cargo.toml

Lines changed: 4 additions & 16 deletions
@@ -16,25 +16,13 @@ license = "MPL-2.0"
 rustc-serialize = "0.3"
 tempdir = "0.3"
 
-[dependencies.serde]
-version = ">=0.6.6, <0.8"
-optional = true
-
-[dependencies.serde_macros]
-version = ">=0.6.5, <0.8"
-optional = true
-
-[dependencies.heapsize]
-version = ">=0.1.1, <0.4.0"
-optional = true
-
-[dependencies.heapsize_plugin]
-version = "0.1.0"
-optional = true
-
 [dependencies]
 encoding = "0.2"
+heapsize = {version = ">=0.1.1, <0.4.0", optional = true}
+heapsize_plugin = {version = "0.1.0", optional = true}
 matches = "0.1"
+serde = {version = ">=0.6.6, <0.8", optional = true}
+serde_macros = {version = ">=0.6.5, <0.8", optional = true}
 
 [features]
 serde-serialization = [ "serde", "serde_macros" ]

src/tokenizer.rs

Lines changed: 21 additions & 24 deletions
@@ -210,7 +210,7 @@ pub struct Tokenizer<'a> {
     /// Counted in bytes, not code points. From 0.
     position: usize,
     /// Cache for `source_location()`
-    last_known_line_break: Cell<(usize, usize)>,
+    last_known_source_location: Cell<(SourcePosition, SourceLocation)>,
     var_functions: SeenStatus,
     viewport_percentages: SeenStatus,
 }
@@ -229,7 +229,8 @@ impl<'a> Tokenizer<'a> {
         Tokenizer {
             input: input,
             position: 0,
-            last_known_line_break: Cell::new((1, 0)),
+            last_known_source_location: Cell::new((SourcePosition(0),
+                                                   SourceLocation { line: 1, column: 1 })),
            var_functions: SeenStatus::DontCare,
            viewport_percentages: SeenStatus::DontCare,
        }
@@ -292,37 +293,33 @@ impl<'a> Tokenizer<'a> {
 
     pub fn source_location(&self, position: SourcePosition) -> SourceLocation {
         let target = position.0;
-        let mut line_number;
+        let mut location;
         let mut position;
-        let (last_known_line_number, position_after_last_known_newline) =
-            self.last_known_line_break.get();
-        if target >= position_after_last_known_newline {
-            position = position_after_last_known_newline;
-            line_number = last_known_line_number;
+        let (SourcePosition(last_known_position), last_known_location) =
+            self.last_known_source_location.get();
+        if target >= last_known_position {
+            position = last_known_position;
+            location = last_known_location;
         } else {
+            // For now we’re only traversing the source *forwards* to count newlines.
+            // So if the requested position is before the last known one,
+            // start over from the beginning.
             position = 0;
-            line_number = 1;
+            location = SourceLocation { line: 1, column: 1 };
         }
         let mut source = &self.input[position..target];
-        while let Some(newline_position) = source.find(&['\n', '\r', '\x0C'][..]) {
+        while let Some(newline_position) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
             let offset = newline_position +
-                if source[newline_position..].starts_with("\r\n") {
-                    2
-                } else {
-                    1
-                };
+                if source[newline_position..].starts_with("\r\n") { 2 } else { 1 };
             source = &source[offset..];
             position += offset;
-            line_number += 1;
+            location.line += 1;
+            location.column = 1;
         }
         debug_assert!(position <= target);
-        self.last_known_line_break.set((line_number, position));
-        SourceLocation {
-            line: line_number,
-            // `target == position` when `target` is at the beginning of the line,
-            // so add 1 so that the column numbers start at 1.
-            column: target - position + 1,
-        }
+        location.column += target - position;
+        self.last_known_source_location.set((SourcePosition(target), location));
+        location
     }
 
     #[inline]
@@ -385,7 +382,7 @@ pub struct SourceLocation {
     /// The line number, starting at 1 for the first line.
     pub line: usize,
 
-    /// The column number within a line, starting at 1 for the character of the line.
+    /// The column number within a line, starting at 1 for the first character of the line.
     pub column: usize,
 }
 