
Commit 5f40718

bors-servo authored and committed
Auto merge of #102 - servo:quad, r=jdm
Fix accidentally quadratic algorithm

`Tokenizer::source_location` takes a "position" counting UTF-8 bytes since the start of the stylesheet and returns a "location" made of a line number and a column number. To avoid counting lines from the start every time, it saves some results after each call.

Previously we would only save the position of the last known line break, so calling source_location() for many positions within a single long line would keep searching from that point for the next line break and therefore take O(n²) time. Very long lines can easily happen when a CSS minifier is used.

We now save the full (line, column) result from the last call.

See servo/servo#9897 (comment) and servo/servo#9897 (comment).

r? @jdm

This change is reviewable at https://reviewable.io/reviews/servo/rust-cssparser/102
2 parents d5c93cf + d1eefdf commit 5f40718
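
As a rough illustration of the caching strategy described in the commit message, here is a minimal, self-contained Rust sketch. The `LocationCache` and `Location` names are made up for this example and are not cssparser's API; the actual change is in the src/tokenizer.rs diff below. The point is that the full (position, location) result of the previous query is cached, so repeated queries within one long minified line only scan the bytes between consecutive query positions.

// A minimal, self-contained sketch (assumed names, not cssparser's API).
use std::cell::Cell;

#[derive(Copy, Clone, Debug, PartialEq)]
struct Location {
    line: usize,   // 1-based line number
    column: usize, // 1-based column number
}

struct LocationCache {
    // Full result of the previous query: (byte position, location at that position).
    last: Cell<(usize, Location)>,
}

impl LocationCache {
    fn new() -> Self {
        LocationCache { last: Cell::new((0, Location { line: 1, column: 1 })) }
    }

    /// Convert a byte offset (on a char boundary) into a (line, column) location,
    /// resuming the scan from the previous answer instead of from the start.
    fn location(&self, input: &str, target: usize) -> Location {
        let (last_pos, last_loc) = self.last.get();
        // We only scan forwards, so start over if `target` is before the cached position.
        let (mut pos, mut loc) = if target >= last_pos {
            (last_pos, last_loc)
        } else {
            (0, Location { line: 1, column: 1 })
        };
        let mut source = &input[pos..target];
        while let Some(i) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
            // "\r\n" counts as a single line break that is two bytes long.
            let offset = i + if source[i..].starts_with("\r\n") { 2 } else { 1 };
            source = &source[offset..];
            pos += offset;
            loc.line += 1;
            loc.column = 1;
        }
        loc.column += target - pos;
        // Caching the full result (not just the last line break) keeps repeated
        // queries within one long line from rescanning the same bytes.
        self.last.set((target, loc));
        loc
    }
}

fn main() {
    let css = ".a{color:red}.b{color:blue}\n.c{}";
    let cache = LocationCache::new();
    assert_eq!(cache.location(css, 0), Location { line: 1, column: 1 });
    // The second query on the same line resumes from the cached result
    // instead of rescanning from the start of the stylesheet.
    assert_eq!(cache.location(css, 14), Location { line: 1, column: 15 });
    assert_eq!(cache.location(css, 28), Location { line: 2, column: 1 });
}

With only the last line-break position cached, as before this commit, the second and third queries above would each rescan from that line break, which is what made many queries on one long line quadratic overall.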

2 files changed: +25, -40 lines

Cargo.toml

Lines changed: 4 additions & 16 deletions
@@ -16,25 +16,13 @@ license = "MPL-2.0"
 rustc-serialize = "0.3"
 tempdir = "0.3"
 
-[dependencies.serde]
-version = ">=0.6.6, <0.8"
-optional = true
-
-[dependencies.serde_macros]
-version = ">=0.6.5, <0.8"
-optional = true
-
-[dependencies.heapsize]
-version = ">=0.1.1, <0.4.0"
-optional = true
-
-[dependencies.heapsize_plugin]
-version = "0.1.0"
-optional = true
-
 [dependencies]
 encoding = "0.2"
+heapsize = {version = ">=0.1.1, <0.4.0", optional = true}
+heapsize_plugin = {version = "0.1.0", optional = true}
 matches = "0.1"
+serde = {version = ">=0.6.6, <0.8", optional = true}
+serde_macros = {version = ">=0.6.5, <0.8", optional = true}
 
 [features]
 serde-serialization = [ "serde", "serde_macros" ]

src/tokenizer.rs

Lines changed: 21 additions & 24 deletions
@@ -210,7 +210,7 @@ pub struct Tokenizer<'a> {
     /// Counted in bytes, not code points. From 0.
     position: usize,
     /// Cache for `source_location()`
-    last_known_line_break: Cell<(usize, usize)>,
+    last_known_source_location: Cell<(SourcePosition, SourceLocation)>,
     var_functions: VarFunctions,
 }
 
@@ -228,7 +228,8 @@ impl<'a> Tokenizer<'a> {
         Tokenizer {
             input: input,
             position: 0,
-            last_known_line_break: Cell::new((1, 0)),
+            last_known_source_location: Cell::new((SourcePosition(0),
+                                                   SourceLocation { line: 1, column: 1 })),
             var_functions: VarFunctions::DontCare,
         }
     }
@@ -278,37 +279,33 @@ impl<'a> Tokenizer<'a> {
 
     pub fn source_location(&self, position: SourcePosition) -> SourceLocation {
         let target = position.0;
-        let mut line_number;
+        let mut location;
         let mut position;
-        let (last_known_line_number, position_after_last_known_newline) =
-            self.last_known_line_break.get();
-        if target >= position_after_last_known_newline {
-            position = position_after_last_known_newline;
-            line_number = last_known_line_number;
+        let (SourcePosition(last_known_position), last_known_location) =
+            self.last_known_source_location.get();
+        if target >= last_known_position {
+            position = last_known_position;
+            location = last_known_location;
         } else {
+            // For now we’re only traversing the source *forwards* to count newlines.
+            // So if the requested position is before the last known one,
+            // start over from the beginning.
             position = 0;
-            line_number = 1;
+            location = SourceLocation { line: 1, column: 1 };
         }
         let mut source = &self.input[position..target];
-        while let Some(newline_position) = source.find(&['\n', '\r', '\x0C'][..]) {
+        while let Some(newline_position) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
             let offset = newline_position +
-                if source[newline_position..].starts_with("\r\n") {
-                    2
-                } else {
-                    1
-                };
+                if source[newline_position..].starts_with("\r\n") { 2 } else { 1 };
             source = &source[offset..];
             position += offset;
-            line_number += 1;
+            location.line += 1;
+            location.column = 1;
         }
         debug_assert!(position <= target);
-        self.last_known_line_break.set((line_number, position));
-        SourceLocation {
-            line: line_number,
-            // `target == position` when `target` is at the beginning of the line,
-            // so add 1 so that the column numbers start at 1.
-            column: target - position + 1,
-        }
+        location.column += target - position;
+        self.last_known_source_location.set((SourcePosition(target), location));
+        location
     }
 
     #[inline]
@@ -371,7 +368,7 @@ pub struct SourceLocation {
     /// The line number, starting at 1 for the first line.
    pub line: usize,
 
-    /// The column number within a line, starting at 1 for the character of the line.
+    /// The column number within a line, starting at 1 for first the character of the line.
     pub column: usize,
 }
 
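For reference, the condensed expression in the loop above treats "\r\n" as a single line break that advances the byte position by two. A tiny standalone check of that offset computation follows; `next_line_break_offset` is a made-up helper name for illustration, not part of cssparser.

// Illustrative helper mirroring the offset computation in the diff above.
fn next_line_break_offset(source: &str) -> Option<usize> {
    source
        .find(|c| matches!(c, '\n' | '\r' | '\x0C'))
        .map(|i| i + if source[i..].starts_with("\r\n") { 2 } else { 1 })
}

fn main() {
    assert_eq!(next_line_break_offset("ab\ncd"), Some(3));   // '\n': one byte past the break
    assert_eq!(next_line_break_offset("ab\r\ncd"), Some(4)); // "\r\n": one break, two bytes
    assert_eq!(next_line_break_offset("abcd"), None);        // no break on this line
}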