Skip to content

Commit 81d2015

Browse files
committed
Change spans to use byte offsets instead of char offsets
1 parent 8cba337 commit 81d2015

File tree

12 files changed

+161
-89
lines changed

12 files changed

+161
-89
lines changed

src/librustc/middle/trans/debuginfo.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ type compile_unit_md = {name: ~str};
112112
type subprogram_md = {id: ast::node_id};
113113
type local_var_md = {id: ast::node_id};
114114
type tydesc_md = {hash: uint};
115-
type block_md = {start: codemap::Loc<CharPos>, end: codemap::Loc<CharPos>};
115+
type block_md = {start: codemap::Loc, end: codemap::Loc};
116116
type argument_md = {id: ast::node_id};
117117
type retval_md = {id: ast::node_id};
118118

src/libsyntax/ast_util.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
use codemap::{span, CharPos};
1+
use codemap::{span, BytePos};
22
use ast::*;
33

4-
pure fn spanned<T>(+lo: CharPos, +hi: CharPos, +t: T) -> spanned<T> {
4+
pure fn spanned<T>(+lo: BytePos, +hi: BytePos, +t: T) -> spanned<T> {
55
respan(mk_sp(lo, hi), move t)
66
}
77

@@ -14,12 +14,12 @@ pure fn dummy_spanned<T>(+t: T) -> spanned<T> {
1414
}
1515

1616
/* assuming that we're not in macro expansion */
17-
pure fn mk_sp(+lo: CharPos, +hi: CharPos) -> span {
17+
pure fn mk_sp(+lo: BytePos, +hi: BytePos) -> span {
1818
span {lo: lo, hi: hi, expn_info: None}
1919
}
2020

2121
// make this a const, once the compiler supports it
22-
pure fn dummy_sp() -> span { return mk_sp(CharPos(0), CharPos(0)); }
22+
pure fn dummy_sp() -> span { return mk_sp(BytePos(0), BytePos(0)); }
2323

2424

2525

src/libsyntax/attr.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use either::Either;
66
use diagnostic::span_handler;
77
use ast_util::{spanned, dummy_spanned};
88
use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
9-
use codemap::CharPos;
9+
use codemap::BytePos;
1010

1111
// Constructors
1212
export mk_name_value_item_str;
@@ -76,7 +76,7 @@ fn mk_attr(item: @ast::meta_item) -> ast::attribute {
7676
}
7777

7878
fn mk_sugared_doc_attr(text: ~str,
79-
+lo: CharPos, +hi: CharPos) -> ast::attribute {
79+
+lo: BytePos, +hi: BytePos) -> ast::attribute {
8080
let lit = spanned(lo, hi, ast::lit_str(@text));
8181
let attr = {
8282
style: doc_comment_style(text),

src/libsyntax/codemap.rs

+106-39
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ impl CharPos: to_bytes::IterBytes {
118118
}
119119

120120
pub struct span {
121-
lo: CharPos,
122-
hi: CharPos,
121+
lo: BytePos,
122+
hi: BytePos,
123123
expn_info: Option<@ExpnInfo>
124124
}
125125

@@ -141,8 +141,10 @@ impl<D: Deserializer> span: Deserializable<D> {
141141
}
142142
}
143143

144-
pub struct Loc<A: Pos> {
145-
file: @FileMap, line: uint, col: A
144+
// XXX col shouldn't be CharPos because col is not an absolute location in the
145+
// codemap, and BytePos and CharPos always represent absolute positions
146+
pub struct Loc {
147+
file: @FileMap, line: uint, col: CharPos
146148
}
147149

148150
/// An absolute offset within the CodeMap (not a relative offset within a
@@ -178,12 +180,24 @@ pub enum FileSubstr {
178180
pub FssExternal({filename: ~str, line: uint, col: CharPos})
179181
}
180182

183+
/// Records one multi-byte character found in a FileMap's source text
184+
pub struct MultiByteChar {
185+
/// The absolute offset of the character in the CodeMap
186+
pos: BytePos,
187+
/// The number of bytes the character occupies; always >= 2
188+
bytes: uint,
189+
/// Running total kept by FileMap::record_multibyte_char: the `bytes` of
190+
/// this and every earlier recorded character (full widths, not widths - 1)
191+
sum: uint
192+
}
193+
181194
pub struct FileMap {
182195
name: FileName,
183196
substr: FileSubstr,
184197
src: @~str,
185198
start_pos: FilePos,
186-
mut lines: ~[FilePos]
199+
mut lines: ~[FilePos],
200+
multibyte_chars: DVec<MultiByteChar>
187201
}
188202

189203
pub impl FileMap {
@@ -194,7 +208,8 @@ pub impl FileMap {
194208
return FileMap {
195209
name: filename, substr: substr, src: src,
196210
start_pos: start_pos,
197-
mut lines: ~[]
211+
mut lines: ~[],
212+
multibyte_chars: DVec()
198213
};
199214
}
200215

@@ -219,6 +234,21 @@ pub impl FileMap {
219234
str::slice(*self.src, begin, end)
220235
}
221236

237+
/// Appends a multi-byte character to this FileMap's `multibyte_chars` list.
///
/// `pos` is stored as given and `bytes` is the character's width, which
/// must be between 2 and 4 inclusive. The stored `sum` is the previous
/// entry's `sum` (or 0 when the list is empty) plus `bytes`.
///
/// NOTE(review): `sum` therefore accumulates full character widths, while
/// the field's documentation speaks of 'extra' bytes -- confirm which one
/// readers of `sum` expect.
pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
    assert bytes >= 2 && bytes <= 4;

    // Carry the running total forward from the most recent entry, if any
    let mut running_total = 0;
    if self.multibyte_chars.len() > 0 {
        running_total = self.multibyte_chars.last().sum;
    }

    self.multibyte_chars.push(MultiByteChar {
        pos: pos,
        bytes: bytes,
        sum: running_total + bytes
    });
}
222252
}
223253

224254
pub struct CodeMap {
@@ -254,12 +284,11 @@ pub impl CodeMap {
254284
pos.line, pos.col.to_uint());
255285
}
256286
257-
pub fn lookup_char_pos(&self, +pos: CharPos) -> Loc<CharPos> {
258-
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
259-
return self.lookup_pos(pos, lookup);
287+
pub fn lookup_char_pos(&self, +pos: BytePos) -> Loc {
288+
return self.lookup_pos(pos);
260289
}
261290
262-
pub fn lookup_char_pos_adj(&self, +pos: CharPos)
291+
pub fn lookup_char_pos_adj(&self, +pos: BytePos)
263292
-> {filename: ~str, line: uint, col: CharPos, file: Option<@FileMap>}
264293
{
265294
let loc = self.lookup_char_pos(pos);
@@ -272,7 +301,7 @@ pub impl CodeMap {
272301
}
273302
FssInternal(sp) => {
274303
self.lookup_char_pos_adj(
275-
sp.lo + (pos - loc.file.start_pos.ch))
304+
sp.lo + (pos - loc.file.start_pos.byte))
276305
}
277306
FssExternal(eloc) => {
278307
{filename: /* FIXME (#2543) */ copy eloc.filename,
@@ -284,14 +313,13 @@ pub impl CodeMap {
284313
}
285314
286315
pub fn adjust_span(&self, sp: span) -> span {
287-
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
288-
let line = self.lookup_line(sp.lo, lookup);
316+
let line = self.lookup_line(sp.lo);
289317
match (line.fm.substr) {
290318
FssNone => sp,
291319
FssInternal(s) => {
292320
self.adjust_span(span {
293-
lo: s.lo + (sp.lo - line.fm.start_pos.ch),
294-
hi: s.lo + (sp.hi - line.fm.start_pos.ch),
321+
lo: s.lo + (sp.lo - line.fm.start_pos.byte),
322+
hi: s.lo + (sp.hi - line.fm.start_pos.byte),
295323
expn_info: sp.expn_info
296324
})
297325
}
@@ -321,18 +349,6 @@ pub impl CodeMap {
321349
return @FileLines {file: lo.file, lines: lines};
322350
}
323351

324-
fn lookup_byte_offset(&self, +chpos: CharPos)
325-
-> {fm: @FileMap, pos: BytePos} {
326-
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
327-
let {fm, line} = self.lookup_line(chpos, lookup);
328-
let line_offset = fm.lines[line].byte - fm.start_pos.byte;
329-
let col = chpos - fm.lines[line].ch;
330-
let col_offset = str::count_bytes(*fm.src,
331-
line_offset.to_uint(),
332-
col.to_uint());
333-
{fm: fm, pos: line_offset + BytePos(col_offset)}
334-
}
335-
336352
pub fn span_to_snippet(&self, sp: span) -> ~str {
337353
let begin = self.lookup_byte_offset(sp.lo);
338354
let end = self.lookup_byte_offset(sp.hi);
@@ -351,15 +367,14 @@ pub impl CodeMap {
351367
}
352368

353369
priv impl CodeMap {
354-
fn lookup_line<A: Pos>(&self, pos: A, lookup: LookupFn)
355-
-> {fm: @FileMap, line: uint}
356-
{
370+
371+
fn lookup_filemap_idx(&self, +pos: BytePos) -> uint {
357372
let len = self.files.len();
358373
let mut a = 0u;
359374
let mut b = len;
360375
while b - a > 1u {
361376
let m = (a + b) / 2u;
362-
if lookup(self.files[m].start_pos) > pos.to_uint() {
377+
if self.files[m].start_pos.byte > pos {
363378
b = m;
364379
} else {
365380
a = m;
@@ -369,22 +384,40 @@ priv impl CodeMap {
369384
fail fmt!("position %u does not resolve to a source location",
370385
pos.to_uint())
371386
}
372-
let f = self.files[a];
373-
a = 0u;
374-
b = vec::len(f.lines);
387+
388+
return a;
389+
}
390+
391+
fn lookup_line(&self, +pos: BytePos)
392+
-> {fm: @FileMap, line: uint}
393+
{
394+
let idx = self.lookup_filemap_idx(pos);
395+
let f = self.files[idx];
396+
let mut a = 0u;
397+
let mut b = vec::len(f.lines);
375398
while b - a > 1u {
376399
let m = (a + b) / 2u;
377-
if lookup(f.lines[m]) > pos.to_uint() { b = m; } else { a = m; }
400+
if f.lines[m].byte > pos { b = m; } else { a = m; }
378401
}
379402
return {fm: f, line: a};
380403
}
381404

382-
fn lookup_pos<A: Pos Num>(&self, pos: A, lookup: LookupFn) -> Loc<A> {
383-
let {fm: f, line: a} = self.lookup_line(pos, lookup);
405+
fn lookup_pos(&self, +pos: BytePos) -> Loc {
406+
let {fm: f, line: a} = self.lookup_line(pos);
407+
let line = a + 1u; // Line numbers start at 1
408+
let chpos = self.bytepos_to_local_charpos(pos);
409+
let linebpos = f.lines[a].byte;
410+
let linechpos = self.bytepos_to_local_charpos(linebpos);
411+
debug!("codemap: byte pos %? is on the line at byte pos %?",
412+
pos, linebpos);
413+
debug!("codemap: char pos %? is on the line at char pos %?",
414+
chpos, linechpos);
415+
debug!("codemap: byte is on line: %?", line);
416+
assert chpos >= linechpos;
384417
return Loc {
385418
file: f,
386-
line: a + 1u,
387-
col: pos - from_uint(lookup(f.lines[a]))
419+
line: line,
420+
col: chpos - linechpos
388421
};
389422
}
390423

@@ -394,6 +427,40 @@ priv impl CodeMap {
394427
return fmt!("%s:%u:%u: %u:%u", lo.file.name,
395428
lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
396429
}
430+
431+
fn lookup_byte_offset(&self, +bpos: BytePos)
432+
-> {fm: @FileMap, pos: BytePos} {
433+
let idx = self.lookup_filemap_idx(bpos);
434+
let fm = self.files[idx];
435+
let offset = bpos - fm.start_pos.byte;
436+
return {fm: fm, pos: offset};
437+
}
438+
439+
// Converts an absolute BytePos to a CharPos relative to the file it is
440+
// located in
441+
fn bytepos_to_local_charpos(&self, +bpos: BytePos) -> CharPos {
442+
debug!("codemap: converting %? to char pos", bpos);
443+
let idx = self.lookup_filemap_idx(bpos);
444+
let map = self.files[idx];
445+
446+
// The number of extra bytes due to multibyte chars in the FileMap
447+
let mut total_extra_bytes = 0;
448+
449+
for map.multibyte_chars.each |mbc| {
450+
debug!("codemap: %?-byte char at %?", mbc.bytes, mbc.pos);
451+
if mbc.pos < bpos {
452+
total_extra_bytes += mbc.bytes;
453+
// We should never see a byte position in the middle of a
454+
// character
455+
assert bpos == mbc.pos
456+
|| bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes;
457+
} else {
458+
break;
459+
}
460+
}
461+
462+
CharPos(bpos.to_uint() - total_extra_bytes)
463+
}
397464
}
398465

399466
//

src/libsyntax/ext/qquote.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use parse::parser;
44
use parse::parser::{Parser, parse_from_source_str};
55
use dvec::DVec;
66
use parse::token::ident_interner;
7-
use codemap::CharPos;
7+
use codemap::{CharPos, BytePos};
88

99
use fold::*;
1010
use visit::*;
@@ -16,13 +16,13 @@ use io::*;
1616
use codemap::span;
1717

1818
struct gather_item {
19-
lo: CharPos,
20-
hi: CharPos,
19+
lo: BytePos,
20+
hi: BytePos,
2121
e: @ast::expr,
2222
constr: ~str
2323
}
2424

25-
type aq_ctxt = @{lo: CharPos, gather: DVec<gather_item>};
25+
type aq_ctxt = @{lo: BytePos, gather: DVec<gather_item>};
2626
enum fragment {
2727
from_expr(@ast::expr),
2828
from_ty(@ast::Ty)
@@ -115,7 +115,7 @@ impl @ast::pat: qq_helper {
115115
fn get_fold_fn() -> ~str {~"fold_pat"}
116116
}
117117

118-
fn gather_anti_quotes<N: qq_helper>(lo: CharPos, node: N) -> aq_ctxt
118+
fn gather_anti_quotes<N: qq_helper>(lo: BytePos, node: N) -> aq_ctxt
119119
{
120120
let v = @{visit_expr: |node, &&cx, v| visit_aq(node, ~"from_expr", cx, v),
121121
visit_ty: |node, &&cx, v| visit_aq(node, ~"from_ty", cx, v),
@@ -227,7 +227,7 @@ fn finish<T: qq_helper>
227227
let mut str2 = ~"";
228228
enum state {active, skip(uint), blank};
229229
let mut state = active;
230-
let mut i = CharPos(0u);
230+
let mut i = BytePos(0u);
231231
let mut j = 0u;
232232
let g_len = cx.gather.len();
233233
for str::chars_each(*str) |ch| {
@@ -244,7 +244,7 @@ fn finish<T: qq_helper>
244244
blank if is_space(ch) => str::push_char(&mut str2, ch),
245245
blank => str::push_char(&mut str2, ' ')
246246
}
247-
i += CharPos(1u);
247+
i += BytePos(1u);
248248
if (j < g_len && i == cx.gather[j].hi) {
249249
assert ch == ')';
250250
state = active;

src/libsyntax/ext/tt/macro_parser.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use dvec::DVec;
1111
use ast::{matcher, match_tok, match_seq, match_nonterminal, ident};
1212
use ast_util::mk_sp;
1313
use std::map::HashMap;
14-
use codemap::CharPos;
14+
use codemap::BytePos;
1515

1616
/* This is an Earley-like parser, without support for in-grammar nonterminals,
1717
only by calling out to the main rust parser for named nonterminals (which it
@@ -103,7 +103,7 @@ type matcher_pos = ~{
103103
mut up: matcher_pos_up, // mutable for swapping only
104104
matches: ~[DVec<@named_match>],
105105
match_lo: uint, match_hi: uint,
106-
sp_lo: CharPos,
106+
sp_lo: BytePos,
107107
};
108108

109109
fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos {
@@ -123,7 +123,7 @@ fn count_names(ms: &[matcher]) -> uint {
123123
}
124124

125125
#[allow(non_implicitly_copyable_typarams)]
126-
fn initial_matcher_pos(ms: ~[matcher], sep: Option<Token>, lo: CharPos)
126+
fn initial_matcher_pos(ms: ~[matcher], sep: Option<Token>, lo: BytePos)
127127
-> matcher_pos {
128128
let mut match_idx_hi = 0u;
129129
for ms.each() |elt| {

0 commit comments

Comments
 (0)