@@ -118,8 +118,8 @@ impl CharPos: to_bytes::IterBytes {
118
118
}
119
119
120
120
pub struct span {
121
- lo : CharPos ,
122
- hi : CharPos ,
121
+ lo : BytePos ,
122
+ hi : BytePos ,
123
123
expn_info : Option < @ExpnInfo >
124
124
}
125
125
@@ -141,8 +141,10 @@ impl<D: Deserializer> span: Deserializable<D> {
141
141
}
142
142
}
143
143
144
- pub struct Loc < A : Pos > {
145
- file : @FileMap , line : uint , col : A
144
+ // XXX col shouldn't be CharPos because col is not an absolute location in the
145
+ // codemap, and BytePos and CharPos always represent absolute positions
146
+ pub struct Loc {
147
+ file : @FileMap , line : uint , col : CharPos
146
148
}
147
149
148
150
/// An absolute offset within the CodeMap (not a relative offset within a
@@ -178,12 +180,24 @@ pub enum FileSubstr {
178
180
pub FssExternal ( { filename: ~str , line: uint, col: CharPos } )
179
181
}
180
182
183
+ /// Describes one multi-byte character in a FileMap: where it is and how many bytes it occupies
184
+ pub struct MultiByteChar {
185
+ /// The absolute offset of the character in the CodeMap
186
+ pos: BytePos ,
187
+ /// The number of bytes the character occupies, >=2 (record_multibyte_char asserts 2 to 4)
188
+ bytes: uint,
189
+ /// Running total of the `bytes` values of every multi-byte character recorded in the
190
+ /// FileMap so far, this one included (record_multibyte_char adds the full byte count)
191
+ sum: uint
192
+ }
193
+
181
194
pub struct FileMap {
182
195
name: FileName ,
183
196
substr: FileSubstr ,
184
197
src: @~str ,
185
198
start_pos: FilePos ,
186
- mut lines: ~[ FilePos ]
199
+ mut lines: ~[ FilePos ] ,
200
+ multibyte_chars: DVec <MultiByteChar >
187
201
}
188
202
189
203
pub impl FileMap {
@@ -194,7 +208,8 @@ pub impl FileMap {
194
208
return FileMap {
195
209
name: filename, substr: substr, src: src,
196
210
start_pos: start_pos,
197
- mut lines: ~[ ]
211
+ mut lines: ~[ ] ,
212
+ multibyte_chars: DVec ( )
198
213
} ;
199
214
}
200
215
@@ -219,6 +234,21 @@ pub impl FileMap {
219
234
str :: slice( * self . src, begin, end)
220
235
}
221
236
237
+ pub fn record_multibyte_char( & self , pos: BytePos , bytes: uint) { // Appends an entry for the multi-byte character at `pos` that occupies `bytes` bytes
238
+ assert bytes >=2 && bytes <= 4 ; // only lengths of 2, 3 or 4 bytes are accepted
239
+ let sum = if self . multibyte_chars. len( ) > 0 { // continue the running total from the most recent entry...
240
+ self . multibyte_chars. last( ) . sum
241
+ } else {
242
+ 0 // ...or start from zero when this is the first entry
243
+ } ;
244
+ let sum = sum + bytes ; // NOTE(review): adds the full byte count, not bytes - 1; confirm what readers of `sum` expect
245
+ let mbc = MultiByteChar {
246
+ pos: pos,
247
+ bytes: bytes,
248
+ sum: sum
249
+ } ;
250
+ self . multibyte_chars. push( mbc) ; // entries end up in call order; presumably callers record characters in increasing `pos` order - TODO confirm
251
+ }
222
252
}
223
253
224
254
pub struct CodeMap {
@@ -254,12 +284,11 @@ pub impl CodeMap {
254
284
pos.line, pos.col.to_uint());
255
285
}
256
286
257
- pub fn lookup_char_pos(&self, +pos: CharPos) -> Loc<CharPos> {
258
- pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
259
- return self.lookup_pos(pos, lookup);
287
+ pub fn lookup_char_pos(&self, +pos: BytePos) -> Loc {
288
+ return self.lookup_pos(pos);
260
289
}
261
290
262
- pub fn lookup_char_pos_adj(&self, +pos: CharPos )
291
+ pub fn lookup_char_pos_adj(&self, +pos: BytePos )
263
292
-> {filename: ~str, line: uint, col: CharPos, file: Option<@FileMap>}
264
293
{
265
294
let loc = self.lookup_char_pos(pos);
@@ -272,7 +301,7 @@ pub impl CodeMap {
272
301
}
273
302
FssInternal(sp) => {
274
303
self.lookup_char_pos_adj(
275
- sp.lo + (pos - loc.file.start_pos.ch ))
304
+ sp.lo + (pos - loc.file.start_pos.byte ))
276
305
}
277
306
FssExternal(eloc) => {
278
307
{filename: /* FIXME (#2543) */ copy eloc.filename,
@@ -284,14 +313,13 @@ pub impl CodeMap {
284
313
}
285
314
286
315
pub fn adjust_span(&self, sp: span) -> span {
287
- pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
288
- let line = self.lookup_line(sp.lo, lookup);
316
+ let line = self.lookup_line(sp.lo);
289
317
match (line.fm.substr) {
290
318
FssNone => sp,
291
319
FssInternal(s) => {
292
320
self.adjust_span(span {
293
- lo: s.lo + (sp.lo - line.fm.start_pos.ch ),
294
- hi: s.lo + (sp.hi - line.fm.start_pos.ch ),
321
+ lo: s.lo + (sp.lo - line.fm.start_pos.byte ),
322
+ hi: s.lo + (sp.hi - line.fm.start_pos.byte ),
295
323
expn_info: sp.expn_info
296
324
})
297
325
}
@@ -321,18 +349,6 @@ pub impl CodeMap {
321
349
return @FileLines { file : lo. file , lines : lines} ;
322
350
}
323
351
324
- fn lookup_byte_offset ( & self , +chpos : CharPos )
325
- -> { fm : @FileMap , pos : BytePos } {
326
- pure fn lookup ( pos : FilePos ) -> uint { return pos. ch . to_uint ( ) ; }
327
- let { fm, line} = self . lookup_line ( chpos, lookup) ;
328
- let line_offset = fm. lines [ line] . byte - fm. start_pos . byte ;
329
- let col = chpos - fm. lines [ line] . ch ;
330
- let col_offset = str:: count_bytes ( * fm. src ,
331
- line_offset. to_uint ( ) ,
332
- col. to_uint ( ) ) ;
333
- { fm: fm, pos: line_offset + BytePos ( col_offset) }
334
- }
335
-
336
352
pub fn span_to_snippet ( & self , sp : span ) -> ~str {
337
353
let begin = self . lookup_byte_offset ( sp. lo ) ;
338
354
let end = self . lookup_byte_offset ( sp. hi ) ;
@@ -351,15 +367,14 @@ pub impl CodeMap {
351
367
}
352
368
353
369
priv impl CodeMap {
354
- fn lookup_line<A : Pos >( & self , pos: A , lookup : LookupFn )
355
- -> { fm: @FileMap , line : uint}
356
- {
370
+
371
+ fn lookup_filemap_idx( & self , +pos: BytePos ) -> uint {
357
372
let len = self . files . len ( ) ;
358
373
let mut a = 0 u;
359
374
let mut b = len;
360
375
while b - a > 1 u {
361
376
let m = ( a + b) / 2 u;
362
- if lookup ( self . files [ m] . start_pos ) > pos. to_uint ( ) {
377
+ if self . files [ m] . start_pos . byte > pos {
363
378
b = m;
364
379
} else {
365
380
a = m;
@@ -369,22 +384,40 @@ priv impl CodeMap {
369
384
fail fmt ! ( "position %u does not resolve to a source location" ,
370
385
pos. to_uint( ) )
371
386
}
372
- let f = self . files [ a] ;
373
- a = 0 u;
374
- b = vec:: len ( f. lines ) ;
387
+
388
+ return a;
389
+ }
390
+
391
+ fn lookup_line ( & self , +pos : BytePos )
392
+ -> { fm : @FileMap , line : uint }
393
+ {
394
+ let idx = self . lookup_filemap_idx ( pos) ;
395
+ let f = self . files [ idx] ;
396
+ let mut a = 0 u;
397
+ let mut b = vec:: len ( f. lines ) ;
375
398
while b - a > 1 u {
376
399
let m = ( a + b) / 2 u;
377
- if lookup ( f. lines [ m] ) > pos. to_uint ( ) { b = m; } else { a = m; }
400
+ if f. lines [ m] . byte > pos { b = m; } else { a = m; }
378
401
}
379
402
return { fm: f, line: a} ;
380
403
}
381
404
382
- fn lookup_pos < A : Pos Num > ( & self , pos : A , lookup : LookupFn ) -> Loc < A > {
383
- let { fm: f , line : a } = self . lookup_line ( pos, lookup) ;
405
+ fn lookup_pos ( & self , +pos : BytePos ) -> Loc {
406
+ let { fm: f , line : a } = self . lookup_line ( pos) ;
407
+ let line = a + 1 u; // Line numbers start at 1
408
+ let chpos = self . bytepos_to_local_charpos ( pos) ;
409
+ let linebpos = f. lines [ a] . byte ;
410
+ let linechpos = self . bytepos_to_local_charpos ( linebpos) ;
411
+ debug ! ( "codemap: byte pos %? is on the line at byte pos %?" ,
412
+ pos, linebpos) ;
413
+ debug ! ( "codemap: char pos %? is on the line at char pos %?" ,
414
+ chpos, linechpos) ;
415
+ debug ! ( "codemap: byte is on line: %?" , line) ;
416
+ assert chpos >= linechpos;
384
417
return Loc {
385
418
file : f,
386
- line : a + 1 u ,
387
- col : pos - from_uint ( lookup ( f . lines [ a ] ) )
419
+ line : line ,
420
+ col : chpos - linechpos
388
421
} ;
389
422
}
390
423
@@ -394,6 +427,40 @@ priv impl CodeMap {
394
427
return fmt ! ( "%s:%u:%u: %u:%u" , lo. file. name,
395
428
lo. line, lo. col. to_uint( ) , hi. line, hi. col. to_uint( ) )
396
429
}
430
+
431
+ fn lookup_byte_offset ( & self , +bpos : BytePos ) // Resolves the absolute position `bpos` to the FileMap containing it plus an offset within that file
432
+ -> { fm : @FileMap , pos : BytePos } { // `pos` in the result is `bpos` minus the file's starting byte position
433
+ let idx = self . lookup_filemap_idx ( bpos) ; // index into self.files of the FileMap chosen for bpos
434
+ let fm = self . files [ idx] ;
435
+ let offset = bpos - fm. start_pos . byte ; // rebase from a CodeMap-absolute position to a file-relative one
436
+ return { fm: fm, pos: offset} ;
437
+ }
438
+
439
+ // Converts an absolute BytePos to a CharPos by subtracting the extra bytes of the multi-byte characters recorded before it in
440
+ // its FileMap. NOTE(review): the file's start offset is not subtracted here; lookup_pos only uses the difference of two results
441
+ fn bytepos_to_local_charpos ( & self , +bpos : BytePos ) -> CharPos {
442
+ debug ! ( "codemap: converting %? to char pos" , bpos) ;
443
+ let idx = self . lookup_filemap_idx ( bpos) ;
444
+ let map = self . files [ idx] ;
445
+
446
+ // The number of extra bytes (bytes beyond the first of each character) due to multibyte chars before bpos in the FileMap
447
+ let mut total_extra_bytes = 0 ;
448
+
449
+ for map. multibyte_chars. each |mbc| {
450
+ debug ! ( "codemap: %?-byte char at %?" , mbc. bytes, mbc. pos) ;
451
+ if mbc. pos < bpos {
452
+ total_extra_bytes += mbc. bytes - 1 ; // every character still counts as one char position, so only the bytes past the first are extra
453
+ // We should never see a byte position in the middle of a
454
+ // character
455
+ assert bpos == mbc. pos
456
+ || bpos. to_uint ( ) >= mbc. pos . to_uint ( ) + mbc. bytes ;
457
+ } else {
458
+ break ;
459
+ }
460
+ }
461
+
462
+ CharPos ( bpos. to_uint ( ) - total_extra_bytes)
463
+ }
397
464
}
398
465
399
466
//
0 commit comments