|
| 1 | +# How to find int4 PKs with out-of-range risks in a large database |
| 2 | + |
| 3 | +>我每天都会发布一篇新的 PostgreSQL "howto" 文章。加入我的旅程吧 — 订阅、提供反馈、分享! |
| 4 | +
|
| 5 | +如今,现代 ORM 比如 Rails 或 Django 使用 int8 (bigint) 类型的主键 (PK)。然而,在旧项目中,可能存在使用 int4 (integer, int, serial) 主键的老表,这些表可能已增长并存在 int4 溢出的风险 — int4 的最大值为 [2,147,483,647](https://postgresql.org/docs/current/datatype-numeric.html),且不停机将主键从 int4 转换为 int8 并不是一项简单的任务 (TODO:在另一篇使用指南中介绍转换过程)。 |
| 6 | + |
| 7 | +以下是快速检查是否存在 int2 或 int4 PK 表的方法,并了解在每种情况下已使用了多少"容量" (来自postgres-checkup 的查询): |
| 8 | + |
| 9 | +```sql |
| 10 | +do $$ |
| 11 | +declare |
| 12 | + min_relpages int8 := 0; -- in very large DBs, skip small tables by setting this to 100 |
| 13 | + rec record; |
| 14 | + out text := ''; |
| 15 | + out1 json; |
| 16 | + i numeric := 0; |
| 17 | + val int8; |
| 18 | + ratio numeric; |
| 19 | + sql text; |
| 20 | +begin |
| 21 | + for rec in |
| 22 | + select |
| 23 | + c.oid, |
| 24 | + spcname as tblspace, |
| 25 | + nspname as schema_name, |
| 26 | + relname as table_name, |
| 27 | + t.typname, |
| 28 | + pg_get_serial_sequence(format('%I.%I', nspname, relname), attname) as seq, |
| 29 | + min(attname) as attname |
| 30 | + from pg_index i |
| 31 | + join pg_class c on c.oid = i.indrelid |
| 32 | + left join pg_tablespace tsp on tsp.oid = reltablespace |
| 33 | + left join pg_namespace n on n.oid = c.relnamespace |
| 34 | + join pg_attribute a on |
| 35 | + a.attrelid = i.indrelid |
| 36 | + and a.attnum = any(i.indkey) |
| 37 | + join pg_type t on t.oid = atttypid |
| 38 | + where |
| 39 | + i.indisprimary |
| 40 | + and ( |
| 41 | + c.relpages >= min_relpages |
| 42 | + or pg_get_serial_sequence(format('%I.%I', nspname, relname), attname) is not null |
| 43 | + ) and t.typname in ('int2', 'int4') |
| 44 | + and nspname <> 'pg_toast' |
| 45 | + group by 1, 2, 3, 4, 5, 6 |
| 46 | + having count(*) = 1 -- skip PKs with 2+ columns (TODO: analyze them too) |
| 47 | + loop |
| 48 | + raise debug 'table: %', rec.table_name; |
| 49 | + |
| 50 | + if rec.seq is null then |
| 51 | + sql := format('select max(%I) from %I.%I;', rec.attname, rec.schema_name, rec.table_name); |
| 52 | + else |
| 53 | + sql := format('select last_value from %s;', rec.seq); |
| 54 | + end if; |
| 55 | + |
| 56 | + raise debug 'sql: %', sql; |
| 57 | + execute sql into val; |
| 58 | + |
| 59 | + if rec.typname = 'int4' then |
| 60 | + ratio := (val::numeric / 2^31)::numeric; |
| 61 | + elsif rec.typname = 'int2' then |
| 62 | + ratio := (val::numeric / 2^15)::numeric; |
| 63 | + else |
| 64 | + assert false, 'unreachable point'; |
| 65 | + end if; |
| 66 | + |
| 67 | + if ratio > 0.1 then -- report only if > 10% of capacity is reached |
| 68 | + i := i + 1; |
| 69 | + |
| 70 | + out1 := json_build_object( |
| 71 | + 'table', ( |
| 72 | + coalesce(nullif(quote_ident(rec.schema_name), 'public') || '.', '') |
| 73 | + || quote_ident(rec.table_name) |
| 74 | + ), |
| 75 | + 'pk', rec.attname, |
| 76 | + 'type', rec.typname, |
| 77 | + 'current_max_value', val, |
| 78 | + 'capacity_used_percent', round(100 * ratio, 2) |
| 79 | + ); |
| 80 | + |
| 81 | + raise debug 'cur: %', out1; |
| 82 | + |
| 83 | + if out <> '' then |
| 84 | + out := out || e',\n'; |
| 85 | + end if; |
| 86 | + |
| 87 | + out := out || format(' "%s": %s', rec.table_name, out1); |
| 88 | + end if; |
| 89 | + end loop; |
| 90 | + |
| 91 | + raise info e'{\n%\n}', out; |
| 92 | +end; |
| 93 | +$$ language plpgsql; |
| 94 | +``` |
| 95 | + |
| 96 | +输出示例 |
| 97 | + |
| 98 | +```sql |
| 99 | +INFO: { |
| 100 | + "oldbig": {"table" : "oldbig", "pk" : "id", "type" : "int4", "current_max_value" : 2107480000, "capacity_used_percent" : 98.14}, |
| 101 | + "oldbig2": {"table" : "oldbig2", "pk" : "id", "type" : "int4", "current_max_value" : 1107480000, "capacity_used_percent" : 51.57} |
| 102 | +} |
| 103 | +``` |
0 commit comments