使用Postgresql 实现快速插入测试数据_PostgreSQL

使用Postgresql 实现快速插入测试数据

2021-03-09 20:06kmblack1 PostgreSQL

这篇文章主要介绍了使用Postgresql 实现快速插入测试数据，具有很好的参考价值，希望对大家有所帮助。一起跟随小编过来看看吧

1.创建常规的企业信息表

									create table t_centerprises(

									 objectid bigint not null, /*唯一编号(6位行政区号+6位sn)*/

									 divid uuid not null, /*行政区唯一代码*/

									 name text not null, /*企业名称*/

									 address text not null, /*企业地址*/

									 post text, /*企业邮编*/

									 contacts text, /*联系人*/

									 tel text, /*联系电话*/

									 fax text, /*传真*/

									 describe text, /*企业备注*/ 

									 date timestamp default now() not null, /*创建日期*/

									 constraint pk_centerprisess_objectid primary key (objectid),

									 constraint fk_centerprises_divid foreign key(divid) references ts_divisions(objectid) on delete cascade

									);

									create index idx_centerprises_divid on t_centerprises(divid);

2.需要使用的函数

									/*转换16进制到字符*/

									drop function if exists hex_to_string(text);

									create or replace function hex_to_string( text) 

									 returns text as

									$$

									 declare

									 result text;

									 begin

									 execute 'select U&''\' || $1 || '''' INTO result;

									 return result;

									 end;

									$$ language plpgsql;

									/*随机生成汉字

									 汉字范围U+4E00..U+9FA5

									*/

									drop function if exists gen_random_zh(int,int);

									create or replace function gen_random_zh(imin int,imax int) 

									 returns text as 

									$$

									 declare

									 vlen integer;

									 result text;

									 begin

									 result := '';

									 vlen = floor(random()*(imax-imin)+imin);

									 for i in 1..vlen loop

									  result := result || hex_to_string(to_hex(floor(random()*(42191-19968)+19968)::integer));

									 end loop;

									 return result;

									 end;

									$$ language plpgsql;

3.常规测试数据插入(5000000条)

									insert into t_centerprises(objectid,divid,name,address,post,contacts,tel,fax,describe)

									  select (vdivid|| lpad(id::text,6,'0'))::bigint as objectid,'110101',

									  gen_random_zh(5,25) as name,gen_random_zh(10,50) as address,

									  floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts,

									  floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax,

									  gen_random_zh(32,128) as describe

									 from generate_series(1,5000000) as id;

在普通pc机上插入,大概完成时间约8小时,过程不可监控,并且cpu/内存占用率高,磁盘基本满负荷动作,读写率基本上都是100%.

4.改进后的方法, 插入(10000000条)

									do $$

									 declare vStart bigint;

									 declare vEnd bigint;

									 declare MAXVALE bigint;

									 declare INTERVAL bigint;

									 declare vprovince integer;

									 declare vprefecture integer;

									 declare vcounty integer;

									 declare vdivid text;

									 declare vdividex uuid;

									begin

									 vprovince := 10;vprefecture := 1;vcounty := 1;

									 MAXVALE := 1000000;

									 INTERVAL := 1000; vStart := 1 ;vEnd := INTERVAL;

									 vdivid := (lpad(vprovince::text,2,'0') || lpad(vprefecture::text,2,'0') || lpad(vcounty::text,2,'0'))::text;

									 vdividex := (select objectid from ts_divisions where province=vprovince and prefecture=vprefecture and county=vcounty);

									 loop 

									 insert into t_centerprises(objectid,divid,name,address,post,contacts,tel,fax,describe)

									  select (vdivid|| lpad(id::text,6,'0'))::bigint as objectid,vdividex as divid,

									  gen_random_zh(5,25) as name,gen_random_zh(10,50) as address,

									  floor(random()*(699999-600000)+600000) as post,gen_random_zh(2,8) as contacts,

									  floor(random()*(69999999-60000000)+60000000) as tel,floor(random()*(69999999-60000000)+60000000) as fax,

									  gen_random_zh(32,128) as describe

									 from generate_series(vStart,vEnd) as id;

									 raise notice '%', vEnd;

									 vStart := vEnd + 1; vEnd := vEnd + INTERVAL;

									 if( vEnd > MAXVALE ) then

									  return;

									 elsif(vEnd = MAXVALE) then

									  vEnd := vEnd - 1;

									 end if;

									 end loop;

									end$$;

因为运算原因, cpu/内存占用率仍然很高, 硬盘负荷较小,读写率也比较低,大概完成时间约1.5小时.

补充：postgreSQL数据库向表中快速插入1000000条数据

不用创建函数，直接向表中快速插入1000000条数据

									create table tbl_test (id int, info text, c_time timestamp);

									insert into tbl_test select generate_series(1,100000),md5(random()::text),clock_timestamp();

									select count(id) from tbl_test; --查看个数据条数