|
Example 6 : load CSV data in your database
In this example we load the CSV city data, made available by Geonames.org, into a postgres table. For a detailed description of the data, see http://download.geonames.org/export/dump/readme.txt
Start by downloading and extracting the amalegeni source files of this file: ex06.zip
unzip ex06.zip
cd city_data
Then download datafile cities1000.zip from the geonomes site. This file contains a CSV file with all cities with a population > 1000 or seats of adm div (ca 150.000).
wget http://download.geonames.org/export/dump/cities1000.zip && \
unzip cities1000.zip && \
rm cities1000.zip
The scripts
Since you have read the documentation to previous execercises, you won't spot much new here.
Create
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
| func DropTable()
<<
drop table if exists t_city
>>
run
func CreateTable()
<<
create table t_city(
geonameid int primary key
,name varchar(128)
,asciiname varchar(128)
,latitude numeric
,longitude numeric
,country varchar(5)
,population int
,elevation int
)
>>
run
|
Check
Check if table exists, and if it contains any records.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
| func Check(tablename string)
{{
if CountTableExists(tablename)<1 {
fmt.Printf("Table %q doesn't exist.\n", tablename);
} else {
fmt.Printf("Table %q exists, and contains %v records\n",
tablename, Count(tablename));
}
fmt.Println()
}}
run
{{
tablename:="t_city"
}}
func CountTableExists(name string) int
<<
select count(1)
from pg_tables
where tableowner=user
and lower(tablename)=$1
>>
func Count(tablename injection) int
<<
select count(1)
from tablename
>>
|
Load
Read the CSV file, but only insert the cities of the Benelux countries.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
| import func ( "bufio" "strings" )
func Read()
{{
f, err := os.Open("cities1000.txt")
defer f.Close()
if err != nil {
LogErr(err, "Opening datafile")
}
sc:= bufio.NewScanner(f)
for sc.Scan() {
rec := strings.Split(sc.Text(), "\t")
if len(rec) > 16 && ( rec[8] == "BE" || rec[8] == "LU" || rec[8] == "NL" ) {
Insert( rec[0], rec[1], rec[2], rec[4], rec[5],rec[8],rec[14],rec[16] )
}
}
if err := sc.Err(); err != nil {
LogErr(err, "After scanning")
}
}}
run
func Insert(
geonameid string,
name string,
asciiname string,
latitude string,
longitude string,
country string,
population string,
elevation string
)
<<
insert into t_city (
geonameid
,name
,asciiname
,latitude
,longitude
,country
,population
,elevation
) values (
cast($1 as integer)
,$2
,$3
,cast($4 as float)
,cast($5 as float)
,$6
,cast($7 as integer)
,cast($8 as integer)
)
>>
|
Bulk Load
The above load is a bit slow, because of inserting/committing 1 row at a time, here's a faster version, doing bulk-inserts:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
| import func ( "bufio" "strings" )
func Read()
{{
f, err := os.Open("cities1000.txt")
defer f.Close()
if err != nil {
LogErr(err, "opening datafile")
return
}
citylist:=make([]City,0,0)
sc:= bufio.NewScanner(f)
for sc.Scan() {
rec := strings.Split(sc.Text(), "\t")
if len(rec) > 16 && ( rec[8] == "BE" || rec[8] == "LU" || rec[8] == "NL" ) {
citylist=append(citylist, City{ rec[0], rec[1], rec[2], rec[4],
rec[5],rec[8],rec[14],rec[16] } )
}
}
if err := sc.Err(); err != nil {
fmt.Printf("ERROR: %v\n",err)
}
fmt.Printf("Inserting %v records\n", len(citylist))
Insert(citylist)
}}
run
type City struct {
Geonameid string
Name string
Asciiname string
Latitude string
Longitude string
Country string
Population string
Elevation string
}
func Insert( citylist []City )
<<
insert into t_city (
geonameid
,name
,asciiname
,latitude
,longitude
,country
,population
,elevation
) values (
cast($1 as integer)
,$2
,$3
,cast($4 as float)
,cast($5 as float)
,$6
,cast($7 as integer)
,cast($8 as integer)
)
>>
|
Destroy
Drop the created table
1
2
3
4
5
| func DropTable()
<<
drop table if exists t_city
>>
run
|
Install all
. ./setenv.sh
ls *amg | while read S
do
amgo $S
go build ${S%.amg}
done
Run 1: plain load (commit every insert)
./create
time ./load
./check
Run 2: bulk load
./create
time ./bulkload
./check
| |