Amalegeni-Go
 

Example 6 : load CSV data in your database

In this example we load the CSV city data, made available by Geonames.org, into a postgres table. For a detailed description of the data, see http://download.geonames.org/export/dump/readme.txt

Start by downloading and extracting the amalegeni source files of this file: ex06.zip

unzip ex06.zip 
cd city_data

Then download datafile cities1000.zip from the geonomes site. This file contains a CSV file with all cities with a population > 1000 or seats of adm div (ca 150.000).

wget http://download.geonames.org/export/dump/cities1000.zip && \
                                        unzip cities1000.zip && \
                                           rm cities1000.zip 

The scripts

Since you have read the documentation to previous execercises, you won't spot much new here.

Create

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
func DropTable()
<<
    drop table if exists t_city
>>
run


func CreateTable()
<<
    create table t_city(
         geonameid  int primary key
        ,name       varchar(128)
        ,asciiname  varchar(128)
        ,latitude   numeric
        ,longitude  numeric
        ,country    varchar(5)
        ,population int
        ,elevation  int
    )
>>
run

Check

Check if table exists, and if it contains any records.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
func Check(tablename string)
{{
    if CountTableExists(tablename)<1 {
        fmt.Printf("Table %q doesn't exist.\n", tablename);
    } else {
        fmt.Printf("Table %q exists, and contains %v records\n", 
                   tablename, Count(tablename));
    }
    fmt.Println()
}}
run
{{
    tablename:="t_city"
}}



func CountTableExists(name string) int
<<
    select count(1)
    from   pg_tables
    where  tableowner=user
    and    lower(tablename)=$1
>>



func Count(tablename injection) int
<<
    select count(1)
    from   tablename
>>

Load

Read the CSV file, but only insert the cities of the Benelux countries.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import func ( "bufio" "strings" ) 

func Read()
{{ 

    f, err := os.Open("cities1000.txt")
    defer f.Close()
    if err != nil {
        LogErr(err, "Opening datafile") 
    }

    sc:= bufio.NewScanner(f)
    for sc.Scan() {
        rec := strings.Split(sc.Text(), "\t")
        if len(rec) > 16 && ( rec[8] == "BE" ||  rec[8] == "LU" || rec[8] == "NL" ) { 
            Insert( rec[0], rec[1], rec[2], rec[4], rec[5],rec[8],rec[14],rec[16] )  
        }
    }
    if err := sc.Err(); err != nil {
        LogErr(err, "After scanning") 
    }
}}
run 


func Insert( 
         geonameid  string,
         name       string, 
         asciiname  string,
         latitude   string, 
         longitude  string, 
         country    string, 
         population string,
         elevation  string
    )
<<
    insert into t_city ( 
         geonameid 
        ,name     
        ,asciiname 
        ,latitude  
        ,longitude
        ,country  
        ,population 
        ,elevation 
    ) values ( 
         cast($1 as integer) 
        ,$2
        ,$3
        ,cast($4 as float) 
        ,cast($5 as float) 
        ,$6
        ,cast($7 as integer) 
        ,cast($8 as integer) 
    ) 
>>

Bulk Load

The above load is a bit slow, because of inserting/committing 1 row at a time, here's a faster version, doing bulk-inserts:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import func ( "bufio" "strings" ) 

func Read()
{{ 

    f, err := os.Open("cities1000.txt")
    defer f.Close()
    if err != nil {
        LogErr(err, "opening datafile")
        return
    }

    citylist:=make([]City,0,0) 
    sc:= bufio.NewScanner(f)
    for sc.Scan() {
        rec := strings.Split(sc.Text(), "\t")
        if len(rec) > 16 && ( rec[8] == "BE" ||  rec[8] == "LU" || rec[8] == "NL" ) { 
            citylist=append(citylist, City{ rec[0], rec[1], rec[2], rec[4], 
                                            rec[5],rec[8],rec[14],rec[16] } ) 
        }
    }
    if err := sc.Err(); err != nil {
        fmt.Printf("ERROR: %v\n",err)
    }
    fmt.Printf("Inserting %v records\n",  len(citylist))
    Insert(citylist) 
}}
run 


type City struct { 
     Geonameid string
     Name      string 
     Asciiname string
     Latitude  string 
     Longitude  string 
     Country   string 
     Population string
     Elevation string
}

func Insert( citylist []City ) 
<<
    insert into t_city ( 
         geonameid 
        ,name     
        ,asciiname 
        ,latitude  
        ,longitude
        ,country  
        ,population 
        ,elevation 
    ) values ( 
         cast($1 as integer) 
        ,$2
        ,$3
        ,cast($4 as float) 
        ,cast($5 as float) 
        ,$6
        ,cast($7 as integer) 
        ,cast($8 as integer) 
    ) 
>>

Destroy

Drop the created table

1
2
3
4
5
func DropTable()
<<
    drop table if exists t_city
>>
run

Install all

. ./setenv.sh 
ls *amg | while read S
do 
    amgo $S
    go build ${S%.amg} 
done

Run 1: plain load (commit every insert)

./create
time ./load 
./check 

Run 2: bulk load

./create
time ./bulkload 
./check 
 
© Willem Moors, 2013 - 2018