• R/O
  • SSH

Tags
Aucun tag

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

File Info

Révision 6d251fb28c86ef80ecea7ffcc452a676b4544509
Taille 10,265 octets
l'heure 2013-10-29 22:23:56
Auteur Lorenzo Isella
Message de Log

I finalized the code for the generation of the first part of the country fiche.
Now the data files need to be in a separate folder.
I am about to write a readme file with all the documentation.

Content

# Start from a clean workspace: remove every object that ls() reports in the
# current environment before anything else runs.
rm(list=ls())

# reshape2 provides melt(), which this script uses to reshape the
# renewable-energy table.
library(reshape2)

# Insert a row at a specific position of a data frame
# (see http://bit.ly/19Afqzy).
#
# Arguments:
#   existingDF  data frame to extend.
#   newrow      content of the new row (a one-row data frame, or a list /
#               vector with one entry per column).
#   r           1-based position of the new row. Must lie in
#               1 .. nrow(existingDF) + 1; the upper bound means "append".
#
# Returns a copy of existingDF with one more row: the rows formerly at
# positions r .. nrow are shifted down by one and row r holds newrow.
insertRow <- function(existingDF, newrow, r) {
  n_rows <- nrow(existingDF)
  stopifnot(length(r) == 1, !is.na(r), r >= 1, r <= n_rows + 1)

  # Shift the tail down only when there is a tail. Without this guard,
  # r == n_rows + 1 makes both seq() calls count backwards, which corrupts
  # the result instead of producing a clean append.
  if (r <= n_rows) {
    existingDF[seq(r + 1, n_rows + 1), ] <- existingDF[seq(r, n_rows), ]
  }
  existingDF[r, ] <- newrow
  existingDF
}





# ---- Run configuration ----

# Indicator files
n_ind     <- 17  # the number of the highest-labelled indicator
n_extra   <- 2   # number of extra indicators calculated by Alexander
n_missing <- 0   # number of missing indicators

# Numbers of the indicator files to read. If an indicator number is missing,
# remove it from this sequence, e.g.
## n_ind_seq <- n_ind_seq[-6]
n_ind_seq <- seq_len(n_ind)

# Reference period
sel_year    <- 2012
sel_quarter <- "2012Q"    # pattern used to find the quarterly TIME entries
extra_year  <- 2013       # sometimes a forecast is needed
year_seq    <- 2008:2013


# Read every indicator file ./script_data/<number>.csv into data_list.
# For each table the Value column is made numeric (thousands separators
# removed) and every GEO entry containing "Germany" is renamed to plain
# "Germany".
data_list <- list()

for (k in seq((n_ind - n_missing))) {

  csv_path <- paste0("./script_data/", n_ind_seq[k], ".csv")

  print("fn is, ")
  print(csv_path)

  indicator_tab <- read.csv(csv_path, header = TRUE)

  # Strip the commas before converting the values to numbers.
  indicator_tab$Value <- as.numeric(gsub(",", "", indicator_tab$Value))

  # Make "Germany" an allowed level first, rename the matching rows, then
  # discard the levels that are no longer in use.
  germany_rows <- grep("Germany", indicator_tab$GEO)
  levels(indicator_tab$GEO) <- c(levels(indicator_tab$GEO), "Germany")
  indicator_tab$GEO[germany_rows] <- "Germany"
  indicator_tab$GEO <- droplevels(indicator_tab)$GEO

  data_list[[k]] <- indicator_tab
}


# Read the indicator labels (the file has no header line).
labels <- read.csv("./script_data/labels.csv", header = FALSE)

#############################################

## #Some data manipulation on the population structure (second indicator)


## data_to_fix <- data_list[[2]]

## sel_14 <- which(data_to_fix$INDIC_DE=="Proportion of population aged 0-14 years")


## sel_24 <-   which(data_to_fix$INDIC_DE=="Proportion of population aged 15-24 years")



############################################

# Distinct GEO names of the first indicator table, without the first one.
country_list <- unique(as.character(data_list[[1]]$GEO))
country_list <- country_list[-1]

# Containers filled while building the per-country data.
country_data <- list()

res <- NULL        # long table of the regular indicators
res_extra <- NULL  # long table of the extra indicators


# Append, for every indicator table, the TIME/GEO/Value rows of the reference
# year to the long table res. Three tables contribute a second batch of rows.
# They are identified by their position in data_list, so these positions
# need to be changed if the indicator numbering changes:
#    5 -> also the rows of extra_year,
#   13 -> also the rows whose TIME contains sel_year as text,
#   17 -> also the rows of the latest TIME present in that table.
for (idx in seq(length(data_list))) {

  indicator <- data_list[[idx]]

  # Row selections, listed in the order in which they are appended to res.
  row_sets <- list(which(indicator$TIME == sel_year))

  if (idx == 5) {
    row_sets <- c(row_sets,
                  list(which(indicator$TIME == extra_year)))
  }

  if (idx == 13) {
    row_sets <- c(row_sets,
                  list(grep(as.character(sel_year),
                            as.character(indicator$TIME))))
  }

  if (idx == 17) {
    row_sets <- c(row_sets,
                  list(which(indicator$TIME == max(indicator$TIME))))
  }

  for (rows in row_sets) {
    res <- rbind(res,
                 subset(indicator[rows, ], select = c(TIME, GEO, Value)))
  }
}


# Extra indicators by Alexander: read ./script_data/extra<number>.csv into
# extra_list, cleaning each table the same way as the regular indicators
# (numeric Value, every GEO entry containing "Germany" renamed to "Germany").
extra_list <- list()

for (k in seq(n_extra)) {

  extra_path <- paste0("./script_data/extra", k, ".csv")

  extra_tab <- read.csv(extra_path, header = TRUE)

  # Strip the commas before converting the values to numbers.
  extra_tab$Value <- as.numeric(gsub(",", "", extra_tab$Value))

  # Make "Germany" an allowed level first, rename the matching rows, then
  # discard the levels that are no longer in use.
  germany_rows <- grep("Germany", extra_tab$GEO)
  levels(extra_tab$GEO) <- c(levels(extra_tab$GEO), "Germany")
  extra_tab$GEO[germany_rows] <- "Germany"
  extra_tab$GEO <- droplevels(extra_tab)$GEO

  extra_list[[k]] <- extra_tab
}

#######################

# For every extra table and every country, compute four shares of the
# "TOTAL" value for sel_year and append them to res_extra in this order:
# agriculture ("A"), industry & construction ("B-E" plus "F"),
# manufacturing ("C"), services (the codes in services_codes).
industry_codes <- c("B-E", "F")
services_codes <- c("G-I", "J", "K", "L", "M_N", "O-Q", "R-U")

for (src in seq(n_extra)) {

  sector_tab <- extra_list[[src]]

  for (cty in seq(length(country_list))) {

    country_name <- country_list[cty]

    # Rows of this country, then only those of the reference year.
    slice <- sector_tab[which(sector_tab$GEO == country_name), ]
    slice <- slice[which(slice$TIME == sel_year), ]

    # Values of the rows whose NACE_R2 code is one of `codes`.
    value_of <- function(codes) {
      slice$Value[which(slice$NACE_R2 %in% codes)]
    }

    total <- value_of("TOTAL")

    shares <- c(value_of("A") / total,
                sum(value_of(industry_codes)) / total,
                value_of("C") / total,
                sum(value_of(services_codes)) / total)

    share_rows <- data.frame(TIME = rep(sel_year, 4),
                             GEO = rep(country_name, 4),
                             Value = shares)

    res_extra <- rbind(res_extra, share_rows)
  }
}


# Some minor adjustments: express the shares in percent, one decimal.
res_extra$Value <- round(100 * res_extra$Value, 1)

# Label columns. res_extra holds blocks of four rows (one block per country),
# first all blocks of the first extra table, then all blocks of the second
# one, so n_seg is the number of blocks per table.
n_seg <- nrow(res_extra) / 8

# V1: the four indicator names, repeated for every block of both tables.
res_extra$V1 <- rep(
  c("Agriculture weight in the economy",
    "Industry & construction weight in the economy",
    "Industry & construction weight in the economy [manufacturing]",
    "Services weight in the economy"),
  n_seg * 2)

# V2: the unit, first half of the rows for the first table, second half for
# the second table.
res_extra$V2 <- rep(c("% of GVA", "% of employment"), each = n_seg * 4)


# This way I am done with the calculations of the indicators by
# Alexander.





# Renewable energies: read the wide table and reshape it to long format
# with one row per GEO/TARGET pair and former data column.
data_renewable <- read.csv("./script_data/renewable.csv", header = TRUE)
data_renewable <- melt(data_renewable, c("GEO", "TARGET"))

# Remove the commas before converting values and targets to numbers.
data_renewable$value <- as.numeric(gsub(",", "", data_renewable$value))
data_renewable$TARGET <- as.numeric(gsub(",", "", data_renewable$TARGET))

names(data_renewable) <- c("GEO", "TARGET", "TIME", "Value")

# TIME holds the former column names. Drop their first character and convert
# the rest to a number (presumably the columns are years that read.csv
# prefixed with "X" -- TODO confirm against renewable.csv).
# substring() is vectorised, so no row-by-row loop is needed; this also
# behaves correctly for a table with zero rows, where seq(0) would have
# iterated over c(1, 0).
data_renewable$TIME <- as.numeric(
  substring(as.character(data_renewable$TIME), 2))


#Now I can extract the data by country!!!



# Build the final table of every country and write it to "<country>.csv"
# (no header, no row names, comma separated). Columns of the table:
# TIME, GEO, Value and the label columns V1 and V2.
for (i in seq(length(country_list))) {

  # All rows collected in res for this country.
  sel <- which(res$GEO == country_list[i])
  country_end <- res[sel, ]

  # Merge rows by position: row 3 absorbs row 4, row 4 becomes the sum of
  # rows 5 and 6, then rows 2, 5 and 6 are dropped.
  # NOTE(review): this relies on the row order in which res was built.
  country_end[3, 3] <- country_end[3, 3] + country_end[4, 3]
  country_end[4, 3] <- country_end[5, 3] + country_end[6, 3]
  country_end <- country_end[-c(2, 5, 6), ]

  # Collapse the quarterly rows (TIME matching sel_quarter) into a single
  # row that carries their sum and sel_year as TIME.
  sel <- grep(sel_quarter, country_end$TIME)
  if (length(sel) > 0) {
    country_end$Value[sel[1]] <- sum(country_end$Value[sel])
    country_end$TIME[sel[1]] <- sel_year
    # Only drop rows when there is more than one quarterly row: with a
    # single match sel[2:length(sel)] would index backwards and yield NA.
    if (length(sel) > 1) {
      country_end <- country_end[-sel[-1], ]
    }
  }

  # Attach the labels row by row; the row counts must agree.
  country_end <- cbind(country_end, labels)
  country_end$V1 <- as.character(country_end$V1)
  country_end$V2 <- as.character(country_end$V2)

  ###################################

  # Calculate another indicator: compound the yearly percentages of the
  # fifth indicator over year_seq, i.e. (prod(1 + v / 100) - 1) * 100.
  data <- data_list[[5]]
  sel <- which(data$GEO == country_list[i])
  sel2 <- which(res_extra$GEO == country_list[i])
  data <- data[sel, ]

  gdp_vec <- seq(length(year_seq))
  for (m in seq(length(year_seq))) {
    sel <- which(data$TIME == year_seq[m])
    gdp_vec[m] <- data$Value[sel]
  }
  gdp_vec <- 1 + gdp_vec / 100.
  final_res <- round(((prod(gdp_vec) - 1) * 100), 2)

  # Row 9 serves as template (TIME and GEO) for the new row 10.
  newrow <- country_end[9, ]
  country_end <- insertRow(country_end, newrow, 10)
  country_end$V1[10] <- "Real GDP growth rate 2008-2013"
  country_end$V2[10] <- "%"
  country_end$Value[10] <- final_res

  # Sector shares of this country: rows 1-4 come from the first extra table
  # ("% of GVA"), rows 5-8 from the second one ("% of employment").
  extra_data <- res_extra[sel2, ]

  # Insert them as consecutive rows 11..18, pairing the two units of each
  # sector: 1, 5, 2, 6, 3, 7, 4, 8. The previous positions
  # (11, 12, 13, 14, 15, 16, 16, 17) reused 16 and so produced the order
  # 1, 5, 2, 6, 3, 4, 8, 7, separating the manufacturing pair.
  extra_order <- c(1, 5, 2, 6, 3, 7, 4, 8)
  for (k in seq_along(extra_order)) {
    country_end <- insertRow(country_end,
                             extra_data[extra_order[k], ],
                             10 + k)
  }

  ##############################

  # Calculate the total government expenditure as a part of the GDP.
  # The (misspelt) label below must match the text in labels.csv exactly.
  sel_gov <- which(country_end$V1 == "Total goverment expenditure")
  sel_gdp <- which(country_end$V1 == "GDP")

  gov_over_gdp <- round((country_end$Value[sel_gov] /
                         country_end$Value[sel_gdp] * 100), 1)

  # The ratio goes right below the expenditure row, reusing its other fields.
  newrow <- country_end[sel_gov, ]
  newrow$Value <- gov_over_gdp
  newrow$V2 <- "% GDP"

  country_end <- insertRow(country_end, newrow, (sel_gov + 1))

  ### Add the last indicator: the renewable value of the latest TIME
  ### available in data_renewable for this country.
  year_temp <- max(data_renewable$TIME)
  sel <- which(data_renewable$TIME == year_temp)
  data_temp <- data_renewable[sel, ]

  sel <- which(data_temp$GEO == country_list[i])
  data_temp <- data_temp[sel, ]

  # Label corrected from "gross financial energy consumption".
  final_row <- c(as.data.frame(year_temp),
                 as.data.frame(country_list[i]),
                 as.data.frame(data_temp$Value),
                 as.data.frame("Share of renewable energy in gross final energy consumption should be increased to 20%"),
                 as.data.frame("%"))

  names(final_row) <- names(country_end)

  country_end <- rbind(country_end, final_row)

  # The first value is rescaled by a factor of one million.
  country_end$Value[1] <- round((country_end$Value[1] / 1e6), 2)

  ###################################

  fn <- paste(country_list[i], ".csv", sep = "")
  write.table(country_end,
              fn,
              row.names = FALSE, col.names = FALSE, sep = ",")
}


print("So far so good")