using TSML
using DataFrames
using Dates
using CSV
using Random
# Load the raw CSV and normalise it to the Date/Value layout used by the filters below.
fname ="testdata.csv"
dat = CSV.read(fname)
# Rename the first two columns by position, regardless of their header names.
rename!(dat,names(dat)[1]=>:Date,names(dat)[2]=>:Value)
# Parse the date strings with the day/month/year hour:minute format.
dat.Date = DateTime.(dat.Date,"d/m/y H:M")
# Keep an independent copy of the parsed table.
orig = deepcopy(dat)
# filter1: DateValgator constructed with its default arguments.
filter1 = DateValgator()
# filter2: DateValNNer with :nnsize overridden to 1; the remaining arguments keep
# their defaults (see the constructor output recorded right after this line).
filter2 = DateValNNer(Dict(:nnsize=>1))
DateValNNer(nothing, Dict{Symbol,Any}(:strict => true,:dateinterval => 1 hour,:aggregator => :median,:nnsize => 1,:missdirection => :symmetric))
# Fit filter1 on the loaded table (no target vector, hence the empty `[]`),
# apply it, then display the first five rows together with the row count.
fit!(filter1,dat,[])
res1=transform!(filter1,dat)
(first(res1,5),size(res1,1))
(5×2 DataFrame │ Row │ Date │ Value │ │ │ DateTime │ Float64⍰ │ ├─────┼─────────────────────┼──────────┤ │ 1 │ 2014-01-01T00:00:00 │ 10.0 │ │ 2 │ 2014-01-01T01:00:00 │ 9.9 │ │ 3 │ 2014-01-01T02:00:00 │ 10.0 │ │ 4 │ 2014-01-01T03:00:00 │ 10.0 │ │ 5 │ 2014-01-01T04:00:00 │ 10.0 │, 8761)
# Feed the output of filter1 into filter2: fit, transform, and display the
# first five rows together with the row count.
fit!(filter2,res1,[])
res2=transform!(filter2,res1)
(first(res2,5),size(res2,1))
(5×2 DataFrame │ Row │ Date │ Value │ │ │ DateTime │ Float64⍰ │ ├─────┼─────────────────────┼──────────┤ │ 1 │ 2014-01-01T00:00:00 │ 10.0 │ │ 2 │ 2014-01-01T01:00:00 │ 9.9 │ │ 3 │ 2014-01-01T02:00:00 │ 10.0 │ │ 4 │ 2014-01-01T03:00:00 │ 10.0 │ │ 5 │ 2014-01-01T04:00:00 │ 10.0 │, 8761)
# Bundle both filters into one Pipeline via its :transformers argument.
# NOTE(review): presumably the transformers run in the listed order
# (filter1, then filter2) -- confirm against the TSML Pipeline documentation.
mypipeline = Pipeline(Dict(
:transformers => [filter1,filter2]
)
)
Pipeline(nothing, Dict{Symbol,Union{Nothing, Array{Transformer,1}}}(:transformers => Transformer[DateValgator(Dict{Symbol,Any}(:dateinterval => 1 hour,:aggregator => :median), Dict{Symbol,Any}(:dateinterval => 1 hour,:aggregator => :median)), DateValNNer(Dict{Symbol,Any}(:loopcount => 1,:strict => true,:dateinterval => 1 hour,:missingcount => 4931,:aggregator => :median,:nnsize => 1,:missdirection => :symmetric), Dict{Symbol,Any}(:loopcount => 1,:strict => true,:dateinterval => 1 hour,:missingcount => 4931,:aggregator => :median,:nnsize => 1,:missdirection => :symmetric))],:transformer_args => nothing))
# Fit and apply the whole pipeline on the loaded table in one go, then show
# the first five rows of the result.
fit!(mypipeline,dat,[])
resp = transform!(mypipeline,dat)
first(resp,5)
| Row | Date | Value |
|---|---|---|
| | DateTime | Float64⍰ |
1 | 2014-01-01T00:00:00 | 10.0 |
2 | 2014-01-01T01:00:00 | 9.9 |
3 | 2014-01-01T02:00:00 | 10.0 |
4 | 2014-01-01T03:00:00 | 10.0 |
5 | 2014-01-01T04:00:00 | 10.0 |
using TSML.TSMLTypes
import TSML.TSMLTypes.fit!
import TSML.TSMLTypes.transform!
"""
    MyCSVDateValReader(args=Dict())

Transformer holding the settings needed to read a date/value CSV file.

Entries of `args` override these defaults:
- `:filename`   => `""`
- `:dateformat` => `""`

The `model` field is initialised to `nothing`.
"""
mutable struct MyCSVDateValReader <: Transformer
    model
    args
    function MyCSVDateValReader(args=Dict())
        defaults = Dict(:filename => "", :dateformat => "")
        # Caller-supplied entries are merged over the defaults.
        merged = mergedict(defaults, args)
        return new(nothing, merged)
    end
end
"""
    fit!(csvrdr::MyCSVDateValReader, x=[], y=[])

Validate the reader's arguments and store them as its fitted model.

`x` and `y` are accepted for interface compatibility and are not used.
Raises an error when `:filename` or `:dateformat` is an empty string.
Returns the argument dictionary that was stored in `csvrdr.model`.
"""
function fit!(csvrdr::MyCSVDateValReader,x::T=[],y::Vector=[]) where {T<:Union{DataFrame,Vector,Matrix}}
    fname = csvrdr.args[:filename]
    fmt = csvrdr.args[:dateformat]
    (fname != "" && fmt != "") || error("missing filename or date format")
    # Write to the field (not a local variable) so the fitted state is kept on
    # the reader after the call returns.
    csvrdr.model = csvrdr.args
    return csvrdr.model
end
"""
    transform!(csvrdr::MyCSVDateValReader, x=[])

Read the CSV file named by `:filename` and return it as a table with columns
`Date` and `Value`, where `Date` is parsed with the `:dateformat` string.

`x` is accepted for interface compatibility and is not used.
Raises an error when the file does not have exactly two columns.
"""
function transform!(csvrdr::MyCSVDateValReader,x::T=[]) where {T<:Union{DataFrame,Vector,Matrix}}
    path = csvrdr.args[:filename]
    datefmt = csvrdr.args[:dateformat]
    table = CSV.read(path)
    if ncol(table) != 2
        error("dataframe should have only two columns: Date,Value")
    end
    # Columns are renamed by position, whatever the headers in the file are.
    firstcol, secondcol = names(table)[1], names(table)[2]
    rename!(table, firstcol => :Date, secondcol => :Value)
    table.Date = DateTime.(table.Date, datefmt)
    return table
end
transform! (generic function with 42 methods)
# Build the custom reader for the test file and its date format.
csvreader = MyCSVDateValReader(Dict(:filename=>"testdata.csv",:dateformat=>"d/m/y H:M"))
# No input data is passed: the reader takes everything it needs from its args.
fit!(csvreader)
res=transform!(csvreader)
first(res,5)
| Row | Date | Value |
|---|---|---|
| | DateTime | Float64 |
1 | 2014-01-01T00:06:00 | 10.0 |
2 | 2014-01-01T00:18:00 | 10.0 |
3 | 2014-01-01T00:29:00 | 10.0 |
4 | 2014-01-01T00:40:00 | 9.9 |
5 | 2014-01-01T00:51:00 | 9.9 |
# Rebuild the pipeline with the CSV reader as the first transformer, followed
# by the two filters.
mypipeline = Pipeline(Dict(
:transformers => [csvreader,filter1,filter2]
)
)
# With the reader in front, fit!/transform! are called without input data.
fit!(mypipeline)
res=transform!(mypipeline)
first(res,5)
| Row | Date | Value |
|---|---|---|
| | DateTime | Float64⍰ |
1 | 2014-01-01T00:00:00 | 10.0 |
2 | 2014-01-01T01:00:00 | 9.9 |
3 | 2014-01-01T02:00:00 | 10.0 |
4 | 2014-01-01T03:00:00 | 10.0 |
5 | 2014-01-01T04:00:00 | 10.0 |
# Inspect the :missingcount entry that filter2 carries in its args after the run.
# NOTE(review): presumably the number of missing values DateValNNer had to
# fill -- confirm against the TSML documentation.
filter2.args[:missingcount]
4931
# Synthetic experiment: build a known signal, blank out most of it, impute the
# gaps, and compare against the same pipeline run on the untouched signal.
mydate=DateTime(2014,1,1):Dates.Minute(15):DateTime(2014,1,3) # 15 minutes interval
# sin + cos sampled with step 0.1, truncated to one value per timestamp.
values = Array{Union{Float64,Missing}}(sin.(1:0.1:length(mydate)) .+ cos.(1:0.1:length(mydate)))[1:length(mydate)];
# x will receive the missing values; xx stays complete as the reference.
x = DataFrame(Date=mydate,Value=values); xx = deepcopy(x);first(x,10)
# Blank out 80% of the rows, chosen by an unseeded shuffle, so the exact
# numbers differ from run to run.
nmissing=floor(nrow(x) * 0.80) |> Integer
ndxmissing=Random.shuffle(1:nrow(x))[1:nmissing]
x.Value[ndxmissing] .= missing; first(x,15)
# Run both tables through a DateValgator with default arguments.
dvtr = TSML.DateValgator()
TSML.fit!(dvtr,x,[]);TSML.fit!(dvtr,xx,[])
inputx = TSML.transform!(dvtr,x); inputxx = TSML.transform!(dvtr,xx);
# Impute with DateValNNer (hourly interval, :nnsize 1), fitted on the gappy table.
dvnnr = TSML.DateValNNer(Dict(:dateinterval=>Dates.Hour(1),:nnsize=>1))
TSML.fit!(dvnnr,inputx,[])
pred_y=TSML.transform!(dvnnr,inputx);pred_yy=TSML.transform!(dvnnr,inputxx);
# Root of the summed squared errors: square each difference before summing so
# that positive and negative errors cannot cancel each other out.
sqrt(sum((pred_y.Value .- pred_yy.Value).^2))
0.522726120677754
# Rename the imputed column so it does not clash with the reference column
# once both tables are combined.
rename!(pred_y,:Value=>:MissingVals);
# Line up imputed and reference values on their shared Date column.
jx = join(pred_y,pred_yy,on=:Date)
# Per-row squared error between imputed and reference values; display the table.
jx.error = (jx.MissingVals .- jx.Value).^2;jx
| Row | Date | MissingVals | Value | error |
|---|---|---|---|---|
| | DateTime | Float64⍰ | Float64⍰ | Float64 |
1 | 2014-01-01T00:00:00 | 0.746646 | 1.36329 | 0.380248 |
2 | 2014-01-01T01:00:00 | 0.746646 | 1.19324 | 0.199444 |
3 | 2014-01-01T02:00:00 | 0.746646 | 0.804733 | 0.00337414 |
4 | 2014-01-01T03:00:00 | -0.105454 | 0.289179 | 0.155736 |
5 | 2014-01-01T04:00:00 | -0.957554 | -0.272029 | 0.469945 |
6 | 2014-01-01T05:00:00 | -0.957554 | -0.790291 | 0.0279772 |
7 | 2014-01-01T06:00:00 | -1.18563 | -1.18378 | 3.40069e-6 |
8 | 2014-01-01T07:00:00 | -1.4137 | -1.39038 | 0.000543709 |
9 | 2014-01-01T08:00:00 | -1.38614 | -1.37747 | 7.52128e-5 |
10 | 2014-01-01T09:00:00 | -1.05908 | -1.14708 | 0.00774523 |
11 | 2014-01-01T10:00:00 | -0.63353 | -0.735601 | 0.0104185 |
12 | 2014-01-01T11:00:00 | -0.207982 | -0.207982 | 0.0 |
13 | 2014-01-01T12:00:00 | 0.526896 | 0.352472 | 0.0304236 |
14 | 2014-01-01T13:00:00 | 1.26177 | 0.857279 | 0.163616 |
15 | 2014-01-01T14:00:00 | 1.26177 | 1.22674 | 0.00122732 |
16 | 2014-01-01T15:00:00 | 0.658726 | 1.40253 | 0.55324 |
17 | 2014-01-01T16:00:00 | 0.0556771 | 1.35688 | 1.69314 |
18 | 2014-01-01T17:00:00 | 0.0556771 | 1.09702 | 1.0844 |
19 | 2014-01-01T18:00:00 | 0.0556771 | 0.663961 | 0.370009 |
20 | 2014-01-01T19:00:00 | 0.0556771 | 0.126076 | 0.00495603 |
21 | 2014-01-01T20:00:00 | -0.499012 | -0.431713 | 0.00452912 |
22 | 2014-01-01T21:00:00 | -0.956576 | -0.921344 | 0.00124128 |
23 | 2014-01-01T22:00:00 | -1.41414 | -1.26552 | 0.0220894 |
24 | 2014-01-01T23:00:00 | -1.41414 | -1.40708 | 4.99116e-5 |
25 | 2014-01-02T00:00:00 | -1.30811 | -1.33167 | 0.000555052 |
26 | 2014-01-02T01:00:00 | -0.303306 | -1.04321 | 0.547464 |
27 | 2014-01-02T02:00:00 | 0.701501 | -0.590056 | 1.66812 |
28 | 2014-01-02T03:00:00 | 0.701501 | -0.0437399 | 0.555384 |
29 | 2014-01-02T04:00:00 | 0.701501 | 0.509482 | 0.0368714 |
30 | 2014-01-02T05:00:00 | 1.03306 | 0.982267 | 0.00257969 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ |