diff --git a/.travis.yml b/.travis.yml index fd70f4f..4432602 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,7 @@ os: - linux - osx julia: - - 0.4 - - 0.5 + - 0.6 - nightly notifications: email: false diff --git a/README.md b/README.md index 28dfda8..ad68a94 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ Swiss knife for machine learning. [![Build Status](https://travis-ci.org/JuliaStats/MLBase.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/MLBase.jl) -[![MLBase](http://pkg.julialang.org/badges/MLBase_0.4.svg)](http://pkg.julialang.org/?pkg=MLBase) -[![MLBase](http://pkg.julialang.org/badges/MLBase_0.5.svg)](http://pkg.julialang.org/?pkg=MLBase) +[![MLBase](http://pkg.julialang.org/badges/MLBase_0.6.svg)](http://pkg.julialang.org/?pkg=MLBase) [![Coveralls](https://coveralls.io/repos/github/JuliaStats/MLBase.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaStats/MLBase.jl?branch=master) This package does not implement specific machine learning algorithms. Instead, it provides a collection of useful tools to support machine learning programs, including: diff --git a/REQUIRE b/REQUIRE index 5a0c40e..4a27dad 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,5 +1,4 @@ -julia 0.4 +julia 0.6 Reexport StatsBase 0.6.9- -Iterators -Compat 0.8.7 +IterTools diff --git a/src/MLBase.jl b/src/MLBase.jl index c9cca17..675e4a5 100644 --- a/src/MLBase.jl +++ b/src/MLBase.jl @@ -1,7 +1,7 @@ module MLBase using Reexport - using Iterators + using IterTools using Compat using Compat: view @reexport using StatsBase diff --git a/src/classification.jl b/src/classification.jl index 48749ae..9c0f72f 100644 --- a/src/classification.jl +++ b/src/classification.jl @@ -32,7 +32,7 @@ end classify!(r::IntegerVector, x::RealMatrix) = classify!(r, x, Forward) -classify(x::RealMatrix, ord::Ordering) = classify!(Array(Int, size(x,2)), x, ord) +classify(x::RealMatrix, ord::Ordering) = classify!(Array{Int}(size(x,2)), x, ord) classify(x::RealMatrix) = classify(x, Forward) # classify with score(s) @@ -70,8 +70,8 @@ classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix) = function classify_withscores{T<:Real}(x::RealMatrix{T}, ord::Ordering) n = size(x, 2) - r = Array(Int, n) - s = Array(T, n) + r = Array{Int}(n) + s = Array{T}(n) return classify_withscores!(r, s, x, ord) end @@ -97,7 +97,7 @@ end classify!(r::IntegerVector, x::RealMatrix, t::Real) = classify!(r, x, t, Forward) -classify(x::RealMatrix, t::Real, ord::Ordering) = classify!(Array(Int, size(x,2)), x, t, ord) +classify(x::RealMatrix, t::Real, ord::Ordering) = classify!(Array{Int}(size(x,2)), x, t, ord) classify(x::RealMatrix, t::Real) = classify(x, t, Forward) @@ -107,13 +107,13 @@ immutable LabelMap{K} vs::Vector{K} v2i::Dict{K,Int} - function LabelMap(vs, v2i) + function LabelMap{K}(vs, v2i) where K length(vs) == length(v2i) || throw(DimensionMismatch("lengths of vs and v2i mismatch")) - new(vs, v2i) + new(vs,v2i) end end -LabelMap{K}(vs::Vector{K}, v2i::Dict{K,Int}) = LabelMap{K}(vs, v2i) +LabelMap{K}(vs::Vector{K}, v2i::Dict{K,Int})= LabelMap{K}(vs, v2i) length(lmap::LabelMap) = length(lmap.vs) keys(lmap::LabelMap) = lmap.vs @@ -154,7 +154,7 @@ labeldecode{T}(lmap::LabelMap{T}, ys::AbstractArray{Int}) = ## group labels function groupindices(k::Int, xs::IntegerVector; warning::Bool=true) - gs = Array(Vector{Int}, k) + gs = Array{Vector{Int}}(k) for i = 1:k gs[i] = Int[] end @@ -176,7 +176,7 @@ end function groupindices{T}(lmap::LabelMap{T}, xs::AbstractArray{T}) k = length(lmap) - gs = Array(Vector{Int}, k) + gs = Array{Vector{Int}}(k) for i = 1:k gs[i] = Int[] end diff --git a/src/crossval.jl b/src/crossval.jl index 7b7c3b4..718d32d 100644 --- a/src/crossval.jl +++ b/src/crossval.jl @@ -2,7 +2,7 @@ ## cross validation generators -abstract CrossValGenerator +abstract type CrossValGenerator end # K-fold @@ -25,9 +25,9 @@ immutable KfoldState e::Int # ending index end -start(c::Kfold) = KfoldState(1, 1, @compat(round(Integer,c.coeff))) +start(c::Kfold) = KfoldState(1, 1, round.(Integer,c.coeff)) next(c::Kfold, s::KfoldState) = - (i = s.i+1; (setdiff(1:length(c.permseq), c.permseq[s.s:s.e]), KfoldState(i, s.e+1, @compat(round(Integer,c.coeff * i))))) + (i = s.i+1; (setdiff(1:length(c.permseq), c.permseq[s.s:s.e]), KfoldState(i, s.e+1, round.(Integer,c.coeff * i)))) done(c::Kfold, s::KfoldState) = (s.i > c.k) # Stratified K-fold @@ -56,7 +56,7 @@ start(c::StratifiedKfold) = 1 function next(c::StratifiedKfold, s::Int) r = Int[] for (permseq, coeff) in zip(c.permseqs, c.coeffs) - a, b = @compat(round(Integer, [s-1, s] .* coeff)) + a, b = round.(Integer, [s-1, s] .* coeff) append!(r, view(permseq, a+1:b)) end setdiff(1:c.n, r), s+1 @@ -67,7 +67,7 @@ done(c::StratifiedKfold, s::Int) = (s > c.k) function leave_one_out(n::Int, i::Int) @assert 1 <= i <= n - x = Array(Int, n-1) + x = Array{Int}(n-1) for j = 1:i-1 x[j] = j end @@ -122,7 +122,7 @@ immutable StratifiedRandomSub <: CrossValGenerator for stratum_num in lengths_ord stratum_n = length(idxs[stratum_num]) remaining_proportion = remaining_sn/remaining_n - stratum_sn = max(@compat(round(Integer, remaining_proportion*stratum_n)), 1) + stratum_sn = max(round.(Integer, remaining_proportion*stratum_n), 1) remaining_n -= stratum_n remaining_sn -= stratum_sn sns[stratum_num] = stratum_sn @@ -136,7 +136,7 @@ length(c::StratifiedRandomSub) = c.k start(c::StratifiedRandomSub) = 1 function next(c::StratifiedRandomSub, s::Int) - idxs = Array(Int, 0) + idxs = Array{Int}(0) sizehint!(idxs, c.sn) for (stratum_sn, stratum_idxs) in zip(c.sns, c.idxs) append!(idxs, sample(stratum_idxs, stratum_sn, replace=false)) diff --git a/src/deprecated/datapre.jl b/src/deprecated/datapre.jl index 0b8dc21..40a47f9 100644 --- a/src/deprecated/datapre.jl +++ b/src/deprecated/datapre.jl @@ -104,8 +104,8 @@ end transform!{T<:AbstractFloat}(t::Standardize, x::DenseArray{T,1}) = transform!(x, t, x) transform!{T<:AbstractFloat}(t::Standardize, x::DenseArray{T,2}) = transform!(x, t, x) -transform{T<:Real}(t::Standardize, x::DenseArray{T,1}) = transform!(Array(Float64, size(x)), t, x) -transform{T<:Real}(t::Standardize, x::DenseArray{T,2}) = transform!(Array(Float64, size(x)), t, x) +transform{T<:Real}(t::Standardize, x::DenseArray{T,1}) = transform!(Array{Float64}(size(x)), t, x) +transform{T<:Real}(t::Standardize, x::DenseArray{T,2}) = transform!(Array{Float64}(size(x)), t, x) # estimate a standardize transform @@ -113,8 +113,8 @@ function estimate{T<:Real}(::Type{Standardize}, X::DenseArray{T,2}; center::Bool d, n = size(X) n >= 2 || error("X must contain at least two columns.") - m = Array(Float64, ifelse(center, d, 0)) - s = Array(Float64, ifelse(scale, d, 0)) + m = Array{Float64}(ifelse(center, d, 0)) + s = Array{Float64}(ifelse(scale, d, 0)) if center fill!(m, 0.0) diff --git a/src/modeltune.jl b/src/modeltune.jl index e42472f..7ecce56 100644 --- a/src/modeltune.jl +++ b/src/modeltune.jl @@ -4,7 +4,7 @@ _always_true(xs...) = true function gridtune(estfun::Function, # model estimation function evalfun::Function, # model evaluation function - params::@compat(Tuple{AbstractString, Any})...; # parameters to tune + params::Tuple{AbstractString, Any}...; # parameters to tune ord::Ordering=Forward, # ordering of score verbose::Bool=false) # whether to display the progress diff --git a/src/perfeval.jl b/src/perfeval.jl index dbbb454..772e8bf 100644 --- a/src/perfeval.jl +++ b/src/perfeval.jl @@ -24,7 +24,7 @@ end function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) n = length(gt) size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch.")) - m = min(size(rklst, 1), @compat(Int(k))) + m = min(size(rklst, 1), Int(k)) cnt = 0 @inbounds for j = 1:n @@ -74,7 +74,7 @@ function hitrates(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector) n = length(gt) h = counthits(gt, rklst, ks) nk = length(ks) - r = Array(Float64, nk) + r = Array{Float64}(nk) for i = 1:nk r[i] = h[i] / n end @@ -211,10 +211,10 @@ length(v::ThresPredVec) = length(v.preds) getindex(v::ThresPredVec, i::Integer) = ifelse(lt(v.ord, v.scores[i], v.thres), 0, v.preds[i]) # compute roc numbers based on predictions & scores & threshold -roc{PV<:IntegerVector,SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV}), t::Real, ord::Ordering) = +roc{PV<:IntegerVector,SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}, t::Real, ord::Ordering) = _roc(gt, ThresPredVec(preds..., t, ord)) -roc{PV<:IntegerVector,SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV}), thres::Real) = +roc{PV<:IntegerVector,SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) = roc(gt, preds, thres, Forward) @@ -278,7 +278,7 @@ function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord: end # produce results - r = Array(ROCNums{Int}, nt) + r = Array{ROCNums{Int}}(nt) fn = 0 tn = 0 @inbounds for i = 1:nt @@ -303,7 +303,7 @@ roc(gt::IntegerVector, scores::RealVector) = roc(gt, scores, Forward) # roc for multi-way predictions function roc{PV<:IntegerVector,SV<:RealVector}( - gt::IntegerVector, preds::@compat(Tuple{PV,SV}), thresholds::RealVector, ord::Ordering) + gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector, ord::Ordering) issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.") pr::PV = preds[1] @@ -341,7 +341,7 @@ function roc{PV<:IntegerVector,SV<:RealVector}( end # produce results - r = Array(ROCNums{Int}, nt) + r = Array{ROCNums{Int}}(nt) fn = 0 tn = 0 @inbounds for i = 1:nt @@ -354,18 +354,18 @@ function roc{PV<:IntegerVector,SV<:RealVector}( return r end -roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV}), thresholds::RealVector) = +roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector) = roc(gt, preds, thresholds, Forward) -roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV}), n::Integer, ord::Ordering) = +roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) = roc(gt, preds, lin_thresholds(preds[2],n,ord), ord) -roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV}), n::Integer) = +roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer) = roc(gt, preds, n, Forward) -roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV}), ord::Ordering) = +roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}, ord::Ordering) = roc(gt, preds, 100, ord) -roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::@compat(Tuple{PV,SV})) = +roc{PV<:IntegerVector, SV<:RealVector}(gt::IntegerVector, preds::Tuple{PV,SV}) = roc(gt, preds, Forward) diff --git a/src/utils.jl b/src/utils.jl index eb7cb17..89785bf 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -4,7 +4,7 @@ function repeach{T}(x::AbstractVector{T}, n::Integer) k = length(x) - r = Array(T, k * n) + r = Array{T}(k * n) p = 0 @inbounds for i = 1:k xi = x[i] @@ -18,7 +18,7 @@ end function repeach{T}(x::AbstractVector{T}, ns::IntegerVector) k = length(x) length(ns) == k || throw(DimensionMismatch("length(ns) should be equal to k.")) - r = Array(T, sum(ns)) + r = Array{T}(sum(ns)) p = 0 @inbounds for i = 1:k xi = x[i] @@ -35,7 +35,7 @@ end function repeachcol{T}(x::DenseArray{T,2}, n::Integer) m = size(x, 1) k = size(x, 2) - r = Array(T, m, k * n) + r = Array{T}(m, k * n) p = 0 @inbounds for i = 1:k xi = view(x, :, i) @@ -66,7 +66,7 @@ end function repeachrow{T}(x::DenseArray{T,2}, n::Integer) k = size(x, 1) m = size(x, 2) - r = Array(T, k * n, m) + r = Array{T}(k * n, m) p = 0 @inbounds for icol = 1:m p = 0 @@ -83,7 +83,7 @@ end function repeachrow{T}(x::DenseArray{T,2}, ns::IntegerVector) k = size(x, 1) m = size(x, 2) - r = Array(T, sum(ns), m) + r = Array{T}(sum(ns), m) @inbounds for icol = 1:m p = 0 for i = 1:k @@ -101,8 +101,8 @@ end ## the original array function unique_inverse(A) - out = Array(eltype(A),0) - out_idx = Array(Vector{Int}, 0) + out = Array{eltype(A)}(0) + out_idx = Array{Vector{Int}}(0) seen = Dict{eltype(A), Int}() for (idx, x) in enumerate(A) if !in(x, keys(seen)) diff --git a/test/modeltune.jl b/test/modeltune.jl index d8021a0..19890e4 100644 --- a/test/modeltune.jl +++ b/test/modeltune.jl @@ -4,7 +4,7 @@ using Base.Test ## gridtune -oracle = @compat Dict{@compat(Tuple{Int, Symbol}),Float64}( +oracle = Dict{Tuple{Int, Symbol},Float64}( (1, :a) => 2.0, (2, :a) => 1.0, (3, :a) => 3.0, diff --git a/test/perfeval.jl b/test/perfeval.jl index 5f48af2..61ba21b 100644 --- a/test/perfeval.jl +++ b/test/perfeval.jl @@ -31,10 +31,10 @@ rs = [1 2 2 1 3 2 1 1 3 3; @test counthits(gt, rs, [2, 4]) == [8, 10] @test counthits(gt, rs, 1:2:5) == [3, 8, 10] -@test_approx_eq [hitrate(gt, rs, k) for k=1:5] [0.3, 0.8, 0.8, 1.0, 1.0] -@test_approx_eq hitrates(gt, rs, 1:3) [0.3, 0.8, 0.8] -@test_approx_eq hitrates(gt, rs, [2, 4]) [0.8, 1.0] -@test_approx_eq hitrates(gt, rs, 1:2:5) [0.3, 0.8, 1.0] +@test [hitrate(gt, rs, k) for k=1:5] ≈ [0.3, 0.8, 0.8, 1.0, 1.0] +@test hitrates(gt, rs, 1:3) ≈ [0.3, 0.8, 0.8] +@test hitrates(gt, rs, [2, 4]) ≈ [0.8, 1.0] +@test hitrates(gt, rs, 1:2:5) ≈ [0.3, 0.8, 1.0] ## ROCNums @@ -58,7 +58,7 @@ r = ROCNums{Int}( @test recall(r) == 0.80 @test precision(r) == (8/13) -@test_approx_eq f1score(r) harmmean([recall(r), precision(r)]) +@test f1score(r) ≈ harmmean([recall(r), precision(r)]) ## auxiliary: find_thresbin & lin_threshold @@ -126,4 +126,3 @@ r100 = roc(gt, (pr, ss), 1.00) @test roc(gt, (pr, ss), 0.0:0.25:1.0) == [r00, r25, r50, r75, r100] # @test roc(gt, (pr, ss), 7) == roc(gt, (pr, ss), 0.2:0.1:0.8, Forward) @test roc(gt, (pr, ss)) == roc(gt, (pr, ss), MLBase.lin_thresholds([0.2, 0.8], 100, Forward)) - diff --git a/test/runtests.jl b/test/runtests.jl index faf560b..d2cacb8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,3 @@ - tests = ["utils", "classification", "perfeval", diff --git a/test/utils.jl b/test/utils.jl index a60b221..ce21cd4 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -25,7 +25,7 @@ a = rand(3, 4) # unique_inverse a = [:a, :a, :b, :c, :b, :a] ui = MLBase.unique_inverse(a) -@test isa(ui, @compat(Tuple{Vector{Symbol}, Vector{Vector{Int}}})) -b = Array(Symbol, mapreduce(length, +, ui[2])) +@test isa(ui, Tuple{Vector{Symbol}, Vector{Vector{Int}}}) +b = Array{Symbol}(mapreduce(length, +, ui[2])) for (obj, idx) in zip(ui...) b[idx] = obj end @test a == b