Bug#805395: predict.gbm() using single.tree=TRUE does not return the correct predictions with multinomial distribution
Andrei POPESCU
andreimpopescu at gmail.com
Tue Nov 17 21:23:36 UTC 2015
Control: reassign -1 r-cran-gbm 2.1-1
On Ma, 17 nov 15, 09:29:12, David Paulsen wrote:
> Package: gbm
> Version: 2.1.1
>
> For the bernoulli distribution model, predict.gbm(model, n.trees=c(1,2), single.tree=TRUE) returns the correct results.
>
> For the multinomial distribution model with 3 classes, the results are incorrect. The first tree is accurate, but the results for the second tree appear to contain predictions for two different classes, and a third value I cannot identify. Although the data I’m using can’t be shared, the results are clearly inaccurate, as the returned values do not appear in any of the first 6 trees.
>
>
> pretty.gbm.tree(current_upsell_gbm, 1)
> # SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight Prediction
> # 0 0 0.99070843 1 5 6 79.811835 4585 -0.01284530
> # 1 0 0.78340727 2 3 4 9.726234 1669 0.02624326
> # 2 -1 0.06888412 -1 -1 -1 0.000000 233 0.06888412
> # 3 -1 0.01932451 -1 -1 -1 0.000000 1436 0.01932451
> # 4 -1 0.02624326 -1 -1 -1 0.000000 1669 0.02624326
> # 5 -1 -0.03568803 -1 -1 -1 0.000000 2856 -0.03568803
> # 6 -1 -0.01284530 -1 -1 -1 0.000000 60 -0.01284530
>
> pretty.gbm.tree(current_upsell_gbm, 2)
> # SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight Prediction
> # 0 0 9.907084e-01 1 2 6 25.187460 4585 0.0001823204
> # 1 -1 -2.174955e-02 -1 -1 -1 0.000000 1669 -0.0217495506
> # 2 17 4.118500e+04 3 4 5 5.837505 2856 0.0129989496
> # 3 -1 1.728153e-02 -1 -1 -1 0.000000 2426 0.0172815334
> # 4 -1 -1.116279e-02 -1 -1 -1 0.000000 430 -0.0111627907
> # 5 -1 1.299895e-02 -1 -1 -1 0.000000 2856 0.0129989496
> # 6 -1 1.823204e-04 -1 -1 -1 0.000000 60 0.0001823204
>
> pretty.gbm.tree(current_upsell_gbm, 3)
> # SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight Prediction
> # 0 0 0.968661668 1 2 6 16.796788 4585 0.012662983
> # 1 -1 -0.006388166 -1 -1 -1 0.000000 1538 -0.006388166
> # 2 4 3.500000000 3 4 5 7.753722 2987 0.022472380
> # 3 -1 0.030072289 -1 -1 -1 0.000000 2075 0.030072289
> # 4 -1 0.005180921 -1 -1 -1 0.000000 912 0.005180921
> # 5 -1 0.022472380 -1 -1 -1 0.000000 2987 0.022472380
> # 6 -1 0.012662983 -1 -1 -1 0.000000 60 0.012662983
>
>
>
> pretty.gbm.tree(current_upsell_gbm, 4)
> # SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight Prediction
> # 0 0 0.96843894 1 5 6 80.145080 4585 -0.01039978
> # 1 0 0.92423139 2 3 4 7.579153 1497 0.03221919
> # 2 -1 0.04372024 -1 -1 -1 0.000000 977 0.04372024
> # 3 -1 0.01061048 -1 -1 -1 0.000000 520 0.01061048
> # 4 -1 0.03221919 -1 -1 -1 0.000000 1497 0.03221919
> # 5 -1 -0.03153981 -1 -1 -1 0.000000 3018 -0.03153981
> # 6 -1 -0.01039978 -1 -1 -1 0.000000 70 -0.01039978
>
> pretty.gbm.tree(current_upsell_gbm, 5)
> # SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight Prediction
> # 0 0 9.907084e-01 1 2 6 22.723666 4585 0.0009275118
> # 1 -1 -2.021911e-02 -1 -1 -1 0.000000 1644 -0.0202191098
> # 2 15 5.216700e+04 3 4 5 6.990267 2871 0.0130365491
> # 3 -1 1.779042e-02 -1 -1 -1 0.000000 2423 0.0177904212
> # 4 -1 -1.267468e-02 -1 -1 -1 0.000000 448 -0.0126746834
> # 5 -1 1.303655e-02 -1 -1 -1 0.000000 2871 0.0130365491
> # 6 -1 9.275118e-04 -1 -1 -1 0.000000 70 0.0009275118
>
> pretty.gbm.tree(current_upsell_gbm, 6)
> # SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight Prediction
> # 0 0 9.684389e-01 1 2 6 21.27357 4585 0.008641809
> # 1 -1 -1.311138e-02 -1 -1 -1 0.00000 1497 -0.013111385
> # 2 17 4.118500e+04 3 4 5 8.95109 3018 0.019431912
> # 3 -1 1.425335e-02 -1 -1 -1 0.00000 2548 0.014253350
> # 4 -1 4.750633e-02 -1 -1 -1 0.00000 470 0.047506331
> # 5 -1 1.943191e-02 -1 -1 -1 0.00000 3018 0.019431912
> # 6 -1 8.641809e-03 -1 -1 -1 0.00000 70 0.008641809
>
>
> predict(current_upsell_gbm, off_test1[1,], n.trees=c(1,2))
> # , , 1
> # -1 0 1
> # [1,] 0.06888412 -0.02174955 -0.006388166
>
> # , , 2
> # -1 0 1
> # [1,] 0.1126044 -0.04196866 -0.01949955
>
> 0.06888412 + 0.04372024
> # [1] 0.1126044
>
> -2.174955e-02 + -2.021911e-02
> # [1] -0.04196866
>
> -0.006388166 + -1.311138e-02
> # [1] -0.01949955
>
>
> predict(current_upsell_gbm, off_test1[1,], n.trees=c(1,2), single.tree=TRUE)
> #, , 1
> # -1 0 1
> #[1,] 0.06888412 -0.02174955 -0.006388166
>
> #, , 2
> # -1 0 1
> #[1,] -0.04196866 -0.01949955 0.07857462
>
>
> I am using Ubuntu 14.04.3 LTS (GNU/Linux 3.13.0-63-generic x86_64),
> R version 3.2.2 (2015-08-14) -- "Fire Safety"
> Platform: x86_64-pc-linux-gnu (64-bit)
>
>
>
>
--
http://wiki.debian.org/FAQsFromDebianUser
Offtopic discussions among Debian users and developers:
http://lists.alioth.debian.org/mailman/listinfo/d-community-offtopic
http://nuvreauspam.ro/gpg-transition.txt
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.alioth.debian.org/pipermail/debian-science-maintainers/attachments/20151117/8977f6e5/attachment.sig>
More information about the debian-science-maintainers
mailing list