Bug#805395: predict.gbm() using single.tree=TRUE does not return the correct predictions with multinomial distribution

Andrei POPESCU andreimpopescu at gmail.com
Tue Nov 17 21:23:36 UTC 2015


Control: reassign -1 r-cran-gbm 2.1-1

On Ma, 17 nov 15, 09:29:12, David Paulsen wrote:
> Package: gbm
> Version:  2.1.1
> 
> For the bernoulli distribution model, predict.gbm(model, n.trees=c(1,2), single.tree=TRUE) returns the correct results.
> 
> For the multinomial distribution model with 3 classes, the results are incorrect. The first tree is accurate, but the results for the second tree appear to contain predictions for two different classes, and a third value I cannot identify. Although the data I’m using can’t be shared, the results are clearly inaccurate, as the values do not appear in any of the first 6 trees.
> 
> 
> pretty.gbm.tree(current_upsell_gbm, 1)
> #   SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight  Prediction
> # 0        0    0.99070843        1         5           6      79.811835   4585 -0.01284530
> # 1        0    0.78340727        2         3           4       9.726234   1669  0.02624326
> # 2       -1    0.06888412       -1        -1          -1       0.000000    233  0.06888412
> # 3       -1    0.01932451       -1        -1          -1       0.000000   1436  0.01932451
> # 4       -1    0.02624326       -1        -1          -1       0.000000   1669  0.02624326
> # 5       -1   -0.03568803       -1        -1          -1       0.000000   2856 -0.03568803
> # 6       -1   -0.01284530       -1        -1          -1       0.000000     60 -0.01284530
> 
> pretty.gbm.tree(current_upsell_gbm, 2)
> #   SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight    Prediction
> # 0        0  9.907084e-01        1         2           6      25.187460   4585  0.0001823204
> # 1       -1 -2.174955e-02       -1        -1          -1       0.000000   1669 -0.0217495506
> # 2       17  4.118500e+04        3         4           5       5.837505   2856  0.0129989496
> # 3       -1  1.728153e-02       -1        -1          -1       0.000000   2426  0.0172815334
> # 4       -1 -1.116279e-02       -1        -1          -1       0.000000    430 -0.0111627907
> # 5       -1  1.299895e-02       -1        -1          -1       0.000000   2856  0.0129989496
> # 6       -1  1.823204e-04       -1        -1          -1       0.000000     60  0.0001823204
> 
> pretty.gbm.tree(current_upsell_gbm, 3)
> #   SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight   Prediction
> # 0        0   0.968661668        1         2           6      16.796788   4585  0.012662983
> # 1       -1  -0.006388166       -1        -1          -1       0.000000   1538 -0.006388166
> # 2        4   3.500000000        3         4           5       7.753722   2987  0.022472380
> # 3       -1   0.030072289       -1        -1          -1       0.000000   2075  0.030072289
> # 4       -1   0.005180921       -1        -1          -1       0.000000    912  0.005180921
> # 5       -1   0.022472380       -1        -1          -1       0.000000   2987  0.022472380
> # 6       -1   0.012662983       -1        -1          -1       0.000000     60  0.012662983
> 
> 
> 
> pretty.gbm.tree(current_upsell_gbm, 4)
> #   SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight  Prediction
> # 0        0    0.96843894        1         5           6      80.145080   4585 -0.01039978
> # 1        0    0.92423139        2         3           4       7.579153   1497  0.03221919
> # 2       -1    0.04372024       -1        -1          -1       0.000000    977  0.04372024
> # 3       -1    0.01061048       -1        -1          -1       0.000000    520  0.01061048
> # 4       -1    0.03221919       -1        -1          -1       0.000000   1497  0.03221919
> # 5       -1   -0.03153981       -1        -1          -1       0.000000   3018 -0.03153981
> # 6       -1   -0.01039978       -1        -1          -1       0.000000     70 -0.01039978
> 
> pretty.gbm.tree(current_upsell_gbm, 5)
> #   SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight    Prediction
> # 0        0  9.907084e-01        1         2           6      22.723666   4585  0.0009275118
> # 1       -1 -2.021911e-02       -1        -1          -1       0.000000   1644 -0.0202191098
> # 2       15  5.216700e+04        3         4           5       6.990267   2871  0.0130365491
> # 3       -1  1.779042e-02       -1        -1          -1       0.000000   2423  0.0177904212
> # 4       -1 -1.267468e-02       -1        -1          -1       0.000000    448 -0.0126746834
> # 5       -1  1.303655e-02       -1        -1          -1       0.000000   2871  0.0130365491
> # 6       -1  9.275118e-04       -1        -1          -1       0.000000     70  0.0009275118
> 
> pretty.gbm.tree(current_upsell_gbm, 6)
> #   SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight   Prediction
> # 0        0  9.684389e-01        1         2           6       21.27357   4585  0.008641809
> # 1       -1 -1.311138e-02       -1        -1          -1        0.00000   1497 -0.013111385
> # 2       17  4.118500e+04        3         4           5        8.95109   3018  0.019431912
> # 3       -1  1.425335e-02       -1        -1          -1        0.00000   2548  0.014253350
> # 4       -1  4.750633e-02       -1        -1          -1        0.00000    470  0.047506331
> # 5       -1  1.943191e-02       -1        -1          -1        0.00000   3018  0.019431912
> # 6       -1  8.641809e-03       -1        -1          -1        0.00000     70  0.008641809
> 
> 
> predict(current_upsell_gbm, off_test1[1,], n.trees=c(1,2))
> # , , 1
> #              -1           0            1
> # [1,] 0.06888412 -0.02174955 -0.006388166
> 
> # , , 2
> #             -1           0           1
> # [1,] 0.1126044 -0.04196866 -0.01949955
> 
> 0.06888412 + 0.04372024
> # [1] 0.1126044
> 
> -2.174955e-02 + -2.021911e-02
> # [1] -0.04196866
> 
> -0.006388166 + -1.311138e-02
> # [1] -0.01949955
> 
> 
> predict(current_upsell_gbm, off_test1[1,], n.trees=c(1,2), single.tree=TRUE)
> #, , 1
> #             -1           0            1
> #[1,] 0.06888412 -0.02174955 -0.006388166
> 
> #, , 2
> #              -1           0          1
> #[1,] -0.04196866 -0.01949955 0.07857462
> 
> 
> I am using Ubuntu 14.04.3 LTS (GNU/Linux 3.13.0-63-generic x86_64),
> R version 3.2.2 (2015-08-14) -- "Fire Safety"
> Platform: x86_64-pc-linux-gnu (64-bit)
> 
> 
> 
> 

-- 
http://wiki.debian.org/FAQsFromDebianUser
Offtopic discussions among Debian users and developers:
http://lists.alioth.debian.org/mailman/listinfo/d-community-offtopic
http://nuvreauspam.ro/gpg-transition.txt
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.alioth.debian.org/pipermail/debian-science-maintainers/attachments/20151117/8977f6e5/attachment.sig>


More information about the debian-science-maintainers mailing list