From a7b817ece6c9cd30ba53a04200d2b490d563d7db Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:10:32 +0530 Subject: [PATCH 01/10] Initial exploration for GSoC: Copula-based distributional regression experiments for gamboostLSS Initial prototype experiments and plots while studying gamboostLSS for my GSoC 2026 proposal under R Project for Statistical Computing. --- .gitignore | 8 +- README.md | 252 +++++++++++++++++++++++++++++++++----------- easy_plot.png | Bin 0 -> 8094 bytes easy_task.R | 94 +++++++++++++++++ hard_sigma_plot.png | Bin 0 -> 3801 bytes hard_task.R | 120 +++++++++++++++++++++ 6 files changed, 408 insertions(+), 66 deletions(-) create mode 100644 easy_plot.png create mode 100644 easy_task.R create mode 100644 hard_sigma_plot.png create mode 100644 hard_task.R diff --git a/.gitignore b/.gitignore index 5b6a065..d44df33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/README.md b/README.md index f2641cf..cb1a567 100644 --- a/README.md +++ b/README.md @@ -1,62 +1,190 @@ -gamboostLSS -=========== - -[![Build Status (Linux)](https://travis-ci.org/boost-R/gamboostLSS.svg?branch=master)](https://app.travis-ci.com/boost-R/gamboostLSS) -[![Build status (Windows)](https://ci.appveyor.com/api/projects/status/373t0tvx5v1i5ooq/branch/master?svg=true)](https://ci.appveyor.com/project/hofnerb/gamboostlss-s2whe/branch/master) -[![CRAN Status Badge](http://www.r-pkg.org/badges/version/gamboostLSS)](https://CRAN.R-project.org/package=gamboostLSS) -[![Coverage Status](https://coveralls.io/repos/github/boost-R/gamboostLSS/badge.svg?branch=master)](https://coveralls.io/github/boost-R/gamboostLSS?branch=master) -[![](http://cranlogs.r-pkg.org/badges/gamboostLSS)](https://CRAN.R-project.org/package=gamboostLSS) - -`gamboostLSS` implements boosting algorithms for fitting generalized linear, -additive and interaction models for to potentially high-dimensional data. -Instead of modeling only the mean, `gamboostLSS` enables the user to model -various distribution parameters such as location, scale and shape at the same -time (hence the name GAMLSS, generalized additive models for location, scale and -shape). - - -## Using gamboostLSS - -- For installation instructions see below. - -- Instructions on how to use `gamboostLSS` can be found in the - [gamboostLSS tutorial](https://www.jstatsoft.org/article/view/v074i01). - -- Details on the noncyclical fitting method can be found in - - Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2018), - Gradient boosting for distributional regression - faster tuning and improved - variable selection via noncyclical updates. - *Statistics and Computing*. 28: 673-687. DOI [10.1007/s11222-017-9754-6](http://dx.doi.org/10.1007/s11222-017-9754-6). - (Preliminary version: [ArXiv 1611.10171](https://arxiv.org/abs/1611.10171)). - -## Issues & Feature Requests - -For issues, bugs, feature requests etc. please use the [GitHub Issues](https://github.com/boost-R/gamboostLSS/issues). - -## Installation - -- Current version (from CRAN): - ``` - install.packages("gamboostLSS") - ``` - -- Latest **patch version** (patched version of CRAN package; under development) from GitHub: - ``` - library("devtools") - install_github("boost-R/gamboostLSS") - library("gamboostLSS") - ``` - -- Latest **development version** (version with new features; under development) from GitHub: - ``` - library("devtools") - install_github("boost-R/gamboostLSS", ref = "devel") - library("gamboostLSS") - ``` - - To be able to use the `install_github()` command, one needs to install `devtools` first: - ``` - install.packages("devtools") - ``` - +# gamboostLSS Project + +## πŸ“Œ Project Overview + +This project demonstrates the use of **gradient boosting for distributional regression** using the **gamboostLSS** framework in R. + +Unlike traditional regression models that only estimate the mean, **gamboostLSS** allows modeling of multiple distribution parameters such as: + +* **Location (mean, ΞΌ)** +* **Scale (variance, Οƒ)** +* **Shape parameters** + +This makes it especially useful for complex real-world datasets where variability and distributional characteristics change with predictors. + +--- + +## 🎯 Objectives + +* Understand and implement distributional regression using gamboostLSS +* Apply boosting techniques for variable selection +* Evaluate model performance using cross-validation +* Visualize model behavior and results + +--- + +## βœ… Tasks Completed + +### πŸ”Ή Easy Task + +* Dataset: `mtcars` +* Objective: Predict **mpg (miles per gallon)** using: + + * `wt` (weight) + * `hp` (horsepower) + +#### βœ” Method: + +* Fitted a **GaussianLSS model** +* Performed **cross-validation** to determine optimal boosting iterations + +#### πŸ“Š Results: + +* Optimal boosting iterations: + + * ΞΌ (mean) = 100 + * Οƒ (variance) = 60 +* Model coefficients extracted for both parameters + +#### πŸ“ˆ Visualization: + +* Cross-validation risk vs boosting iterations +* Demonstrates convergence and optimal stopping point + +![Cross Validation Plot](plots/easy_plot.png) + +This plot shows the cross-validation risk across boosting iterations. +The optimal stopping point corresponds to the minimum risk. + +--- + +### πŸ”Ή Hard Task + +#### πŸ“Š Data Simulation: + +* Generated dataset with: + + * 500 observations + * 20 predictor variables +* Only first **7 variables were informative**, rest were noise + +#### βš™οΈ Model Design: + +* Two response variables: **Y1 and Y2** +* Each had: + + * Different mean (ΞΌ) functions + * Different variance (Οƒ) functions +* Dependency introduced using a **Gaussian copula** + +#### 🧠 Model Fitting: + +* Separate **GaussianLSS models** fitted for Y1 and Y2 +* Applied **10-fold cross-validation** to determine optimal stopping + +#### πŸ“Š Results: + +* **Y1 important variables:** X1, X2, X5 +* **Y2 important variables:** X3, X4, X6 +* Noise variables (X8–X20) were mostly ignored + +#### πŸ“ˆ Visualizations: + +* Cross-validation plots +* Sigma (variance) behavior plots +* Demonstrates how variance changes with predictors + +![Sigma Plot](plots/hard_sigma_plot.png) + +This plot illustrates how the variance (sigma) changes with predictors, +highlighting the model’s ability to capture heteroscedasticity. + +--- + +## 🧠 Interpretation of Results + +The model successfully captures both the mean (ΞΌ) and variance (Οƒ) of the response variables. + +- Variables X1–X6 were correctly identified as important predictors, showing the effectiveness of boosting for variable selection. +- Noise variables were largely ignored, demonstrating robustness in high-dimensional settings. +- The sigma plots indicate heteroscedasticity, meaning the variance changes with predictors rather than remaining constant. + +This highlights the advantage of distributional regression over traditional regression models. + +--- + +## πŸ’‘ Why This Matters + +Traditional regression models only estimate the mean of the response variable. However, in many real-world problems, the variability also depends on predictors. + +The gamboostLSS framework allows modeling of the full distribution, making it useful in: +- Finance (risk modeling) +- Healthcare (uncertainty in predictions) +- Environmental studies (variable conditions) + +--- + +## πŸ§ͺ Key Insights + +* The model successfully identified **true underlying variables** +* Demonstrated strong **variable selection capability** +* Effectively handled **high-dimensional data with noise** +* Showed the advantage of modeling **both mean and variance** + +--- + +## ▢️ How to Run + +1. Install required packages: + +```r +install.packages("gamboostLSS") +``` + +2. Run scripts: + +```r +source("scripts/easy_task.R") +source("scripts/hard_task.R") +``` + +--- + +## πŸ“ Project Structure + +``` +gamboostLSS-project/ +β”‚ +β”œβ”€β”€ scripts/ +β”‚ β”œβ”€β”€ easy_task.R +β”‚ β”œβ”€β”€ hard_task.R +β”‚ +β”œβ”€β”€ plots/ +β”‚ β”œβ”€β”€ easy_plot.png +β”‚ β”œβ”€β”€ hard_plot.png +β”‚ +β”œβ”€β”€ README.md +``` + +--- + +## πŸ”— Repository Contents + +* Easy Task R Script +* Hard Task R Script +* Visualizations and outputs + +--- + +## πŸš€ Future Improvements + +* Extend to other distributions beyond GaussianLSS +* Apply model to real-world datasets +* Improve visualization and interpretability +* Explore hyperparameter tuning strategies + +--- + +## πŸ™Œ Acknowledgment + +* This project was completed as part of preparation for **Google Summer of Code (GSoC)**, demonstrating understanding of distributional regression and boosting techniques. diff --git a/easy_plot.png b/easy_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..17a92d961e49ac3208179b40c74bb7932fcb4c12 GIT binary patch literal 8094 zcmds+XIN9)+OB5+L8*#}lu#5DP$^O*(h&s#=>j4GA|O2!=>ZZ&K?MQ|Ql&{13@t$D zutcfSdkLZUfV9xz%&fKd`qtim&-I-j=SOmlYi4AQF~@wz^FHrA-sx(qG0<|*0sz3E ze*dmM06-?lpVJiJm5nl??*PDKqkdQUq0eU`UMGxiF<^WhM{OmDKF8Nc`}Q24Dt*ya zR+W0{XT!SZnw6#98QylKC585(7!5xZGM}-e>0z{M?|9EN*uS`?Wle`j%_)fzYXZt=0fJj?11O zj&-Tojycy$?f-NPJcprN| zo$2rrPgosEiq?N=_09b0VuYkxfg*oXCWjQLP-WuCd^?skOu8ClgJEHk%Zw(5xIy>J^jkS?_tQR%N{%G! zYxolHxp;K1-%3DNQ!O2ymvtnO2hUQ_%Cejr${WIc(r+bA@8+5&ZdYN_cpBZHAu6UqvkXQwe(5+7 z0SzGpY_v_EGn@-6tv4VBEp>Qf#&Vx8Rqs@2)r(45Cv2cMGG@1Qbvi9moiw`Y>s@9{ z823=`+>F-GUB7=~@xfn&_Qs{uOC9db!aLca)+RCf=BBdjl!6ox2K8T+nb8}K${7z? z!sMlvzYkOzuVeC79a6l!`ks2linv>BT}`mAldq5+bu4s)4Ur1+MS^4CLznA9DcYW; z-VL?HXTEyV$KwoxZ|D3>$@K0E2TWTgepkK_H52Fr-_us=B~3>bMxsfE$?}|z9s2#n z5TQ2i;L3{D($#){(Kc>I92+A%+#g{kAcG5~kWhs-*$hH}RAnX(;PvHHh>{@1|5hU> z#Kw4HoW4P@iWVq5+vKKgn{Td^Cx?UruA&uQ?CetjkVboEq1lCE&!>VBE$P;i4M11#6qN@oX(-7ktGvHRpjYgqjZ*2}g|l7H@f(f9lVML+Ogtt2 zx+WHyK2Wp4)t;n18c|hxD9Qv}uz3h?O|X1e&;g(g^WXiDlm*V_M$#Bw#@Umu=4F=g zL=^8Z?(8&jy8OcR@?F24e&Z<6LTLMNZ_{HkNDfO;K3c^Cs1-1-N9+O!J)h2uTSIG9h;xwwfaRXZURTsYVYL zb&v_iyR|zt1>X|*vO@TGq>8?u)mkiVsDkJpH@OYIsdh=kPd34 zt9bYB`7U4vhDIr{QE;H353rOdgS$$UC|xQ^;Pk!!e;RM7JZ5f%-w+U-8Ub_-E5YRn zQnjB+-r)$WZZ#%maS~rV17Hqqkfght?A0(~1DoyT4n6vAT&}5Eax0O*)dti(qF+Jgz z=Cvf&8pOaIQGFTEWh6>gHKr%%W6;EK%1j4*ErwI2HpDk&?2JBsQ`T=bLBSg;Q4X2# zu%;aOW;!UX=fGE4F}}?F0TJlS+L09$*L(a7ej1FIr!^t*A0O{beY2N|n9{=$#2%z) zM@!M~uuYxLj#ACObx;^ru7~K03scE-SVmK#dSe(CtRE-oU#q}Lj~p;#lYR2CqV=N| zov*4%4;mevAh-fv4g}v~+iCFk4M*m`@ZmB1L8s%9mu}jg! z)~8Z2+#+WOxU{mC*`49XWGRuFLk@VcVFjIL`Nr^HteF7DOMuC4kgM|S=X74weq=2r(crQS= z+DQo~Q&t>2IrXVXMRQm|bdnnN%HD+;Evt#6iu%~$Q(6rTQzqZxrX3dU8}!iPwmVI- zp>%qpqxp9R;8c(B5b}beyT0fN_Gnk*wrbU6)17Qp%KDqR3i zq}sRl`c*`U`eNu(j=IDMxQqRO0NMA?cY*mV0UqrnRu2KLbjvLreaQ~Kh#w)g9=XA) z;pe*n-I5hKyH7WUcq%A6W#}9=*8Qb{=u4bkn5Kr8_G}s^4TYw3UY~HOFt&D(nNdAjt-|K0q(#+yN zAWF>R>gBJ&Wunnxrl=SCPa$X3qyW+$-8cihepbW}uJX^1WKEIZ`XlHXFf*Zi0f61B zL=99ACMLcH=j${t=NeY_u|%eaHW&sp>m8mA!GB2T09<7xhk0O)sodtCj+qY^w6{_r z&3dj(GHeKk9trj!YLvTSfwTtGzx5@9VPWr@daxy1%Xf7E^~#1)#yxRv)`X2f+bNQIGAy{t6dimOf|kMw$qyPXUq1zOohmWGRV(K| zSfqp!$cxh~^&$777bsqTDWV`(*GntdOSo`VKnnkpHlg&fkB; z4I_ZUmtKDyyUo`xZ_}W=Nk();{5Y-a{KfZ!7RUhn%f$$t6a_;K2I?RW%Q}L^?WH!0 zWB2`O$AX$Ntt)l={RR2a)%#0%qtNo20_4&1VnjjpMyq9K9d<2nr|SBZl-(Z!8ei2b z0=Lq6+HpK`K83|aa9K0hNjRRE<55xOn#fh+*|q9lATyTtadQLj^~l>SpqJa4`obzl zZL{ZFl})3y?n#SDJjc)Vg)V_ps0Ph!qI*HU&oIi|_raASD;IB`Mz|tJgOB|7;&!%< zEoWzP=8m7&Xxr+^>Y50b!kRL{Tz*juxwp6ZX(N7*Fsrc_da>oCZhv?E8~^Yl+)4gy zf&ZKY_M`Q5*E}Ij4sUwA`t#o6wou+7A^ZKwCeNXAv87bvBL1X}=h(7%J+W@vbaCTk zUkw@UbXY~$4fohX9!x80-wGrSBIV7F2?YgF;mA?DjJA`eQ5ONulR0Cglo|K<7(XXZ zjxavsBn7!4(9&3U{K-N+_M|}ZsMmVh;%GxOyY1w#^Z3T_ZfxC_COE*+MdSyk!%&OE zpB{TvYt>Os)mz6qV5d%tW1HgjPUPgpo>}DPATr765Swzc=dnkq-7zm-*Fmn%_spK` ztR0IC?+Pd$V8L7M>>yK|xpej&KkxPDW`QF81tSWbD3^^hkG)iU4q!~Z{{OBRQ zSH@Z~uP&2Ue>3}N)Nn(j?{?@6q~33MC1Px@qOyEx0508^Y__mx*0dLI+`49BHK$D3 ztnOu9+Q(h8+PA43KH72-Arf11f2!VoTrN4uVQ}~|IquPxMf~k@Rc33l%wT30gr#oA z(FEKl|3X`;ne?SwKPI?hP@&EJ(o8COpcO zKy#u$O0|MsQ)$(DE7b&)B=HRD5|2OU%^9TKm_XUo{EVIhcyClQE+`MCk-|sJ0E_05 zUl#zJ`@1kI(P4JD`GFFk{lu_f)MPXu7UuE>OaOr^dAI0xC}HQsv)(&z@>}Znu($bq zkF@s!7sw|8pRk<@D!_-!w{4%sE-dytk7HlisN;D>d-+Ev3>b6_eW#*VjdQ;9g`mU7s~_f-R`> zv+)zm7a9g7^cM>bSyEqytvEFUFq>=c63s1%8au-sL=8kocr#O=bo)6){=7>DqZ|Lx zLi-f}HI|801m!my-^S4z@1;hT(~7G-x0aRL?))`{ zgv8gPehxm}m@Ax?(2$1ZTjv;ndu_FP_k{X=9%_zzc(M*ePiYkay1`)LgN-C(vv zM2+fvG}6%jH3jGb>{!^JLXH}1IlXWvYgo9Gc2ZcFEZ$ke_f~xiCY$p7`=7g?2I8&# zwM3sLn31jjO8OLrF-iq^8U*%=_iW>|4Uu?t^WPWxD~)R%Y=curTVo|}Qvs~jzk;5F zdEB}=?&l@xm9XuTQ|C=>0ku1-O0h)=W;3^NaaOVMq?-nJ86Zk1;jCezti^3tBh{4l z@w>jxS$8PwRZSmY^O#79w4ln?Vxsj)PU0T0lINp*=6w`Om)R`_LQUH9rQT1&R>Mrp zIDj&6iHw>$9$L32F(Td)Nw!cY6|&M zAv4;lN}D~8{)GT~AY8Bq52%5g+|M;YQ|0|*jCoACjxZQgASZjADH@oJM7 z?KDFBO@~YO@YL=86ongv%y*^~M9cYZSSM2Aq@j$xJcyd{Kl6?MyzC}%tJA;sXyT6` zVLIHfrFp*agIGdELx<;3Um)~o!S`vpf!?VVR*^Y4ORjc=TaM-H7_o6)C+x(Ve<2olI>qA z1S5kbq93fW1dYI=h@PE}Wc;^ok;^1{_L_QObB;Cx&>019_pZR-Y(pf{AgMlNO%n%W zYk=N`27NO}^eX%=y!SI{S}*T+<)T~ESy14{_#dRmkoPj7o%EAm62idiy+KjQ@h?@Z zG=YMQ<=IslfQ4w8#qT;Y*dWHbwZaRqJZK#BAj&N?^7XKTPsuL-j+`nYOAZ^E8_AaI zd-`~MEHfx66Vc6aG#K`vz-|N3<|-|+fx41b48i-!RdW}TK#dWWt;Pf-M+%&Qd}d~6 z%Kb}`-3y;8g09v{$&q`DyfIkTRy!(6=8f2ZNCxqzht)isleZ6Hb+1wIYUtadF-DkBNwpmkvTJr zwIzEVX@dWYT6vepmEla>azoWYUZ#k?nvOzKu2s(w3Wvb@EMi?ll3M14E-&INEi8zX z zApG;Jbh0K_vrnP;aF(y`yW?C>3}JscMl2QhN<(&1EZRXtP!V;rU0acwt3s>6&Vs37 z7wAoK#9y~4l}NwpJlp2Ge+rl}!6hW6&Mq5u1yM)JzLQ_5zx|}i*J63qIL1sA5NzJK zfiBCI6y+a7XpB8eF!LK%op6$Ko(rD+rEmd)qOGuEZNdb|^ex(ZU0fP_>dpl$T5p}A z^U=K`-=05E^ZvjfD&!$Bp?!*qY>wToMVh9=zA=T(O~{-$QYv|Z@B?e_wfG?%tX1U< zq7DlffU3xwJZXLg10XNp^=8fEa6OYRsA4be_DPviEBcxwd2OJ9~jx^d^nwzXs)fxIJL>{7=uqEeU+S z{IBuBwf5^$+f8i^QP@h`BL&A36_3{=KI!uu{|>?0HZS@{B9e>{Wz0MSLv%Jk?I(e&`)ACMNLaTqk2Ltl^K>C!dh|HHS69d=j@3-@3!${tjSX!KyN`ClfhHGEt zlKH84)$-qYyzX65`2CXU7p^T~)o8}PG`qEc(c2vD>*MYp+egS1`h4pys1_F2A0a{rU-fCTU#OlKD93*_Wb zuyWHvt;XJ|VVv7s9Lweo(E8*uccx9O&+E3;OvNL8h&&krOs}}AijlrmRvZ^Th(%m& zN}y1>quR&f8dXSUg<8lV(llK7Pkw0V;ynALP8X=Jc^L4P5-9vTp#Bq(=FU7*=j0Zr zLZ!Poy?7tt6SH+rT8qnb+3xX$Gxi)HMff`^|EuvXP6LGhpiTVG*!{0we}^@f7KkDK z1oilwm0YMQ$hp`B;$pc?B=~T?{+Ps*_RK^#7A@05?X2UT-0_l61m%Di8jjO$WF$7) z8UZ4wPLcL4ovK`lCbGYWp{HGa@Y;c*XiWSE#CT$c0@uA|iY73F!O&K9-Lg8z@;qw2 ztaVYL7A?8b!mjiZ+p;+>+FzZ+B3`;&AJw_lkS0aQ&wm)CpMuVdpX@4jLUe~s+u=6| zPs$1_wdg`(r08vV=7e)yd;Lv?Y#5F_N=w^?u;FUmL1=hBrZV1XkXx#68?2HYGfPz0 zb36|&u_27`a>dV8SNCRze@^dznGk$@5&y(=JMq(6LsNIjNP6SU2F3*E8+oWo!$h8fP_q~zYC7_+0QymND!oy!VBKmwyDCGl<^E^O!_iAmhbO3J`_dd1VbK|!LI zUaly`&>;2;M5Vc=25P+}rXzFFBaP{&!T9)O_Qsz|dx`LUUDDuur&^zk<1TD(Lt@RH zTdfyN4fIPbY-&h{tI`ALbo&?o`r86HF>8*iquuSL4syB93aPYU&yr6(7VSYRj}q8x zJa5NueawI4@};`0+asON-i)ZHV(LSsMVEbkd1W?Z4ueke?^Q%+wc95ez&YulYaKeU zl85e;=?>D@22O{>w3baTBvkaF`Rwsi=$@(k5j{!g?zc0?k9)$Fq@BNxgVPvpoIbta z`|Oew;QRp(QME0=`~U@9{t=i(tN!D z7D1&Ca*X25*Lqia1A(By)Ds8rat0MFTpr zrX?AyN<+$vZ71St-ffJiW{sA$8xCCPXfrC9qFNkZ{(`9tNu2@-FgSvluX1$mga!1&D?d1de^1MFQZy$P zIhVH*iv`C9q>QUQ!yiq zak~g`l|05N9@s*|E&_oYcDy@-w`YOnIv!$)S5 zT%JQW0={3YsX~f)m&#XGm~$+d>})?mxVZhQ_$m8k~u-8Me(^C_|qG;+|Hb@|*#IFM{Xpfcib{yZLvXzWP6x)b`>4 literal 0 HcmV?d00001 diff --git a/easy_task.R b/easy_task.R new file mode 100644 index 0000000..504d0ab --- /dev/null +++ b/easy_task.R @@ -0,0 +1,94 @@ +# ============================== +# EASY TASK: gamboostLSS Example +# ============================== + +#Short-Explanation: +#================================================================ +# Objective: +# This task demonstrates how to apply the gamboostLSS model +# using a Gaussian distribution on the mtcars dataset. + +# Description: +# The goal is to predict the response variable 'mpg' +# (miles per gallon) using predictor variables such as +# horsepower (hp) and number of cylinders (cyl). + +# Approach: +# - Load required libraries +# - Use built-in dataset (mtcars) +# - Fit GaussianLSS model using gamboostLSS +# - Apply cross-validation to find optimal mstop +# - Improve model performance and avoid overfitting + +# Outcome: +# The model successfully fits the data and selects optimal +# boosting iterations using cross-validation. +# ================================================================ + + +# Install required packages (run once) +# install.packages("gamboostLSS") +# install.packages("mlbench") + +# Load libraries +library(gamboostLSS) +library(mboost) + +# Load dataset (mtcars is built-in) +data("mtcars") + +# Define response variable +# mpg = miles per gallon +# Using all other variables as predictors +df <- mtcars + +# Convert to proper format +df$mpg <- as.numeric(df$mpg) + +# ------------------------------ +# Fit GaussianLSS Model +# ------------------------------ + +model <- gamboostLSS( + mpg ~ wt + hp, # fewer variables + data = df, + families = GaussianLSS(), + control = boost_control(mstop = 100, nu = 0.1) +) + +# ------------------------------ +# Cross-validation to find mstop +# ------------------------------ + +# 10-fold cross-validation +cv <- cvrisk(model, folds = cv(model.weights(model), type = "kfold")) + +# Plot CV results +plot(cv) + +# Save plot as image +png("plots/easy_plot.png") +plot(cv) +dev.off() + +# Get optimal mstop +mstop_opt <- mstop(cv) +mstop_opt + +# Apply optimal mstop +model[mstop_opt] + +# ------------------------------ +# Selected Variables +# ------------------------------ + +# Coefficients for mean (mu) +coef(model, parameter = "mu") + +# Coefficients for variance (sigma) +coef(model, parameter = "sigma") + +# ------------------------------ +# Summary +# ------------------------------ +summary(model) \ No newline at end of file diff --git a/hard_sigma_plot.png b/hard_sigma_plot.png new file mode 100644 index 0000000000000000000000000000000000000000..9914c53dec5015d80c441cb039e80efb6ce716bc GIT binary patch literal 3801 zcmbU^X;@RqvO$6&VGpayz6VjFGAv;eu7R)^Q9w{xf?|LXB?3btAm9nOWt1=|f`F*t z1O*`s62h<^BPzqVAiH7UD4QsoB6}e3;C#M2fA0O>_xf~Kbys!uIaS?ts``>Q*-Z|! z8G}F|Q`(0Y@Wgee{vG6zONZ!cdWj1dR|0zpL(2nYfU*$66Jc`ykc6A;vO z@_Jzts8j+>XTw!U{U$&~z$Cb>BPx|mW%H;oiw9RRn}A?{lVMX4Y$}2Mjm>7m(rh+Q z$c85f#XKs4M(hmYBze1;j#FZe0|nbz<;^u==&`dz^h4`I7}Qi9(+0`ACyn)x}4`e+cUF zI@J?2d2;;i+e1~s-EVyiSH&Tg`)>qXHB~~&rz+?)E}VH=tg1ju_1E2F}Yf(k|yXzCdyee|j~S)3?W6~&9bwuyg4s=p8^ycllq`O1WZqk@(i1N8Rma|u5>0CcrO z3P25NnB1A2fV3E}4+)ZbxIK%iW4*8G%(RMwI5GCi7hdl0%Z}{J$Z7n^io~XSb%r1B zlEG4+xQ@W#mj)6)VS?*d%Oc5+WfVk@8DHBlM-y%A2DD~UV*bxvv{Jt`QXeCKQT5>W z%{XSjSS}m7V~2Lt^_^L^_O}dr<>!Il02(mn`1f4#<{LR5`W7c(c1xK2t@t&nBs{Up z?032+@rhn0WXv^x$GQ&nvFIk_smXYi`_6pr@7lj{)G-(Q(+uug1J3{Ryk!M&WjU_@kguj}BLDpFb4ZQ>cjl}8 z?{YHP?~1=$2R^l7isk~`HzVh30z{YZU)8D~P~q(9#)Za9Y-=~q4-`MWPYHD?f#w_B zXWL}Zu13ByhbQK0k<6nzfE)deXRM&E_?yfQC41DXr|OeBH7ga4S$mLixqD!TIA+8M z&+Ll&{K_LIGZN{UYzDuCRbXDTfKo8`@1?|P*)PgLJM*RU3d%wg+sRTgTfSMY^-C_3 zX!fYx`EQ>JQ!ab>r{yrVOlR4|<(syasH`GCOWP&n3276(K^%b__wM{RwIk$>-AW_A zTeymSO5ztETEk#!+6_f)z9iastN?m?#re3Cqx~n%z{+>vo4_X2bfKnUj_kUXs+4QH zNU^l*=ap*?0m@mww_@|Nhqq-z#l_KFB**YyiMzzyJ7$7IQ#3XdHXK=s7Hf+d|nHk4ynUpp0mS<~6_#DgYM z5jk01ybdkm7E_`WJ+}qK(kQBT)&Jfsh_U;T7f^Jroht&I36HgZj(4Y-@4jgSX6}x> zj&$2_&kqDv2a-nV6zAqgIA88`VphGpIV#^c9W#G+Hn-jfn;`VzCtg zOYK%k`$VY^*4q$j;t3esRq~R5Ozv(z7TTJk9EZ{8N8#l0&juQtUmxI0#OcEJk3BWPd9z~I_!hku1*M%{Clsp8<&Si0~-gjCA$z&Pj~NtZKa zg=$(YJ==cnf}})2Z64G&eb?eQZr!-)>s-63u$GXpPuCiO-gReH)QWN!|JhOJY zkGfE#Aj={)D0HqJAq8DpE$E}E4kYSwEpI49}(aBl@ z=%jpMxBBal44m$}0bBl1t#{ttz4~7cxHOttH==rse~10`HnNTDupVz#B$f zW?vs}-57cwu`fEq8=O4iKYByGOZw@{a;7(UqOxZUD|@*`Iyh!!IQKgAWm)kiWXrH> zj1cqxN@2JIO3g=zo?IMug1loPbn>WC@2O0wt^9mD99=>aENL(Wg*n@T(Ps|cO!bh; ztC#3E#qacc30h4nSYcFFdMCnixBq6?i+!SGE;z3Nk6wY;DCzgdvBi`p`GIVTHNq!G z;5svzo)WdY8V$BRS3<{c*sd3D*o)?;<)6!EzFSt_RwzN@%0w*5B7E=x8Ap%rl10ZO zw&sPBXina(vP59zoCnnu+b?rqyFfx!wzJeGm!Suqg~HyaBNq@^7frDJl3x0^Zev`> z1K5_LOlT;z_+184N6LVuNRc|`oxH_xQA4zs4ZX>P46Y4s2m_Z2k$(g=p}9aFgatAo zMx``>6M4*hmi+^DkT!3dBe38?{VKQC)Xrbd>@@qx@4FCX9be(*NWspUt{rkA#tF-%J(oaBgHuq zKGLXchnWF7f^Z`#+4>u;0M1Iqg=~tfi&XT$gO0giEjU;*;1tBOTFH$&3gKnR?Ny)q z=t@#7U9V9%atyVEV`U{oW{1=Cz~KNiFR7s{MXhmUS9!UW$_~JQ?ld;neqI{LgEu?( z-j1?3wBwVwF!36cL)0=20u&TSt=d2;-~9=VBRPbE!HWJ8Dn*d2|?2n-lLaHr%jO)8nCr1>vbxnx!@)1bHHadZpt zY39gP%FKJpeLvc^WEpG}Cgv2c6Q^PG!-)qfKj-B-7@>K~l_&Qmdt#%08Y227Z&eAr zF}RHMAFVM2jbmvilUdj(#8L9;`<{4;N9BDFe6Z{1`|kKa7hDMmf7mgyghVS)G09ol zk)?6Xxc5X&(WnJc4tQu)$tt!71s95ti7A@f=1KQGnZ;Ce%~vj^nzfqM&D*I;l? zUrMKt`9l&N=QC4z_}+pNQD&gRIbraE2lwlg&m-uWA1PY>xXjCdAe^Ff`BFG>^_m6$ zW8wV#-zn>eOt)szwJ;!WL0shD_Hm$;Y8^9ILydk;5Pf((8T(8iuPyv6mRH>rt3xLd zu}LKw=xpd1(SwUz(FajZHz=i-4Fdq9UUjtNVlDEBpCO1f2OEo(%@r@2^l~*?p1A&{g_bF;9eOPN_#Jp@3_>q zldAk_{}aN1UHS^ouEJzsF8V$5cj(U1ro~_vDT`NICnJAcF0JcQPcS{nvZ@RwKKks8 zv-h3Q((x6?{JF7f&=XFRRH{Xo!NMh(fhP&7mCFFK7~fPc{h;ZJLP-jNP>Xrcq?6_M zbqW<#7QLkhLuSfM2Bs*L^)w6l$^Ax+YH3ZJ)hU+oG{1&rck*} zG@Cc?J5q&=I`Zh!oY~RD%L^W%L(l5g_SVx^^Ri+{D=U?J`QVoZwH3^U1NU#LS> zbt^ujVBkY2{L>imY1ZTrr?_)F|LogO32zb^wIr@9-e{WKK2{pu%(x#k<5n1<`ob&C qq#R^J{kT29#(<)Kby! literal 0 HcmV?d00001 diff --git a/hard_task.R b/hard_task.R new file mode 100644 index 0000000..52e8e91 --- /dev/null +++ b/hard_task.R @@ -0,0 +1,120 @@ +# ============================== +# HARD TASK: Data Simulation +# ============================== + +# Short-Explanation: +# =============================================================== +# Objective: +# This task extends the basic model to more advanced +# modeling and interpretation using gamboostLSS. + +# Description: +# The goal is to explore deeper insights from the model, +# including parameter estimation and visualization. + +# Approach: +# - Build advanced model using gamboostLSS +# - Analyze additional parameters (like sigma) +# - Use plots to visualize relationships +# - Interpret model outputs and patterns + +# Outcome: +# The model provides deeper understanding of how predictors +# influence both mean and variance of the response variable. +# =============================================================== + + +set.seed(123) + +n <- 500 +p <- 20 + +# Generate features +X <- matrix(rnorm(n * p), n, p) +colnames(X) <- paste0("X", 1:p) +X <- as.data.frame(X) + +# Mean and variance for Y1 +mu1 <- 1 + 2*X$X1 - 0.5*X$X2 +sigma1 <- exp(0.5 * X$X5) + +# Mean and variance for Y2 +mu2 <- 0.5 - 1.5*X$X3 + X$X4 +sigma2 <- exp(0.5 - 0.3*X$X6) + +# Correlation +rho <- tanh(1 + 1.5 * X$X7) + +library(MASS) + +Y1 <- numeric(n) +Y2 <- numeric(n) + +for(i in 1:n) { + Sigma <- matrix(c(1, rho[i], rho[i], 1), 2, 2) + + z <- mvrnorm(1, mu = c(0,0), Sigma = Sigma) + + Y1[i] <- mu1[i] + sigma1[i] * z[1] + Y2[i] <- mu2[i] + sigma2[i] * z[2] +} + +data <- cbind(X, Y1, Y2) +data <- as.data.frame(data) + +library(gamboostLSS) + +model_Y1 <- gamboostLSS( + Y1 ~ ., + data = data, + families = GaussianLSS(), + control = boost_control(mstop = 100, nu = 0.1) +) + +cv_Y1 <- cvrisk(model_Y1, folds = cv(model.weights(model_Y1), type = "kfold")) + +plot(cv_Y1) + +mstop_Y1 <- mstop(cv_Y1) +mstop_Y1 + +model_Y1[mstop_Y1] + +model_Y2 <- gamboostLSS( + Y2 ~ ., + data = data, + families = GaussianLSS(), + control = boost_control(mstop = 100, nu = 0.1) +) + +cv_Y2 <- cvrisk(model_Y2, folds = cv(model.weights(model_Y2), type = "kfold")) + +plot(cv_Y2) + +mstop_Y2 <- mstop(cv_Y2) +mstop_Y2 + +model_Y2[mstop_Y2] + +# Y1 results +coef(model_Y1, parameter = "mu") +coef(model_Y1, parameter = "sigma") + +# Y2 results +coef(model_Y2, parameter = "mu") +coef(model_Y2, parameter = "sigma") + +# Scatter plot +plot(data$Y1, data$Y2, + main = "Y1 vs Y2", + xlab = "Y1", + ylab = "Y2") + +# Model plots +plot(model_Y1) +plot(model_Y2) + +# Save plot as image +png("plots/hard_sigma_plot.png") +plot(model) +dev.off() \ No newline at end of file From 0ba3ddb544dd74adbb9f81c00ea4aba61ef816f9 Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:17:47 +0530 Subject: [PATCH 02/10] Update README with project overview and tasks From fa858cc3fbc52d1b8c186aea13c33550a625642e Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:19:52 +0530 Subject: [PATCH 03/10] Update README with project details and tasks --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cb1a567..d4257c6 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ This makes it especially useful for complex real-world datasets where variabilit * Cross-validation risk vs boosting iterations * Demonstrates convergence and optimal stopping point -![Cross Validation Plot](plots/easy_plot.png) +![Cross Validation Plot](easy_plot.png) This plot shows the cross-validation risk across boosting iterations. The optimal stopping point corresponds to the minimum risk. @@ -94,7 +94,7 @@ The optimal stopping point corresponds to the minimum risk. * Sigma (variance) behavior plots * Demonstrates how variance changes with predictors -![Sigma Plot](plots/hard_sigma_plot.png) +![Sigma Plot](hard_sigma_plot.png) This plot illustrates how the variance (sigma) changes with predictors, highlighting the model’s ability to capture heteroscedasticity. From e23b63c3b091004f1feb0669b432772b123a6aef Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:25:12 +0530 Subject: [PATCH 04/10] Add gamboostLSS example script for mpg prediction This script demonstrates the application of the gamboostLSS model using the mtcars dataset to predict miles per gallon (mpg) based on horsepower and weight. It includes model fitting, cross-validation, and saving plot results. --- scripts/easy_task.R | 94 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 scripts/easy_task.R diff --git a/scripts/easy_task.R b/scripts/easy_task.R new file mode 100644 index 0000000..0e22362 --- /dev/null +++ b/scripts/easy_task.R @@ -0,0 +1,94 @@ +# ============================== +# EASY TASK: gamboostLSS Example +# ============================== + +#Short-Explanation: +#================================================================ +# Objective: +# This task demonstrates how to apply the gamboostLSS model +# using a Gaussian distribution on the mtcars dataset. + +# Description: +# The goal is to predict the response variable 'mpg' +# (miles per gallon) using predictor variables such as +# horsepower (hp) and number of cylinders (cyl). + +# Approach: +# - Load required libraries +# - Use built-in dataset (mtcars) +# - Fit GaussianLSS model using gamboostLSS +# - Apply cross-validation to find optimal mstop +# - Improve model performance and avoid overfitting + +# Outcome: +# The model successfully fits the data and selects optimal +# boosting iterations using cross-validation. +# ================================================================ + + +# Install required packages (run once) +# install.packages("gamboostLSS") +# install.packages("mlbench") + +# Load libraries +library(gamboostLSS) +library(mboost) + +# Load dataset (mtcars is built-in) +data("mtcars") + +# Define response variable +# mpg = miles per gallon +# Using all other variables as predictors +df <- mtcars + +# Convert to proper format +df$mpg <- as.numeric(df$mpg) + +# ------------------------------ +# Fit GaussianLSS Model +# ------------------------------ + +model <- gamboostLSS( + mpg ~ wt + hp, # fewer variables + data = df, + families = GaussianLSS(), + control = boost_control(mstop = 100, nu = 0.1) +) + +# ------------------------------ +# Cross-validation to find mstop +# ------------------------------ + +# 10-fold cross-validation +cv <- cvrisk(model, folds = cv(model.weights(model), type = "kfold")) + +# Plot CV results +plot(cv) + +# Save plot as image +png("plots/easy_plot.png") +plot(cv) +dev.off() + +# Get optimal mstop +mstop_opt <- mstop(cv) +mstop_opt + +# Apply optimal mstop +model[mstop_opt] + +# ------------------------------ +# Selected Variables +# ------------------------------ + +# Coefficients for mean (mu) +coef(model, parameter = "mu") + +# Coefficients for variance (sigma) +coef(model, parameter = "sigma") + +# ------------------------------ +# Summary +# ------------------------------ +summary(model) From 7b61cb55b1be96f745354643a2678841bec0e3f6 Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:26:32 +0530 Subject: [PATCH 05/10] Add data simulation and modeling script for Y1 and Y2 This script simulates data for two response variables Y1 and Y2 using advanced modeling techniques with gamboostLSS. It includes parameter estimation, visualization, and analysis of the relationships between predictors and response variables. --- scripts/hard_task.R | 120 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 scripts/hard_task.R diff --git a/scripts/hard_task.R b/scripts/hard_task.R new file mode 100644 index 0000000..f384968 --- /dev/null +++ b/scripts/hard_task.R @@ -0,0 +1,120 @@ +# ============================== +# HARD TASK: Data Simulation +# ============================== + +# Short-Explanation: +# =============================================================== +# Objective: +# This task extends the basic model to more advanced +# modeling and interpretation using gamboostLSS. + +# Description: +# The goal is to explore deeper insights from the model, +# including parameter estimation and visualization. + +# Approach: +# - Build advanced model using gamboostLSS +# - Analyze additional parameters (like sigma) +# - Use plots to visualize relationships +# - Interpret model outputs and patterns + +# Outcome: +# The model provides deeper understanding of how predictors +# influence both mean and variance of the response variable. +# =============================================================== + + +set.seed(123) + +n <- 500 +p <- 20 + +# Generate features +X <- matrix(rnorm(n * p), n, p) +colnames(X) <- paste0("X", 1:p) +X <- as.data.frame(X) + +# Mean and variance for Y1 +mu1 <- 1 + 2*X$X1 - 0.5*X$X2 +sigma1 <- exp(0.5 * X$X5) + +# Mean and variance for Y2 +mu2 <- 0.5 - 1.5*X$X3 + X$X4 +sigma2 <- exp(0.5 - 0.3*X$X6) + +# Correlation +rho <- tanh(1 + 1.5 * X$X7) + +library(MASS) + +Y1 <- numeric(n) +Y2 <- numeric(n) + +for(i in 1:n) { + Sigma <- matrix(c(1, rho[i], rho[i], 1), 2, 2) + + z <- mvrnorm(1, mu = c(0,0), Sigma = Sigma) + + Y1[i] <- mu1[i] + sigma1[i] * z[1] + Y2[i] <- mu2[i] + sigma2[i] * z[2] +} + +data <- cbind(X, Y1, Y2) +data <- as.data.frame(data) + +library(gamboostLSS) + +model_Y1 <- gamboostLSS( + Y1 ~ ., + data = data, + families = GaussianLSS(), + control = boost_control(mstop = 100, nu = 0.1) +) + +cv_Y1 <- cvrisk(model_Y1, folds = cv(model.weights(model_Y1), type = "kfold")) + +plot(cv_Y1) + +mstop_Y1 <- mstop(cv_Y1) +mstop_Y1 + +model_Y1[mstop_Y1] + +model_Y2 <- gamboostLSS( + Y2 ~ ., + data = data, + families = GaussianLSS(), + control = boost_control(mstop = 100, nu = 0.1) +) + +cv_Y2 <- cvrisk(model_Y2, folds = cv(model.weights(model_Y2), type = "kfold")) + +plot(cv_Y2) + +mstop_Y2 <- mstop(cv_Y2) +mstop_Y2 + +model_Y2[mstop_Y2] + +# Y1 results +coef(model_Y1, parameter = "mu") +coef(model_Y1, parameter = "sigma") + +# Y2 results +coef(model_Y2, parameter = "mu") +coef(model_Y2, parameter = "sigma") + +# Scatter plot +plot(data$Y1, data$Y2, + main = "Y1 vs Y2", + xlab = "Y1", + ylab = "Y2") + +# Model plots +plot(model_Y1) +plot(model_Y2) + +# Save plot as image +png("plots/hard_sigma_plot.png") +plot(model) +dev.off() From a30340c11e46760e97131e4eacf0e5bd6e75401d Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:33:28 +0530 Subject: [PATCH 06/10] Move plots into plots folder --- easy_plot.png => plots/easy_plot.png | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename easy_plot.png => plots/easy_plot.png (100%) diff --git a/easy_plot.png b/plots/easy_plot.png similarity index 100% rename from easy_plot.png rename to plots/easy_plot.png From d75b6dcefef7bbe7b4ff3e68e25d67ea945ab665 Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:34:41 +0530 Subject: [PATCH 07/10] Move plots into plots folder --- hard_sigma_plot.png => plots/hard_sigma_plot.png | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename hard_sigma_plot.png => plots/hard_sigma_plot.png (100%) diff --git a/hard_sigma_plot.png b/plots/hard_sigma_plot.png similarity index 100% rename from hard_sigma_plot.png rename to plots/hard_sigma_plot.png From 18367520f6f238f3045bcdee9a76d3828423d8b8 Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:37:39 +0530 Subject: [PATCH 08/10] Update README with project details and insights --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d4257c6..cb1a567 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ This makes it especially useful for complex real-world datasets where variabilit * Cross-validation risk vs boosting iterations * Demonstrates convergence and optimal stopping point -![Cross Validation Plot](easy_plot.png) +![Cross Validation Plot](plots/easy_plot.png) This plot shows the cross-validation risk across boosting iterations. The optimal stopping point corresponds to the minimum risk. @@ -94,7 +94,7 @@ The optimal stopping point corresponds to the minimum risk. * Sigma (variance) behavior plots * Demonstrates how variance changes with predictors -![Sigma Plot](hard_sigma_plot.png) +![Sigma Plot](plots/hard_sigma_plot.png) This plot illustrates how the variance (sigma) changes with predictors, highlighting the model’s ability to capture heteroscedasticity. From 653754f46954129e2956fbe14f70e4509a6e1b01 Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:41:40 +0530 Subject: [PATCH 09/10] Delete duplicate easy_task.R --- easy_task.R | 94 ----------------------------------------------------- 1 file changed, 94 deletions(-) delete mode 100644 easy_task.R diff --git a/easy_task.R b/easy_task.R deleted file mode 100644 index 504d0ab..0000000 --- a/easy_task.R +++ /dev/null @@ -1,94 +0,0 @@ -# ============================== -# EASY TASK: gamboostLSS Example -# ============================== - -#Short-Explanation: -#================================================================ -# Objective: -# This task demonstrates how to apply the gamboostLSS model -# using a Gaussian distribution on the mtcars dataset. - -# Description: -# The goal is to predict the response variable 'mpg' -# (miles per gallon) using predictor variables such as -# horsepower (hp) and number of cylinders (cyl). - -# Approach: -# - Load required libraries -# - Use built-in dataset (mtcars) -# - Fit GaussianLSS model using gamboostLSS -# - Apply cross-validation to find optimal mstop -# - Improve model performance and avoid overfitting - -# Outcome: -# The model successfully fits the data and selects optimal -# boosting iterations using cross-validation. -# ================================================================ - - -# Install required packages (run once) -# install.packages("gamboostLSS") -# install.packages("mlbench") - -# Load libraries -library(gamboostLSS) -library(mboost) - -# Load dataset (mtcars is built-in) -data("mtcars") - -# Define response variable -# mpg = miles per gallon -# Using all other variables as predictors -df <- mtcars - -# Convert to proper format -df$mpg <- as.numeric(df$mpg) - -# ------------------------------ -# Fit GaussianLSS Model -# ------------------------------ - -model <- gamboostLSS( - mpg ~ wt + hp, # fewer variables - data = df, - families = GaussianLSS(), - control = boost_control(mstop = 100, nu = 0.1) -) - -# ------------------------------ -# Cross-validation to find mstop -# ------------------------------ - -# 10-fold cross-validation -cv <- cvrisk(model, folds = cv(model.weights(model), type = "kfold")) - -# Plot CV results -plot(cv) - -# Save plot as image -png("plots/easy_plot.png") -plot(cv) -dev.off() - -# Get optimal mstop -mstop_opt <- mstop(cv) -mstop_opt - -# Apply optimal mstop -model[mstop_opt] - -# ------------------------------ -# Selected Variables -# ------------------------------ - -# Coefficients for mean (mu) -coef(model, parameter = "mu") - -# Coefficients for variance (sigma) -coef(model, parameter = "sigma") - -# ------------------------------ -# Summary -# ------------------------------ -summary(model) \ No newline at end of file From 749ba8bc1638f7ca4fe0372ef13cda13c38a89df Mon Sep 17 00:00:00 2001 From: Mankameshwar Mishra Date: Sat, 28 Mar 2026 00:42:14 +0530 Subject: [PATCH 10/10] Delete duplicate hard_task.R --- hard_task.R | 120 ---------------------------------------------------- 1 file changed, 120 deletions(-) delete mode 100644 hard_task.R diff --git a/hard_task.R b/hard_task.R deleted file mode 100644 index 52e8e91..0000000 --- a/hard_task.R +++ /dev/null @@ -1,120 +0,0 @@ -# ============================== -# HARD TASK: Data Simulation -# ============================== - -# Short-Explanation: -# =============================================================== -# Objective: -# This task extends the basic model to more advanced -# modeling and interpretation using gamboostLSS. - -# Description: -# The goal is to explore deeper insights from the model, -# including parameter estimation and visualization. - -# Approach: -# - Build advanced model using gamboostLSS -# - Analyze additional parameters (like sigma) -# - Use plots to visualize relationships -# - Interpret model outputs and patterns - -# Outcome: -# The model provides deeper understanding of how predictors -# influence both mean and variance of the response variable. -# =============================================================== - - -set.seed(123) - -n <- 500 -p <- 20 - -# Generate features -X <- matrix(rnorm(n * p), n, p) -colnames(X) <- paste0("X", 1:p) -X <- as.data.frame(X) - -# Mean and variance for Y1 -mu1 <- 1 + 2*X$X1 - 0.5*X$X2 -sigma1 <- exp(0.5 * X$X5) - -# Mean and variance for Y2 -mu2 <- 0.5 - 1.5*X$X3 + X$X4 -sigma2 <- exp(0.5 - 0.3*X$X6) - -# Correlation -rho <- tanh(1 + 1.5 * X$X7) - -library(MASS) - -Y1 <- numeric(n) -Y2 <- numeric(n) - -for(i in 1:n) { - Sigma <- matrix(c(1, rho[i], rho[i], 1), 2, 2) - - z <- mvrnorm(1, mu = c(0,0), Sigma = Sigma) - - Y1[i] <- mu1[i] + sigma1[i] * z[1] - Y2[i] <- mu2[i] + sigma2[i] * z[2] -} - -data <- cbind(X, Y1, Y2) -data <- as.data.frame(data) - -library(gamboostLSS) - -model_Y1 <- gamboostLSS( - Y1 ~ ., - data = data, - families = GaussianLSS(), - control = boost_control(mstop = 100, nu = 0.1) -) - -cv_Y1 <- cvrisk(model_Y1, folds = cv(model.weights(model_Y1), type = "kfold")) - -plot(cv_Y1) - -mstop_Y1 <- mstop(cv_Y1) -mstop_Y1 - -model_Y1[mstop_Y1] - -model_Y2 <- gamboostLSS( - Y2 ~ ., - data = data, - families = GaussianLSS(), - control = boost_control(mstop = 100, nu = 0.1) -) - -cv_Y2 <- cvrisk(model_Y2, folds = cv(model.weights(model_Y2), type = "kfold")) - -plot(cv_Y2) - -mstop_Y2 <- mstop(cv_Y2) -mstop_Y2 - -model_Y2[mstop_Y2] - -# Y1 results -coef(model_Y1, parameter = "mu") -coef(model_Y1, parameter = "sigma") - -# Y2 results -coef(model_Y2, parameter = "mu") -coef(model_Y2, parameter = "sigma") - -# Scatter plot -plot(data$Y1, data$Y2, - main = "Y1 vs Y2", - xlab = "Y1", - ylab = "Y2") - -# Model plots -plot(model_Y1) -plot(model_Y2) - -# Save plot as image -png("plots/hard_sigma_plot.png") -plot(model) -dev.off() \ No newline at end of file