From b7e20e4e1f28322c0060c09065d3920630b0557a Mon Sep 17 00:00:00 2001 From: Muawiya-contact Date: Sat, 4 Oct 2025 12:33:36 +0500 Subject: [PATCH] Add integration with Chat-GPT APIs. --- SearchEngine/.gitignore | 1 + SearchEngine/README.md | 94 +++++++---- .../__pycache__/search.cpython-312.pyc | Bin 7321 -> 9864 bytes SearchEngine/gpt_docs/gpt_1.txt | 1 + SearchEngine/gpt_docs/gpt_2.txt | 1 + SearchEngine/requirements.txt | 13 ++ SearchEngine/search.py | 157 +++++++++++------- 7 files changed, 170 insertions(+), 97 deletions(-) create mode 100644 SearchEngine/.gitignore create mode 100644 SearchEngine/gpt_docs/gpt_1.txt create mode 100644 SearchEngine/gpt_docs/gpt_2.txt create mode 100644 SearchEngine/requirements.txt diff --git a/SearchEngine/.gitignore b/SearchEngine/.gitignore new file mode 100644 index 0000000..2eea525 --- /dev/null +++ b/SearchEngine/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/SearchEngine/README.md b/SearchEngine/README.md index 0b9ef85..7bf95f4 100644 --- a/SearchEngine/README.md +++ b/SearchEngine/README.md @@ -1,10 +1,11 @@ -# 🔍 Mini Search Engine with Stack +# 🔍 Mini Search Engine with Stack A **Mini Search Engine project** developed as part of my **2nd Semester DSA Lab Project (BS AI, NFC IET Multan)**. It demonstrates **core Data Structures and Algorithms (DSA)** concepts such as: - **Stack** (for search history navigation) - **Inverted Index / Hash Map** (for efficient keyword-based searching) - **String processing & searching algorithms** +- **GPT Integration** → fallback when no local documents match --- @@ -15,6 +16,7 @@ It demonstrates **core Data Structures and Algorithms (DSA)** concepts such as: - ✅ **Search History (REDO)** → supports `next` command just like a browser - ✅ **Document Viewer** → open `.txt` files directly from search results - ✅ **Automatic Crawler** → indexes all `.txt` files in the `documents/` folder +- ✅ **GPT Fallback** → uses OpenAI GPT-3.5-Turbo when no local results found, saves responses in `gpt_docs/gpt.txt` - ✅ **Clean modular structure** for GitHub --- @@ -25,7 +27,7 @@ Mini-Search-Engine/ │ ├── stack.py # Stack implementation (push, pop, peek, empty) ├── index.py # Inverted Index implementation -├── search.py # Search Engine logic +├── search.py # Search Engine logic + GPT integration ├── main.py # Entry point for running the project │ ├── documents/ # Folder containing sample text files @@ -33,26 +35,43 @@ Mini-Search-Engine/ │ ├── doc2.txt │ └── ... │ +├── gpt_docs/ # Folder storing GPT responses +│ └── gpt.txt +│ +├── .env # Stores OPENAI_API_KEY +├── requirements.txt # Required Python packages └── README.md # Project documentation ``` -## ⚡ How It Works -- The program scans the `documents/` folder and builds an **inverted index**. -- When the user searches, queries are **cleaned** (lowercased, punctuation removed, split into words). -- Matching documents are **ranked by query word frequency**. -- The query is **pushed onto the Stack (history)**. -- If the user types **back**, the last query is **popped** and the previous one is shown again. -- The user can open a result to see the **full content of the file**. +### ⚡ How It Works ---- ++ The program scans the `documents/` folder and builds an inverted index. + ++ When the user searches, queries are cleaned (lowercased, punctuation removed, split into words). + ++ Matching documents are ranked by query word frequency. + ++ The query is pushed onto the Stack (history). + ++ If the user types `back`, the last query is popped and the previous one is shown again. + ++ If no local document matches, the engine calls GPT, saves the result in `gpt_docs/gpt.txt`, indexes it, and shows it. + ++ Users can open a result to see the full content of the file. ## ▶️ Usage -### Run the program: -```bash +1. Go to base Dir: +```batch +cd SearchEngine +``` +2. Run +```batch python main.py ``` -### Example Session: -```Loading index... + +***Example Session:*** +```py +Loading index... Index built with 5 documents. Enter search query, 'back','next', 'show', or 'quit': ai @@ -68,10 +87,16 @@ Enter search query, 'back','next', 'show', or 'quit': cs [Stack] Pushed: cs Searching for: 'cs' -Found 1 document(s): -1. doc2.txt | Score: 1 +No matches found in local documents. Using ChatGPT... +--- GPT Answer --- +CS is the study of computers and computational systems... +------------------ +1. gpt.txt | Score: 1 -Enter document number to open, or 'continue': continue +Enter document number to open, or 'continue': 1 +--- gpt.txt --- +CS is the study of computers and computational systems... +------------------ Enter search query, 'back','next', 'show', or 'quit': back [Stack] Popped: cs @@ -80,41 +105,42 @@ Enter search query, 'back','next', 'show', or 'quit': back Back to: 'ai' 1. doc5.txt | Score: 2 -Enter document number to open, or 'continue': next -Please enter a valid number or 'continue'. +Enter search query, 'back','next', 'show', or 'quit': next +Redo: 'cs' +1. gpt.txt | Score: 1 Enter document number to open, or 'continue': continue Enter search query, 'back','next', 'show', or 'quit': quit Goodbye! ``` ---- + ## 🏫 Academic Info -+ 📖 Course: Data Structures & Algorithms (DSA) +📖 Course: Data Structures & Algorithms (DSA) -+ 🎓 Semester: 2nd Semester, BS Artificial Intelligence +🎓 Semester: 2nd Semester, BS Artificial Intelligence -+ 🏛️ University: NFC IET Multan +🏛️ University: NFC IET Multan -+ 👨‍💻 Student: Muawiya Amir +👨‍💻 Student: Muawiya Amir ----- -### 👥 Team Members +--- +## 👥 Team Members -+ 👨‍💻 Muawiya (Team Leader) +👨‍💻 Muawiya (Team Leader) -+ 👨‍💻 M. Umar +👨‍💻 M. Umar --- -### 🚀 Future Improvements +## 🚀 Future Improvements -> + Add ***synonym & fuzzy*** matching for queries +> Add synonym & fuzzy matching for queries -> + Implement ***OR / NOT*** search operators +> Implement OR / NOT search operators -> + Enhance ranking with ***TF-IDF instead*** of simple counts +> Enhance ranking with TF-IDF instead of simple counts -> + Build a ***GUI or Web-based*** interface +> Build a GUI or Web-based interface ------- \ No newline at end of file +> Maintain multiple GPT files (gpt_1.txt, gpt_2.txt, ...) to fully integrate undo/redo \ No newline at end of file diff --git a/SearchEngine/__pycache__/search.cpython-312.pyc b/SearchEngine/__pycache__/search.cpython-312.pyc index 608fe814737be440846c3792ca9168f29bffe98c..26d1e2954670deef43896d70783e3be4e5cca4bc 100644 GIT binary patch literal 9864 zcmbVSe@q*9mY=a_{2L6|4j2fGLlSU8oF*aUNAqJTBq7;`{2+-gELqF(3}7%enHfUx zn!D^?PD^^(3$%NJ?6wNMJE>HSZmCwfrhlAvyNyn%_K&q|Q@bN7MYpGu`X5L3)N87f z?!6h$j6;3xT?g^z`{R9o&AcC<_r||3E3*-Bz4y_Rq3UXa_#;--iz*bJ=Akf0umnp+ zi9Wm=qU5Nd&p=|?7&VSkeUx0LqV%Y#&m@=WsCm@VXCVm#!CBrZOeNQd;VN|r{mz=+ zA(dL6jk5_B7-eV8YTHjpsGB(31B=?y=U}aUWvs2Q9D0c127M;A#Fex52ab0PN*z}| zL9k`F2)0~p>#LX~{m##kp~p{3bgvi+k3ga9Wb8V}iyV70#&S2H!E|Pfi*=lYl06y^ zu|YO2a1w z61p1Y1RqRk4vj^EBiv-d9@xpo!{f*yBy545gJUAB6c#Q3lZCqx5yW78B&;3}?}f7w zPhW=096X0Uk|p{KEZJw|D2^U5u!dV?pNTczBKpiM1=PaQK&`9^sEsuPwX+tW@HBzK z^8{KxY4z4l!eVe2n86IC*?0&$019*jTh}+%?vpek|`=EWCGuP^BbE1hqDiR!0`~YO|H> zRppjQk}#7tK52lONu6zwfSOsYC5homwXENZR|mAR2huKB4Q9b!XQ5)$4mC& ziY-_W!?55tN|eAw2lxtDgvUb&xE#YU&w9fL`!5O{FZ8zw%+*jL5#syL#JDa#a-Hk% z;)D?~KGu(t&Gg2{`7qbtbG&oMSw23@g+-xXdD+Ul?iXZP2#ifimY}k?V4_MBkbv6e z#pO=}xk;>eHoX>{rta8Qs_S2yfP!Pi?S1V86s-9g25x2Qt=fyd`MSnj-L7=qu1wvY zX)0@}$y*$EowuFK7VlG-kO*rM0Tm1U%M$buTIUE|f`(O%z#)YT-k2s4Y8%K}VZJWn zE*Ulq5@ExY7J`@}ljN`_6*@Ods{8_Z#{)c?JQVR5ig=onweLxw!^MV&G#MFRCfR;9D)-Xr$|xL zo}`gAJus^lmZcJ~d@np*68@*iiK3FIyxUnhrY+;AIu+_Q)hdB??_#| z^pP{yeI?y}CH2Elru%9t%%;1!T=!_Ydvv)w{_{OE*XAZ>C+3CMk_#QPf03(hO;@)r z_GYSgE&0;bZOg8`sj9JQGmsT$)y%$yj;wR*VrAO7EoIsE@>NPfL2Wf-t@4#r zTX;jV+J2i#g4%+>*61pE(42ATyurypky^@+SV2!APQ$}6MzGXH|P4rbt zpA*(7?2DyX`hn@4!n4{-Oi?hR1b`~#mXgLKHH-qf?othex&{C?GyduIJ9nufv&vdT zJg9snH8b~1g0;S(nGbM%W?jvjh+Tw0PJr59CP?_Z^WF{PWnzMCBE(vCrp*LjLl&=M zQw*2ZI^u6Er*tC53%&X^8?z7X!faE>3YlN}v$PBqs zxAFeId;4;A+tYR17dtX_fyHY81!gQO9`Ea$?l<3SzTbMURk4^$p-k=mhfR;oSOd){}wzpl%=4}W0<4w-G0hNb>wA6y?0C;yqR%S`;^hR$~C1DZl=?P!FS-?eP-e3AapwF$}}w$n&oq`N5VBf<>jN9KmE z7UQF466kYA6BS%Gng_w5&7pZj$0lF8*&NnPrrM{YW;BA}@i&5>*D!AhE<+Ncf)T(v zyhGHGj*dJ~je&!bIV!=_lrXEhO2$qAFRk&Gz!yclyMWgu^VY$<4t;NGX;{N5YKwk4 zl8E7MN*Pei9D|wpC7=rL=@7(Q(Rh3elo6s3fWV;tW8-2VvEOzK(KMsrS?1a}$4|C0 z%^0~f!#xHXhIe5oJ^^F_d}K_bqwxuj=Xb$jBswVb(?J}-mqQbeA|#oE%2xbF1!>rDw}7y58e@2rRl&Rr zGbAHF9!r$z>Il_+FpKb0AUBC8T$F9k~i$y27_y z(ewoPp+4W2t!P3 zAZkL&f)apVG8FqISn>bu+1CY!_`=p*!skV+7I{Gy{WIF{br_I&!;HepC&2R#F` zVXB8v=OghgmU^Z>b*z3*8it$nnp$FM)~e11>lEY#=KWS~*2dajHwytErT z_aZ^xz_%eAJ;6u7%(r71k}6}-P?(df9K>!M9}=M$L==Un$!}5YE-kwP$$}tAhC`;1 zd`hJSF_;XRBr?M9fLR3;85t*OQ0WA;Br0{=TrljXkedfrev`nAhU2y)XYr;j-uW$A z3!|k(Rscd(Y+Bg6(7afgD&L;9w62%w@(vt#=BO0^f`z`5jXa-r+3 z({G+$+MlUDG~K;Y>6sZ?FfI;eE877kl`#*&8C1@hXU+4bpF06Pl`&5o6?aeFK9zGc zq#X_OH*$?T(~UcqOb-p2#up!6$~wC8n_F|6+tQocmM%R!pV|Dq$Gg(?r&D!3(_MF7 z#$CF%Ew(SVq;?-qRh`H>y7P{ToTL6@NBshscKDVZ^#HXz4fE{%=)LHYFY9hwBS`D_ z$lG1h9n;rf{LHzT&KZ8*H-GG2%Y5y^ktN^KiFdax)u*bCEIW>_8j<-4JlgWx<;#|u zFFdei`6SN-gcu*%s*cwf|D}dL?t}0I8q^DeOEiH`{}qUgx6~e@R2HdJh*~;R#2S($ zYeW-O2;C@UVle%NVa?QNi$Umx#-XG+ErF4G!6b;9X@U^SIb~!mX!PDNgbkM=B%3n+ zxv{r!pMZ=@!C=@D4d)Q<$SOQ8TLR4eb)00v(FmlR1Bo)wsU3R)dv}QA{MEQbArt{4 zK=JV?Cz&x518jo>^7ZdWKYt9n_EQo)8fUqvWEtfIAv6dn61&KaLiRTVvv86PvM50@ zKElNWIlxMI<#2=vg~PyKWR4CpJ${R1QuZQIsN#}693LHva$+PN6QB#?8_->Icqkr$ z!IHVKB7YS&B3X~!2ynjk0>%#-sSdyKgC-Koej4a=u>(q#EX$aF_%5--H%1HqsR|JT zIavmAiRN~TK#PGjCPDsg7)Ahy@HIv5u%u+GfxgWBix{K{VsoV-1;h--1V9bc5hW`+ zWlS1NrDa&nx$=(E?mK3O_7czx;E@K&z61E8WXNH9LxWq8O0hh!76U91J*|APHksQF zkV{K(sC|001F*qS3>Mfj{L^7l%w9BOx%xB#RQ&hX0E10SylCIZz-p8Oq}KQ3)MvO% z+-VXSbw^+0tay%dzh+%36HRDMm=NT1*$8ASLX!|=Md3>V2MvYfNFcGZ5PzvHMVVgo z$V$ki=6o^ivZxgqlE`2}M=i1|)^SUMmLW>QDuWFGB{u#X9Hb}VgztHwD15+DlDC0G zlM)4n;6t3=EQfpQa?OE+6O$-<9w*?hFyh%pbt8!c!;9$l^K$oR}4khb+8R`jFkp;tN zh^Xcoa&MGD2L^?n+Em@?jHf5(xsdi;$apU0JpE};f5!8}>En5)XTJ7hXA^{gs|H7z ze`SMre(P%^Gse8jJ#WmqUU*$tXnX6>n};%uFD$!W$k#T=sTg7L+?x~W+MP>uy0(3$ zGvB)NS3Pg{JZ#UlzBJ!<|L`vkKc@ar_EFhP*NS)J>sRj&-Wx>szSy?ZxwQ8owN#&J zIPzQL*GQC_l2hyY?9$`CLfbNubZ!T?|TPyPJe( zU7Y9!I|G(@{Z$G>sQSc`vMnsm$!LjI2n3A(%Zys!tFNVo50DQkX?siAPTi8rp*;3rdYN(ppF^9)_9uSmd!}Wu5!0}vq zhd&0~31^W$<4OJs8iiMYK%&)NJ}ur&-cIJ6zO>Vqv2R)keQa-*BY|C6$L_qNDqpo> zu64FmPxY8F>agCnB0Qd%$XXg9GMMY1?N_3MjHhMV2EoS+WN_wRUL;fH+p?DJ`Rcl& z$fGs`0R?`c)iAb_;{lP^E=>{k%ZdOmOzYAMrE)x~utG8t=a4Tw-3hN2^CfCW(Y+M( zR_BrhzXZ@O_4%_{@QZd}l(R_GIw39r$4jLw^*KR^wSB-jG?1YmMBOu<)K4es&ckm5 z^bex=1Bw_|kl#cFWb}s+GJ|a6ZwP!n35yK}XrgM9t$iwoONtLVFXdT zJbQWW%Ip>REr9L$_!k6i3re1Ke%pNg!nss=bJo)Gf0Dc&_+kP-L%=XS7?fVlB&|?mwFN9`G*O|`X*^Ud{k_8eUa*3~jz003%c@LIOA;A!y$FP7$y_MCG z$J4(}$P%wOT|0Im!Q7mHzWf&PDfRs)Ov}>#r-X&BU2{|P_BDrtcCFP{(*CurHMDoF z%}(!GtF3}+i;ea^t+CQxWlo$4kcAI!a@Hg~qNGCb+m7-h5)*p$P=rVQkW9)?k9dRv zl4+2`-y!iRW4wT52+3a}xdjA*GWhfaAHM~8!#tW2{UpyUhm;3*g#T;kKy?=4K)^YY zC8LeZOw~dv&#oK#2d>=E&80LVkxCBM7C$(=(7*ATp7; z1R2DqEy0c325lUs`2;^M3<{7I21OgWL}c+@B3C`M6rUnA28D&;&gF9i^j$v zQGGdiOQD(~Urx%(?TkBoR8Gax<4RIfW5t%mxMYSrJq!5-qL4vOCWAJa8{`#Xn3HYq za)YAGze@(IWC3D3c0cHls~~pDc8FcF17i1-Gvv~p14@*}Mh4>JnYQzi8XuoXD3Z#` zBqez>o`hU{Je`PY@l-M#GdjniV48>jr~e4i0?{~lu<)?-%#w&%rirCOQuY>GvrzG^ zG20}|fhDv3O#-mr_ zn22g4x}7R&I-#k$7)hj}a>%9&s*)I{ZfK$6_e#?CKCg|8 zN*|5iQil2zbxcc54E=CINlF8$G>s`k{TF+WT&1Z|C8nuE#p%AlW_GAJ&*6zF-4Thz zlW{E)$<&(*5;n>_*n1B|?~qM@+xwAe;jU}5Y0vwUknn8!0`Fgfgmc5!%pOevEZ%T< zW?eI`_dHLb9f_Ii02w>{vvr$*FDhR*nR{5O#}Oyd%+(n+R|0Jn1I)X*21XenM*BW8 z%f)OrNzC?UBO!OVEH~=3Y8ffnX4F@?yZ$UUZ@b$pkUKW0Z&TgYev)~k9!IjaKJw;! zXj3!yVx^Ic;*#wOS@Zj3Apr00@LIJ|<$tkOVUJaw0qCPNE?JbTer|tQn#B|3jsPvZ zMt|0Gw82UnsVturM*T+4dK&xM=mR*$fqz#1?yOLTMA@k|8LhMCZ2uFHT_2Xs>69Y| z%oT%qpCkv!aiVgQkiSki_`Cb*ZT>o$uRwOl!kOPJYbk4kZ~&q0RnG9V1%St0qmx zwGk=fwoHUGt$hanmdr`X6m*H51w%EuI6>n{O}C{~-JSq0k>ix{1a={-Xis;j6Nxx} zq%Nd@5OiTQ6;DzJj>#UCBLvQE4ky3>VDnwuPvWlTs!rHpS+&;U75ZB(bo;ln_hpu@#TW|{D!|} zi}2N_=D5wq=7pQ{H`g0GRvJ4N-(GDDEd^H^yXW{#f8c|*`bkFE#n-W@F7^HL^2e7S{-EGH`3(;>ShXi2 zboECEsjkoa+W#fC)c((-i$lw;`TFmzdCva#RxR5YX9kvRm2{JwH?#GeSIhRFM2qLchD&UoY83yOz;8{XNg1!$O8iyqb8X`?NTrRGHSXu;|&mL zDT&l6+s3+JWjz`aHw)#2IAw<#0;^F61XF=#4O<*ZUWU4&S=VjYvz}#j{|xF@vAV5b z7k$vK@a>K_*^uQZj;L-|Gss#*3S=BICG}O362-l%OtP z6J5X-;M<{of5?ftU?Q#QqN>sOgf1phlM1Cscc24UeKCR!C{8Zz#&Ub4=(|I+%=4`Uo_Wu@H@M;r zu6Yl9UDt|@peQ$3@OC|ECAAF{7iWj4!%t+p`2)6_Vf~RiPQhn^GQbXw}%<~q0Qz-f+0wb%%FTjs>V{h2D}YNSlH>G!(S(CI%Q zfq}0CJVdFsZ3|1Z14_1Qn9t2h1F;lU09&GpiI9`w0u*#w9bu>`dbp}0LMSo^4MLJXjD^lX9OK0I>3-nM3BfZt#fh8ycJ4L%d5QrxlEk}qZ0Z~K(YEML;xkwyPg@( zx+AdS2;|xe4rvoSowsfA#A4@CUB2dU!O{J=sdXVUpIL9}T50N9@~<`>WlFiWVK(~@ zS>`R;3!aV*PdzxvxvHP4xt{xH?wwg}>dJ#(+ncX>so*%d>D!x=?jwPlPcn@o~@aw zS#vZO;b*!F_z=_v41O=d_W3__Cft+<9F^%%ETG-7$$Zupv)#6hwZOVHYoq%nR)JiIN&U98=V>6!=fLN{~{+QWV^10#2ifY{EXAIa>0t z07-V~AOTiyfXCHR5<<1;Rnba3nO4xVI*bfG2?JydLGdWV@JsQ7D4hLBMsyuHazx5F z8F@g`m1)3|U7kl%N$(EKBi)oF~to!x>HCFd=?9A}WSKi*uhJ#oVVg;RFf&Snjx2+z7DOo;$y0@GB z=1{O#;y-u$ApN-{_J-`Ax7#t@?dxmizpyo6+{8h=Lw=irWe;OaOMC+hLCNk==`O>- zk<2j`D;L`4nHg@9_~%$O8Pe*@aMU2*D~gp-^WMQ~V{tIta#S9>DMGAPW=}SY&1Hc} z4Y6FnfW_~6_h5cAh%QrTL3{Mey8?(V(}%zfXUUV{T$SBBRBS18a5qv~pEAM313#M< z89O_&g6u2_CSYpW{anF>FLniEc5p;+^U!`N1vhrFF(fs-W|*MLt#~S}?l@M28J}hL zf?V4DK6)z}PegAd6p&7C*DFO!Ag}0)&@X)nqf;3DIYua%C|(rP??Du*E|L#!7C3Kc z%D7rQVek{F35wH9@je$+l%k9=0Y%SYAyZE;V+vWt;30b~O)2oA?!qBPm~^7OSc{3L zU3`QX+i@p=bMRMCsQw2;a3|odnbu~rGuidp;7V<9)!nuj{nFjZ6w&d5=LJwikLw#3 zy63ys>)Tf9+g9rjPTTQ<(K+KpB{Mf!aO~gk2Ns6rht~ZaEB=mEf7i4N1lSy$HF8&$ zxO~l_g5&T`{nfJC)bot~TJv=LzB#yf@T2Yr-OJHJ^T{HO%nNhe+;QNJTwRXOy^xD8 ziuXoxgNuK+9DQW}WOR8b-}uU!w--63k9+L#&i2pruX*-7tFKJ9*nqvdfH;F0*c~bz zZ;YJ;$8htwZXNGof7za6%iPUYFNQ6~4ygEcg>S#R1hrs|fM>@mVHeWB<6+%m96-7` zj>=52<9JneSedeE=KW>mmh2YvS)@OEG;+Xk$U-{*El4-jk~MM0oZ?CV(%b$5F7+A4 z6H!%@;65#dzho4D*I;mg+eNr)8^JRYumM&N_GcU}OzcE7Q#a3rwVPpA7nSh|ZK|js zuE3~@R|SaZ=VQ5a@Nwbq56?hYfTq^@=Q!kcAQ-PB#-LFBHAJ8$+_kf(W=_qXn>kl- z?_t0_Sn#wXZ~>Or=dUlkIsYd7aNt7JTE`>+WjmtOnLCu*v-oHXHDX{(s)JNVow)aD zhgaNNw1cffb|xL!0Xa6teorbcGp-u_ON>}g$gx)M+m18Ek2*^S;RQNNH3$)~7RUXL cIDbo;eoOYl`Ox8iNBBmd9JhCiV9ZSX58EHsuK)l5 diff --git a/SearchEngine/gpt_docs/gpt_1.txt b/SearchEngine/gpt_docs/gpt_1.txt new file mode 100644 index 0000000..f05477c --- /dev/null +++ b/SearchEngine/gpt_docs/gpt_1.txt @@ -0,0 +1 @@ +AI, or artificial intelligence, is the simulation of human intelligence processes by machines, especially computer systems. It encompasses activities such as learning, reasoning, problem-solving, perception, and language understanding. AI technologies are used in a wide range of applications, from virtual assistants like Siri and Alexa to self-driving cars and advanced medical diagnostics. \ No newline at end of file diff --git a/SearchEngine/gpt_docs/gpt_2.txt b/SearchEngine/gpt_docs/gpt_2.txt new file mode 100644 index 0000000..29c8f42 --- /dev/null +++ b/SearchEngine/gpt_docs/gpt_2.txt @@ -0,0 +1 @@ +Islam is a monotheistic religion founded by the Prophet Muhammad in the 7th century in the Arabian Peninsula. It is based on the belief in one god, Allah, and the teachings of the Quran, which is considered the holy book of Islam. Followers of Islam are called Muslims and they follow the Five Pillars of Islam, which are the declaration of faith, prayer, fasting during the month of Ramadan, giving to charity, and making pilgrimage to Mecca at least once in a lifetime if possible. Islam is the second largest religion in the world, with over 1.8 billion followers. \ No newline at end of file diff --git a/SearchEngine/requirements.txt b/SearchEngine/requirements.txt new file mode 100644 index 0000000..43c70a2 --- /dev/null +++ b/SearchEngine/requirements.txt @@ -0,0 +1,13 @@ +openai>=1.0.0 +python-dotenv>=1.0.0 +# Python version +python>=3.12 +# Core libraries +openai>=1.0.0 +python-dotenv>=1.0.0 + +# Optional but useful +rich>=13.0.0 # for pretty console output +typing-extensions>=4.5.0 # extra type hints if needed +# Development and testing +pytest>=7.0.0 # for testing \ No newline at end of file diff --git a/SearchEngine/search.py b/SearchEngine/search.py index 7ce599c..8afae44 100644 --- a/SearchEngine/search.py +++ b/SearchEngine/search.py @@ -2,72 +2,116 @@ import os from stack import Stack from index import InvertedIndex +from openai import OpenAI +from dotenv import load_dotenv -# Path to the folder that has all text files -PATH = "./documents" # to base dir +# Load environment variables +load_dotenv() +api_key = os.getenv("OPENAI_API_KEY") +if not api_key: + raise ValueError("OPENAI_API_KEY not found in environment variables!") + +# Initialize OpenAI client +client = OpenAI(api_key=api_key) + +# Folders +DOC_PATH = "./documents" # local documents folder +GPT_PATH = "./gpt_docs" # folder to store GPT results +os.makedirs(GPT_PATH, exist_ok=True) class SearchSim: - """A simple search engine simulation.""" - - def __init__(self, path=PATH): - self.index = InvertedIndex() # for word search - self.history = Stack() # to store search history - self.redo = Stack() # to redo - self.path = path # folder path - self.results = [] # last search results - self._load() # load documents into index - - def _load(self): - """Load all text files and build the index.""" - print("Loading index...") + """Hybrid Search Engine: local + GPT fallback with proper undo/redo.""" + + def __init__(self, doc_path=DOC_PATH, gpt_path=GPT_PATH): + self.index = InvertedIndex() + self.history = Stack() + self.redo = Stack() + self.doc_path = doc_path + self.gpt_path = gpt_path + self.results = [] + self.gpt_counter = self._init_gpt_counter() + self._load_docs() + + def _init_gpt_counter(self): + """Find the last GPT file number to continue numbering.""" + files = [f for f in os.listdir(self.gpt_path) if f.startswith("gpt_") and f.endswith(".txt")] + numbers = [int(f.split("_")[1].split(".")[0]) for f in files if f.split("_")[1].split(".")[0].isdigit()] + return max(numbers, default=0) + 1 + + def _load_docs(self): + """Load local text files into index.""" + print("Building index from local documents...") try: - files = [f for f in os.listdir(self.path) if f.endswith(".txt")] - if not files: - print("No .txt files found in documents folder.") + files = [f for f in os.listdir(self.doc_path) if f.endswith(".txt")] for f in files: - doc = os.path.splitext(f)[0] # filename without .txt - with open(os.path.join(self.path, f), "r", encoding="utf-8") as file: - self.index.add_doc(doc, file.read()) + doc_name = os.path.splitext(f)[0] + with open(os.path.join(self.doc_path, f), "r", encoding="utf-8") as file: + self.index.add_doc(doc_name, file.read()) print(f"Index built with {len(files)} documents.") except FileNotFoundError: - print("Documents folder not found!") + print("Documents folder not found! Make sure './documents' exists.") exit() def run(self): - """Main loop for user interaction.""" + """Main loop for user input.""" while True: - user_input = input("\nEnter search query, 'back', 'next', 'show', or 'quit': ").strip().lower() - - if user_input == "quit": + user_input = input("\nEnter search query, 'back', 'next', 'show', or 'quit': ").strip() + if not user_input: + continue + cmd = user_input.lower() + if cmd == "quit": print("Goodbye!") break - elif user_input == "back": + elif cmd == "back": self._back() - elif user_input == "next": + elif cmd == "next": self._next() - elif user_input == "show": + elif cmd == "show": self.history.show() - elif user_input: # if user typed something + else: self._search(user_input) def _search(self, query): - """Handle search queries.""" self.history.push(query) + self.redo = Stack() # clear redo stack print(f"\nSearching for: '{query}'") + # Search local docs first self.results = self.index.search(query) - if not self.results: - print("No matches found.") - return - print(f"Found {len(self.results)} document(s):") + # If not found locally, ask GPT + if not self.results: + print("No matches found locally. Using ChatGPT...") + filename = f"gpt_{self.gpt_counter}.txt" + answer = self._ask_chatgpt(query) + filepath = os.path.join(self.gpt_path, filename) + with open(filepath, "w", encoding="utf-8") as f: + f.write(answer) + self.index.add_doc(filename.replace(".txt", ""), answer) + self.results = [{"doc": filename.replace(".txt", ""), "score": 1}] + print(f"\n--- GPT Answer ---\n{answer}\n------------------") + self.gpt_counter += 1 + + # Show results for i, r in enumerate(self.results, start=1): print(f"{i}. {r['doc']}.txt | Score: {r['score']}") - self._open_doc() + def _ask_chatgpt(self, query): + """Query GPT using OpenAI client.""" + try: + response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": query}], + temperature=0.7, + max_tokens=200 + ) + return response.choices[0].message.content + except Exception as e: + return f"Error accessing ChatGPT: {e}" + def _open_doc(self): - """Open and show the contents of a selected document.""" + """Open and display selected document.""" while True: choice = input("\nEnter document number to open, or 'continue': ").strip().lower() if choice == "continue": @@ -76,48 +120,35 @@ def _open_doc(self): num = int(choice) if 1 <= num <= len(self.results): doc = self.results[num - 1]["doc"] - with open(os.path.join(self.path, f"{doc}.txt"), "r", encoding="utf-8") as f: + path = self.gpt_path if doc.startswith("gpt_") else self.doc_path + with open(os.path.join(path, f"{doc}.txt"), "r", encoding="utf-8") as f: print(f"\n--- {doc}.txt ---\n{f.read()}\n------------------") else: print("Invalid number.") except ValueError: - print("Please enter a valid number or 'continue'.") + print("Enter a valid number or 'continue'.") except FileNotFoundError: print("File not found.") def _back(self): - """Go back to the previous search query.""" + """Undo: go back to previous query.""" if len(self.history.items) <= 1: print("No previous search available.") return - - current = self.history.pop() # remove current - self.redo.push(current) # store it for redo - prev_query = self.history.peek() # now last one left + current = self.history.pop() + self.redo.push(current) + prev_query = self.history.peek() print(f"\nBack to: '{prev_query}'") - self.results = self.index.search(prev_query) - if not self.results: - print("No matches found.") - else: - for i, r in enumerate(self.results, start=1): - print(f"{i}. {r['doc']}.txt | Score: {r['score']}") - self._open_doc() + self._open_doc() def _next(self): - """Redo the last undone search query.""" + """Redo the last undone query.""" if self.redo.empty(): - print("Nothing to Redo!") + print("Nothing to redo!") return - - redo_query = self.redo.pop() # get last undone query - self.history.push(redo_query) # put it back into history + redo_query = self.redo.pop() + self.history.push(redo_query) print(f"\nRedo: '{redo_query}'") - self.results = self.index.search(redo_query) - if not self.results: - print("No matches found.") - else: - for i, r in enumerate(self.results, start=1): - print(f"{i}. {r['doc']}.txt | Score: {r['score']}") - self._open_doc() + self._open_doc()