From 757f111f0f3a91749ab35aab643e04c5a270dfd5 Mon Sep 17 00:00:00 2001 From: Muawiya-contact Date: Tue, 30 Sep 2025 12:22:13 +0500 Subject: [PATCH] Made path global and project portable with local documents folder --- .../__pycache__/index.cpython-312.pyc | Bin 0 -> 3177 bytes .../__pycache__/search.cpython-312.pyc | Bin 0 -> 6142 bytes .../__pycache__/stack.cpython-312.pyc | Bin 0 -> 1832 bytes SearchEngine/documents/doc2.txt | 5 +- SearchEngine/index.py | 43 ++++++--- SearchEngine/search.py | 86 ++++++++++-------- SearchEngine/stack.py | 4 +- 7 files changed, 83 insertions(+), 55 deletions(-) create mode 100644 SearchEngine/__pycache__/index.cpython-312.pyc create mode 100644 SearchEngine/__pycache__/search.cpython-312.pyc create mode 100644 SearchEngine/__pycache__/stack.cpython-312.pyc diff --git a/SearchEngine/__pycache__/index.cpython-312.pyc b/SearchEngine/__pycache__/index.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bdf7c8f56813effff3f45912855d3c35b4537008 GIT binary patch literal 3177 zcmc&$O>7(25q|r}KT#D;iIl9!?Z%QLv6hKC5&WYnYGg}JrND|4DLoJjwrlQ_OqwF; zw@ce%C_q4hhHPC}PLNOr3RnS(R)$@~ha7as!H3>@$%ZMYY;!P>9-5mf6=>p9XLdt> zh}&u&&s?U=pxSswqgM;ZM`S%YmrGMQS)YdvEDXw5vP_eY49i+Jk)NX(1N$?KUdq!< zVm_K^d>_NWSOs{tf*>j+Ns2^)p3~+@gqw_TmSp>dj;Pc0jE?s5!`Bmo@i%nJ^!R`- zpH~Y7mBnAp&{3AWOyi?ezi4E0@mF&+BTr@XEJ5RwXHFkE$FeD!F!cBoRaxS~SY|ev zp>f-NG&gT~6eXES8j4Z~@7im$F%QWgdJ~96vf(1(j*|FW-xda&z)dt7YT28bhEOY{ z$ftI0xvq7m{!Z<*tj`&ZB$E{SdZcNb#u=W*WXgt0W8}Q=)P3Hme|JGB3aRGZIyh>? zupk!2QS#<%ge*u!sYp^kcwF}~$67Ewd#x3vAuxE-+J0C|?R(r~k<<7FST&*W6c0rX zzj19!B=cfKDs+u#nyh{gF(WG*7Z8eoz^FwS0_z*x{$#{!N$Knr$}F$J)Qp~14QdIx z0pmIKBDLo%7mSmcS*t0R&m@ezY9zB6OVV@cq`_R^#S&(z5pgk0ha~{8EzzJ?4a=8E zQ#GU5eF<@|TcZ6!BiU{YQ)RhTO;fc52!@HC^q0qZ;}}$p;wR_e?Dyn*q5RI9QS%wC&#{qIZldjjp$} zE)U!qyg6vL99r)_v?Nx1H+;(jp!uMu?~fB7O{^Mb&p@r`gxPaqt#wVU_PliesM#}G z8ofSIo>($Wf7kNV=l*B^=|z7GL6)c!3v>zD#`qs#02t@~?-=JGhW~=`DH7oeeLtt^ z9A)whW*Jf}18nRJ%DPPmGH?`2Qa#ErArUvji@;h?1W+J~8B!H%MbQQ#!aJal&~#qM zGm>Q|=YGXOhrLqh+@1KHYn@=E9|y5W9t1*_rW;M~H`fBaW}x@3@Nx6)=6mPY4t_dQ z8$M?apR4x0Rt>yflGa<=uD?}&>o*IZdAc4?55mu;pikfqyi4A7S4f%cm7*fIO%i;O z*ZEm81%#z>cxl06XUbs)8xVo5Wk}BWNqP-0_VwR`#v0_a{e1x}cunXeKt(SC6#*$d zF#>TG;K*UD!c*fA|JMi#a8?V#wV#P(7Ku=RI1Ur?Yq#NT?6l-Akr^H;qo>G^Zqw)X z4qQbk)zr}a{#|w#p^pBJZ|HI4+7Xc$+h8q9nhPqo`>v-!+yGW@;iOZfp`UDXyd_5wUxaS$KyuMiHeE_t&X_RW=r z6wE%_iZ&AVDpF-$BMtigkgIU8t3yD}+@FLi99#-S&Lq4Mi7W&nB3gX(Ck(O9!YB|E zI#xPaUR<bd**^ZA{l&K<{TL&L0W6=$GP>)WFm}A)TgU=< zLe70a3YlbZSmZYRnONLa=U2oD-7}O{=gw>DaN)V9L|)fB27Ywp)kU&8vO0TvVv*SQ zcsjyc!bLi7xfzB3Eu8`l{SegZbx5a%CBm`}H7q}8xzrr|<7iO8viNHj@z}@84uFB> zzN9!M2|Vffw4vLFZ@HifUZk465&H;qtWdv{?e>yGT`uRm3vGMtrK9D+<>_0on=!Mg*Pi|=7+TUQsT--4_SJnK zb(-P+TKK3LK3WaGPz?^0++Y^!S{_+Gwe-p+5xg%xQpno(zvo>KANaknGPGW4^?O(Ky-$zkx8!wHm`-9idmd~zq z-;>PFV}JD@-;hA>Umg+&FU^<-kKGr)Xc_y=GiK*xGUCCX!;Csd6~*!?%3M~ zluLOvU0-o4SmPCiA!D#-P}r$^9(#vSV8ZPqW{7-SU`I#YcgUCG$fhVr9h*MCG{$Xq z3DSWrU%Pb1DZsWiz}7B6w)t+C4?ROpT*C~D3I6@)SfVxrj^n;2&Homk=gx4O1f?Cw Fe*mAbr_=xd literal 0 HcmV?d00001 diff --git a/SearchEngine/__pycache__/search.cpython-312.pyc b/SearchEngine/__pycache__/search.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12f68c5d83fb5ed16f8a7beeee5c012b69628afc GIT binary patch literal 6142 zcmcIoU2qfE6~3$8)xTxQmTb%Xtswpp7y*e%fuV#S1A!2S#7QPN&Zvuajb&uX*Ow=2-fi ztZ1@yEG^0Bbx&-Ylu3*X$!SeZ6nhryk{R-77u-(}ne;Le>9t8rFDrBXjAXmW^zssW zk@VUn4r&Ju-|Lj@P`e}t)b257)MdDOWKl^B^dyJ$jRyoZIW(M-1(mi5a{5d%4R^_* zk(8(<*R9hA!8)UbVnTub~pGWx=lgDhGQV9Y8hqB;D$wQz;GzCI+D^b1V24`ah)hBF>drjuGcuGd-4j+t#<9DNH^Z;-{% z#<${^xGV0(#yX1y-{*%YU&} zTef0K)ja@Xl;$PzlKmaW-I5n~k_#O4@SDBM*TMR-Y)V-+%MFIiyYkiC$mST3Fr4@+ z-(R`PRRovh(i+X)SUMGYa>o2m`+>@1JK+kx^z7 z(V9$4j}fJbsT{>qULS3Zsb$U;EsZ%xNzqQ!tGAzkh!Vt9O3>ukeZ$<#Zy0HoNrzO`CLBO&CpT1A^`;n~3Ray3A}^GAE=nFeDBZiTVtF zSV^We!({J$9T?R{gQWm zgCldhpZVqU`QV95hoSnkzG=}P$~WvU_zx_EnwJP$vwNIbY-pM|bM4H0L+eaK>*VXR z4biE{Ohd;wyBG@J*m$$;dfUy8>m8HY?Xg>9(~;Tc&gsMlM+>174+#_4oj>^^B&~_# z*N)EzTW5l;lj>C0yT|SvyZcNbxa(^cG^pA`9)|kLNowly!Is}ArdobCIN3M7CSUi% zIq$xIFV&LJv&`3SQ~R5*?8F~gA~twAZy;Z{t>E3h==J~N$U`2Qmem8`iZ>%&LGp)& zE|&WX&tdI4&;czU?%dMl$8l4qg2a>*1JO1_-WLkZ+KROkW6JV!w@ry zFal5sF{;Umn4tVX-|jw)wbULQ$nZF>ZRLqT^Q z&19t0V{*jcP#2&XG`pit1oYt%&EQo{Ne&x)Dl;l83MN2+anw}MIm%{iaKww48?_Zf z%Z<|m=!O0ze5c`Lu*yhUuPKKU+-`#Ysshy;Dz`SE1ltYZ z!Bz%-LQ!XU*E6)%!_~cP!8Z zLb0BZK0pef9&CB^tPQNXvaIArvH-FNWN%S^n>R;>%`R}jK?eh?yJ6=2R?++PTE5Gi|vwq>SmIRF?pBx;EPKu3x>v2NW?kwMUFESPFnqkF!@ zd;`!1`(9MEWm`;8TA*dwM<%x2P)@_|N!ULB$K211c>!0f{9g{V3pR<(+7h<&wzJK!ZcUn%|Eq6& zbK6qwAE$xmrO#K_%z|Yd0v9)Y6~2sQgO>l9(^h4_Sr!6|f7@mYRy@cgAD_03(R z2_Z>O`~t-4X#i6#BVec&aWf6nlj0@{%1)RV%?wH%N*K3F5G#Beop*2Dx>e9!G--h7 z*6XolRq57i;iH3?N=l^xV|we!lq{+;d;vf(7lq2F6us#zVtOsI+cJfYMSaQ^9K8dp ztxy>(&;y!!ib-fKKEP84rPazhlyKpd4Nw{Gm&MeGOj)uLfd-Q#+Y8)SO>)Y&LX!K6G*>baFQI!hGnJnb0eh+9%aXd1}wJ_OQ+ zRmp{|b1lFu`yCdotXLkZ%!HxQ$7I0NKPOfh-i z=7Mv}N?z1F+t~RnUNq-z{h}!{`Pl7_TOHG4p=lQeb{BJv8)wFMKz!!{IX3rrPMqYg z59E3$e=;rJbG$b=-Is4TIOjivao@!}@cOTIU+$jsuBE)EJLipmJU*0> zMp7tyz9NYKDxBt$A_1M03UtS>gU%LO<+g(u#0d(i7=e@kVF)h z8COj?i4`5>d6Y1~=)lX)y~S7Ar}RJ@(mNFa1-!s8pA*+-r13Md0eGM@^al4e*T*pH KmI&6=#D4%GEduob literal 0 HcmV?d00001 diff --git a/SearchEngine/__pycache__/stack.cpython-312.pyc b/SearchEngine/__pycache__/stack.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75f8b5c26e56fc7c166b72f7806d5a75cf68e362 GIT binary patch literal 1832 zcmah}O>7%Q6rR~1XX7SLno#8jh_*^-vj-vsK}1%?fuu!6$f+8NP%x}q&ouGI-d$$a zp8*&avy}+BW%cJpio>3XXdLs@G7SC z%u>ZK(+YKiLQwKtv&u{Eidio4z?^>b@})&xD2Zu>7J+7g%YX<`*bm3p_u+=u_8R z@~W&z^SMj&&tGAlLyG~=FVO-ku3mIkN-oWF0b|CmMTTvc+)`lMVMcS7!Batf%lW*h(Df?I&PSHwVp~(>rQ2GxgATE(YDI33u@ zhpwoPa3GLKIZjF)%L;eV;sFhU7L8>fCgw)UMP1H!$hIO-h>y=9|V`b z1PW+5HL#uC%6@v`V0h#pGqmk&Ios}*yPuiZR1<=Rcl2iF+ymqM!NAaF`Z!Ahv22Y6 zF#eY%g+SMy%oD+5utVI_|9}9T4#;CYt@)I$b!rXv2*mPdK>?_dryJqz@QeCp&@^88 z_Nnj2cgMefZFd5k document -> frequency.""" def __init__(self): + # Dictionary to store index + # Example: { "word": {"doc1": 2, "doc2": 1} } self.index = {} def add_doc(self, doc, text): - text = text.lower().translate(str.maketrans('', '', string.punctuation)) - words = [w.strip() for w in text.split() if w.strip()] - for w in words: - if w not in self.index: - self.index[w] = {} - self.index[w][doc] = self.index[w].get(doc, 0) + 1 + """Add a document to the index.""" + # Convert text to lowercase and remove punctuation + clean_text = text.lower().translate(str.maketrans('', '', string.punctuation)) + + # Split text into words + words = clean_text.split() + + for word in words: + if word not in self.index: + self.index[word] = {} + # Count how many times a word appears in a document + self.index[word][doc] = self.index[word].get(doc, 0) + 1 + + def _clean_query(self, query): + """Helper function to clean search queries.""" + query = query.lower().translate(str.maketrans('', '', string.punctuation)) + return query.split() def search(self, query): + """Search for documents that contain all words in the query.""" q_words = self._clean_query(query) if not q_words: return [] + # Start with documents containing the first word if q_words[0] not in self.index: return [] results = set(self.index[q_words[0]].keys()) - for w in q_words[1:]: - if w not in self.index: + # Keep only docs that contain all other words + for word in q_words[1:]: + if word not in self.index: return [] - results &= set(self.index[w].keys()) + results = results & set(self.index[word].keys()) + # Rank results by score (sum of word frequencies) ranked = [] for doc in results: - score = sum(self.index[w].get(doc, 0) for w in q_words) + score = sum(self.index[word].get(doc, 0) for word in q_words) ranked.append({"doc": doc, "score": score}) return sorted(ranked, key=lambda x: x["score"], reverse=True) - - def _clean_query(self, query): - query = query.lower().translate(str.maketrans('', '', string.punctuation)) - return [w.strip() for w in query.split() if w.strip()] diff --git a/SearchEngine/search.py b/SearchEngine/search.py index e52fe29..981c6d8 100644 --- a/SearchEngine/search.py +++ b/SearchEngine/search.py @@ -3,87 +3,99 @@ from stack import Stack from index import InvertedIndex +# Path to the folder that has all text files +PATH = "./documents" # First we go to base dir + class SearchSim: - """Search engine simulation using Inverted Index + Stack (for history).""" + """A simple search engine simulation.""" - def __init__(self, path="./"): - self.index = InvertedIndex() - self.history = Stack() - self.path = path - self.results = [] - self._load() + def __init__(self, path=PATH): + self.index = InvertedIndex() # for word search + self.history = Stack() # to store search history + self.path = path # folder path + self.results = [] # last search results + self._load() # load documents into index def _load(self): - print("Building index...") + """Load all text files and build the index.""" + print("Loading index...") try: files = [f for f in os.listdir(self.path) if f.endswith(".txt")] if not files: - print("No .txt files in documents/") + print("No .txt files found in documents folder.") for f in files: - doc = os.path.splitext(f)[0] + doc = os.path.splitext(f)[0] # filename without .txt with open(os.path.join(self.path, f), "r", encoding="utf-8") as file: self.index.add_doc(doc, file.read()) - print(f"Index built with {len(files)} docs.") + print(f"Index built with {len(files)} documents.") except FileNotFoundError: - print("documents/ folder not found.") + print("Documents folder not found!") exit() def run(self): + """Main loop for user interaction.""" while True: - user_input = input("\nEnter search query, 'back', 'show', or 'quit': ").strip() - if user_input.lower() == "quit": + user_input = input("\nEnter search query, 'back', 'show', or 'quit': ").strip().lower() + + if user_input == "quit": + print("Goodbye!") break - elif user_input.lower() == "back": + elif user_input == "back": self._back() - elif user_input.lower() == "show": + elif user_input == "show": self.history.show() - else: + elif user_input: # if user typed something self._search(user_input) - def _search(self, q): - self.history.push(q) - print(f"\nSearching: '{q}'") - self.results = self.index.search(q) + def _search(self, query): + """Handle search queries.""" + self.history.push(query) + print(f"\nSearching for: '{query}'") + self.results = self.index.search(query) if not self.results: - print("No matches.") + print("No matches found.") return - print(f"Found {len(self.results)} docs:") - for i, r in enumerate(self.results, 1): + print(f"Found {len(self.results)} document(s):") + for i, r in enumerate(self.results, start=1): print(f"{i}. {r['doc']}.txt | Score: {r['score']}") self._open_doc() def _open_doc(self): + """Open and show the contents of a selected document.""" while True: - sel = input("\nEnter doc number to open, or 'next': ").strip().lower() - if sel == "next": + choice = input("\nEnter document number to open, or 'next': ").strip().lower() + if choice == "next": break try: - i = int(sel) - if 1 <= i <= len(self.results): - doc = self.results[i - 1]["doc"] + num = int(choice) + if 1 <= num <= len(self.results): + doc = self.results[num - 1]["doc"] with open(os.path.join(self.path, f"{doc}.txt"), "r", encoding="utf-8") as f: print(f"\n--- {doc}.txt ---\n{f.read()}\n------------------") else: print("Invalid number.") except ValueError: - print("Enter a number or 'next'.") + print("Please enter a valid number or 'next'.") except FileNotFoundError: print("File not found.") def _back(self): + """Go back to the previous search query.""" if len(self.history.items) <= 1: - print("No previous search.") + print("No previous search available.") return - self.history.pop() - prev = self.history.peek() - print(f"\nBack to: '{prev}'") - self.results = self.index.search(prev) + + self.history.pop() # remove current search + prev_query = self.history.peek() # last one left + print(f"\nBack to: '{prev_query}'") + + self.results = self.index.search(prev_query) if not self.results: - print("No matches.") + print("No matches found.") else: - for i, r in enumerate(self.results, 1): + for i, r in enumerate(self.results, start=1): print(f"{i}. {r['doc']}.txt | Score: {r['score']}") self._open_doc() diff --git a/SearchEngine/stack.py b/SearchEngine/stack.py index 4ddb398..4ea08c1 100644 --- a/SearchEngine/stack.py +++ b/SearchEngine/stack.py @@ -1,4 +1,4 @@ -# stack.py +# stack.py class Stack: """Custom Stack implementation using list (LIFO).""" @@ -24,4 +24,4 @@ def peek(self): return None if self.empty() else self.items[-1] def show(self): - print("Current Stack (top → bottom):", list(reversed(self.items)) if self.items else "Empty") + print("Current Stack (top TO bottom):", list(reversed(self.items)) if self.items else "Empty")