From 65200a85c6b869c52d2ab3f101908ebe0a8d5fa5 Mon Sep 17 00:00:00 2001
From: Danielle Pinto <danielle.peterson101@gmail.com>
Date: Mon, 16 Feb 2026 22:29:08 -0500
Subject: [PATCH 1/3] first commit

---
 docs/src/rosalind/09-subs.md | 118 +++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 docs/src/rosalind/09-subs.md

diff --git a/docs/src/rosalind/09-subs.md b/docs/src/rosalind/09-subs.md
new file mode 100644
index 0000000..3aef1ba
--- /dev/null
+++ b/docs/src/rosalind/09-subs.md
@@ -0,0 +1,118 @@
+# Finding a Motif in DNA
+
+🤔 [Problem link](https://rosalind.info/problems/subs/)
+
+!!! warning "The Problem"
+
+    Given two strings s and t, 
+    t is a substring of s if t is contained as a contiguous collection of symbols in s 
+    (as a result, t must be no longer than s).
+
+    The position of a symbol in a string is the total number of symbols found to its left, including itself.    
+    (e.g., the positions of all occurrences of 'U' in "AUGCUUCAGAAAGGUCUUACG" are 2, 5, 6, 15, 17, and 18).   
+    The symbol at position i of s is denoted by s[i].
+
+    A substring of s can be represented as s[j:k],   
+    where j and k represent the starting and ending positions of the substring in s;    
+    for example, if s = "AUGCUUCAGAAAGGUCUUACG",   
+    then s[2:5] = "UGCU".
+
+    The location of a substring s[j:k] is its beginning position j;   
+    note that t will have multiple locations in s
+    if it occurs more than once as a substring of s
+    (see the Sample below).
+
+    Given: 
+    Two DNA strings s and t  
+    (each of length at most 1 kbp).
+
+    Return: 
+    All locations of t as a substring of s.  
+
+    Sample Dataset
+    `GATATATGCATATACTTATAT` (s) and `ATAT` (t)
+    
+    Sample Output
+    `2 4 10`
+
+### Handwritten solution
+The clunkiest solution uses a for-loop.   
+We can loop over every character within the input string and  
+check if we can find the substring in the subsequent characters.  
+
+
+```julia
+
+dataset = "GATATATGCATATACTTATAT"
+search_string = "ATAT"
+
+function haystack(substring, string)
+    # check if the strings are empty
+    if isempty(substring) || isempty(string)
+        throw(ErrorException("empty sequences"))
+    end
+
+    # check that string exists in data
+    if ! occursin(substring, string)
+        return []
+    end
+
+    output = []
+
+    for i in eachindex(string)
+        # check if first letter of string matches character at the index
+        if string[i] == substring[1]
+            # check if full 
+            if i + length(substring) - 1 <= length(string) && string[i:i+length(substring)-1] == substring
+                push!(output, i)
+            end
+        end
+    end
+    return output
+end
+
+haystack(search_string, dataset)
+```
+We can also use the [`findnext`](https://docs.julialang.org/en/v1/base/strings/#Base.findnext) function in Julia so that we don't have to loop through every character in the string. 
+
+```julia
+function haystack_findnext(substring, string)
+    # check if the strings are empty
+    if isempty(substring) || isempty(string)
+        throw(ErrorException("empty sequences"))
+    end
+
+    # check that string exists in data
+    if ! occursin(substring, string)
+        return []
+    end
+
+    output = []
+    i = 1
+    # while index is less than the length of string
+    while i < length(string)
+        result = findnext(substring, string, i)
+        if result == nothing
+            break
+        end
+
+        if result != nothing
+            push!(output, first(result))
+            i = first(result) + 1
+        end
+    end
+    return output
+end
+    
+
+haystack_findnext(search_string, dataset)
+```
+
+### Biojulia solution
+
+Lastly, we can leverage some functions in the Kmers Biojulia package to help us!
+
+```julia
+
+
+```
\ No newline at end of file

From 52cc16c88d2949714015c320c81ca18c1ae42913 Mon Sep 17 00:00:00 2001
From: Danielle Pinto <danielle.peterson101@gmail.com>
Date: Tue, 17 Feb 2026 15:16:45 -0500
Subject: [PATCH 2/3] add regex solution

---
 docs/src/rosalind/09-subs.md | 44 +++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/docs/src/rosalind/09-subs.md b/docs/src/rosalind/09-subs.md
index 3aef1ba..d2d60b9 100644
--- a/docs/src/rosalind/09-subs.md
+++ b/docs/src/rosalind/09-subs.md
@@ -36,13 +36,14 @@
     `2 4 10`
 
 ### Handwritten solution
-The clunkiest solution uses a for-loop.   
+Let's start off with the most verbose solution.  
 We can loop over every character within the input string and  
 check if we can find the substring in the subsequent characters.  
 
+In the first solution,   
+we will check each index for an exact match to the substring we are searching for. 
 
 ```julia
-
 dataset = "GATATATGCATATACTTATAT"
 search_string = "ATAT"
 
@@ -58,11 +59,11 @@ function haystack(substring, string)
     end
 
     output = []
-
     for i in eachindex(string)
         # check if first letter of string matches character at the index
         if string[i] == substring[1]
-            # check if full 
+            # check if full substring matches at index
+            # make sure not to search index past string 
             if i + length(substring) - 1 <= length(string) && string[i:i+length(substring)-1] == substring
                 push!(output, i)
             end
@@ -73,7 +74,17 @@ end
 
 haystack(search_string, dataset)
 ```
-We can also use the [`findnext`](https://docs.julialang.org/en/v1/base/strings/#Base.findnext) function in Julia so that we don't have to loop through every character in the string. 
+We can also use the [`findnext`](https://docs.julialang.org/en/v1/base/strings/#Base.findnext) function in Julia.   
+
+There are similar `findfirst` and `findlast` functions,   
+but since we want to find all matches,  
+we will use `findnext`.
+
+Currently, there isn't a `findall` function that allows us to avoid a loop.  
+We'll still also loop over every character in the string,   
+as there could be overlapping substrings.
+
+
 
 ```julia
 function haystack_findnext(substring, string)
@@ -105,6 +116,25 @@ function haystack_findnext(substring, string)
 end
     
 
+haystack_findnext(search_string, dataset)
+```
+
+Lastly, we can also use Regex's search function,
+which produces quite the elegant solution!
+
+
+```julia
+function haystack_regex(substring, string)
+    if isempty(substring) || isempty(string)
+        throw(ErrorException("emptysequences"))                            
+    end                                                                                                                             
+    if !occursin(substring, string)            
+          return[]    
+    end    
+    
+    return [m.offset for m in eachmatch(Regex(substring), string, overlap=true) ] 
+end
+
 haystack_findnext(search_string, dataset)
 ```
 
@@ -115,4 +145,6 @@ Lastly, we can leverage some functions in the Kmers Biojulia package to help us!
 ```julia
 
 
-```
\ No newline at end of file
+```
+
+

From a0857a2dc75f75408ca141f8c2d8824e8a3da0cf Mon Sep 17 00:00:00 2001
From: Danielle Pinto <danielle.peterson101@gmail.com>
Date: Sat, 21 Feb 2026 22:21:49 -0500
Subject: [PATCH 3/3] add findall function

---
 docs/src/rosalind/09-subs.md | 52 ++++++++++++------------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/docs/src/rosalind/09-subs.md b/docs/src/rosalind/09-subs.md
index d2d60b9..562edf7 100644
--- a/docs/src/rosalind/09-subs.md
+++ b/docs/src/rosalind/09-subs.md
@@ -40,7 +40,7 @@ Let's start off with the most verbose solution.
 We can loop over every character within the input string and  
 check if we can find the substring in the subsequent characters.  
 
-In the first solution,   
+In other words,   
 we will check each index for an exact match to the substring we are searching for. 
 
 ```julia
@@ -74,20 +74,20 @@ end
 
 haystack(search_string, dataset)
 ```
-We can also use the [`findnext`](https://docs.julialang.org/en/v1/base/strings/#Base.findnext) function in Julia.   
 
-There are similar `findfirst` and `findlast` functions,   
-but since we want to find all matches,  
-we will use `findnext`.
+### Biojulia solution
+
+The BioSequences package has a helpful function [`findall`](https://github.com/BioJulia/BioSequences.jl/blob/b626dbcaad76217b248449e6aa2cc1650e95660c/src/BioSequences.jl#L261-L316), 
+which returns the indices of all exact string matches.   
 
-Currently, there isn't a `findall` function that allows us to avoid a loop.  
-We'll still also loop over every character in the string,   
-as there could be overlapping substrings.
+It isn't included in the documentation about exact string search [here](https://biojulia.dev/BioSequences.jl/v2.0/sequence_search/#Exact-search-1),   
+but the function exists!  
 
+BioSequences has other helpful exact string search functions like `findfirst`, `findnext`, and `findlast`.   
 
 
 ```julia
-function haystack_findnext(substring, string)
+function haystack_findall(substring, string)
     # check if the strings are empty
     if isempty(substring) || isempty(string)
         throw(ErrorException("empty sequences"))
@@ -98,29 +98,19 @@ function haystack_findnext(substring, string)
         return []
     end
 
-    output = []
-    i = 1
-    # while index is less than the length of string
-    while i < length(string)
-        result = findnext(substring, string, i)
-        if result == nothing
-            break
-        end
-
-        if result != nothing
-            push!(output, first(result))
-            i = first(result) + 1
-        end
-    end
-    return output
+    matches = findall(ExactSearchQuery(dna"$substring"),dna"$string")
+    return first.(matches)
 end
     
 
-haystack_findnext(search_string, dataset)
+haystack_findall(search_string, dataset)
 ```
+### Regex solution
 
-Lastly, we can also use Regex's search function,
-which produces quite the elegant solution!
+Lastly, we can also use Regex's search function.   
+Here the "pattern" we are searching for is the exact string.   
+This is a great solution if we want to search for more complicated patterns,   
+but it works for exact matches as well!
 
 
 ```julia
@@ -138,13 +128,5 @@ end
 haystack_findnext(search_string, dataset)
 ```
 
-### Biojulia solution
-
-Lastly, we can leverage some functions in the Kmers Biojulia package to help us!
-
-```julia
-
-
-```