Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ Each partition takes into account a fraction of its neighboring partitions k-mer

Modimizer sketch size. Must be lower than window size `w`. A lower sketch size means fewer k-mers to compare (and faster runtime), at the expense of lower accuracy. Recommended to be kept >= 1000.

`--forward <bool>`

Use forward k-mers only, instead of the default canonical k-mers. This option is a switch: pass `--forward` on its own, without a value, to enable it. Warning: this will produce strand-specific output.

`-r / --resolution <int>`

Dotplot resolution. This corresponds to the number of windows each input sequence is partitioned into. Default is 1000. Overrides the `--window` parameter.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ModDotPlot"
version = "0.9.8"
version = "0.9.9"
requires-python = ">= 3.7"
dependencies = [
"pysam",
Expand Down
2 changes: 1 addition & 1 deletion src/moddotplot/const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION = "0.9.8"
VERSION = "0.9.9"
COLS = [
"#query_name",
"query_start",
Expand Down
17 changes: 16 additions & 1 deletion src/moddotplot/moddotplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,18 @@ def get_parser():
type=str,
)

static_parser.add_argument(
"--forward",
action="store_true",
help="Enforce forward only k-mers instead of canonical k-mers. Warning: only use if you want strand-specific output!",
)

static_parser.add_argument(
"--plot-direction",
action="store_true",
help="Create a plot containing the direction of each k-mer array (relative to the first array). Arrays with inversions will be highlighted in blue (forward) and pink (reverse).",
)

static_parser.add_argument(
"--colors",
default=None,
Expand Down Expand Up @@ -635,7 +647,10 @@ def main():
# -----------LOAD SEQUENCES INTO MEMORY-----------
kmer_list = []
for i in fasta_list:
kmer_list.append(readKmersFromFile(i, args.kmer, False))
if args.forward:
kmer_list.append(readKmersFromFile(i, args.kmer, False, True))
else:
kmer_list.append(readKmersFromFile(i, args.kmer, False, False))
k_list = [item for sublist in kmer_list for item in sublist]
# Throw error if compare only selected with one sequence.
if len(k_list) < 2 and args.compare_only:
Expand Down
23 changes: 15 additions & 8 deletions src/moddotplot/parse_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def extractRegion(seq_name):
return None


def generateKmersFromFasta(seq: Sequence[str], k: int, quiet: bool) -> Iterable[int]:
def generateKmersFromFasta(
seq: Sequence[str], k: int, quiet: bool, fw_only: bool
) -> Iterable[int]:
n = len(seq)
if not quiet:
progress_thresholds = round(n / 77)
Expand All @@ -60,11 +62,12 @@ def generateKmersFromFasta(seq: Sequence[str], k: int, quiet: bool) -> Iterable[
# Remove case sensitivity
kmer = seq[i : i + k].upper()
fh = mmh3.hash(kmer)

# Calculate reverse complement hash directly without the need for translation
rc = mmh3.hash(kmer[::-1].translate(tab_b))

yield fh if fh < rc else rc
if fw_only:
yield fh
else:
# Calculate reverse complement hash directly without the need for translation
rc = mmh3.hash(kmer[::-1].translate(tab_b))
yield fh if fh < rc else rc


def isValidFasta(file_path):
Expand Down Expand Up @@ -151,7 +154,9 @@ def printProgressBar(
print()


def readKmersFromFile(filename: str, ksize: int, quiet: bool) -> List[List[int]]:
def readKmersFromFile(
filename: str, ksize: int, quiet: bool, fw_only: bool
) -> List[List[int]]:
"""
Given a filename and an integer k, returns a list of all k-mers found in the sequences in the file.
"""
Expand All @@ -161,7 +166,9 @@ def readKmersFromFile(filename: str, ksize: int, quiet: bool) -> List[List[int]]
for seq_id in seq.references:
print(f"Retrieving k-mers from {seq_id}.... \n")
kmers_for_seq = []
for kmer_hash in generateKmersFromFasta(seq.fetch(seq_id), ksize, quiet):
for kmer_hash in generateKmersFromFasta(
seq.fetch(seq_id), ksize, quiet, fw_only
):
kmers_for_seq.append(kmer_hash)
all_kmers.append(kmers_for_seq)
print(f"\n{seq_id} k-mers retrieved! \n")
Expand Down