CountESS-Project
diff --git a/‎docs/benchmark.rst‎
Lines changed: 15 additions & 8 deletions b/‎docs/benchmark.rst‎
Lines changed: 15 additions & 8 deletions
diff --git a/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/notebooks/benchmarks_bz2.ipynb‎
Lines changed: 4 additions & 18 deletions b/‎docs/notebooks/benchmarks_bz2.ipynb‎
Lines changed: 4 additions & 18 deletions
diff --git a/‎docs/notebooks/benchmarks_gz.ipynb‎
Lines changed: 6 additions & 20 deletions b/‎docs/notebooks/benchmarks_gz.ipynb‎
Lines changed: 6 additions & 20 deletions
diff --git a/‎docs/notebooks/benchmarks_raw.ipynb‎
Lines changed: 6 additions & 20 deletions b/‎docs/notebooks/benchmarks_raw.ipynb‎
Lines changed: 6 additions & 20 deletions
diff --git a/‎docs/notebooks/exported/benchmarks_bz2.rst‎
Lines changed: 6 additions & 8 deletions b/‎docs/notebooks/exported/benchmarks_bz2.rst‎
Lines changed: 6 additions & 8 deletions
diff --git a/‎docs/notebooks/exported/benchmarks_gz.rst‎
Lines changed: 36 additions & 22 deletions b/‎docs/notebooks/exported/benchmarks_gz.rst‎
Lines changed: 36 additions & 22 deletions
@@ -1,18 +1,25 @@
 Performance comparison
 *****************************
 
-This page contains some performance and usage comparisons for processing FASTQ_ files with fqfa and `pyfastx <https://github.com/lmdu/pyfastx>`_.
+This page contains some performance and usage comparisons for processing FASTQ_ files with
+fqfa and `pyfastx <https://github.com/lmdu/pyfastx>`_.
 
 In these benchmarks, fqfa is comparable to `pyfastx <https://github.com/lmdu/pyfastx>`_,
-although `pyfastx <https://github.com/lmdu/pyfastx>`_ run in non-indexed mode is fastest.
+although `pyfastx <https://github.com/lmdu/pyfastx>`_ has made substantial performance
+improvements since fqfa was written, particularly when reading gzip-compressed input files.
 
 The results are derived from `Jupyter notebooks <https://jupyter.org/>`_.
-If you'd like to run this code yourself, the notebooks are available with the fqfa documentation in ``fqfa/docs/notebooks``.
-The file used in the benchmark is from the `Enrich2 example dataset <https://github.com/FowlerLab/Enrich2-Example>`_.
-To run the benchmarks as written, you will have to decompress the bz2 file and also create a gzipped version.
-
-This section includes examples of usage that are common in my work, primarily in processing files of barcode reads for high-throughput functional genomic assays.
-`pyfastx <https://github.com/lmdu/pyfastx>`_ includes many other functions that are not demonstrated here.
+If you'd like to run this code yourself, the notebooks are available with the fqfa
+documentation in ``fqfa/docs/notebooks``.
+The file used in the benchmark is from the
+`Enrich2 example dataset <https://github.com/FowlerLab/Enrich2-Example>`_.
+To run the benchmarks as written, you will have to decompress the bz2 file and also
+create a gzipped version.
+
+This section includes examples of usage that are common in my work, primarily in
+processing files of barcode reads for high-throughput functional genomic assays.
+`pyfastx <https://github.com/lmdu/pyfastx>`_ includes many other functions that are not
+demonstrated here.
 
 Benchmarking for raw FASTQ files
 #####################################
 
@@ -23,7 +23,7 @@
 author = "Alan F Rubin"
 
 # The full version, including alpha/beta/rc tags
-release = "1.0.0"
+release = "1.1.0"
 
 
 # -- General configuration ---------------------------------------------------
 
@@ -114,20 +114,6 @@
     "print(f\"Kept {len(filt_reads)} reads after applying filter.\")\n",
     "del filt_reads"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -146,18 +132,18 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.5rc1"
+   "version": "3.8.2"
   },
   "pycharm": {
    "stem_cell": {
     "cell_type": "raw",
-    "source": [],
     "metadata": {
      "collapsed": false
-    }
+    },
+    "source": []
    }
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
@@ -33,10 +33,10 @@
     "\n",
     "```\n",
     "334M    BRCA1_input_sample.fq\n",
-    "48M     BRCA1_input_sample.fq.bz2\n",
-    "520M    BRCA1_input_sample.fq.fxi\n",
-    "68M     BRCA1_input_sample.fq.gz\n",
-    "522M    BRCA1_input_sample.fq.gz.fxi\n",
+    " 48M    BRCA1_input_sample.fq.bz2\n",
+    "511M    BRCA1_input_sample.fq.fxi\n",
+    " 68M    BRCA1_input_sample.fq.gz\n",
+    "513M    BRCA1_input_sample.fq.gz.fxi\n",
     "```"
    ]
   },
@@ -184,7 +184,7 @@
     "# Benchmark 3: filtering reads on quality\n",
     "\n",
     "This code creates a list of reads for which all bases are at least Q20.\n",
-    "The performance and usage in this section is quite similar to Benchmark 2."
+    "Usage in this section is similar to Benchmark 2, but performance is quite a bit faster following recent performance improvements in pyfastx."
    ]
   },
   {
@@ -245,20 +245,6 @@
     "print(f\"Kept {len(filt_reads)} reads after applying filter.\")\n",
     "del filt_reads"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -277,7 +263,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.5rc1"
+   "version": "3.8.2"
   },
   "pycharm": {
    "stem_cell": {
 
@@ -32,10 +32,10 @@
     "\n",
     "```\n",
     "334M    BRCA1_input_sample.fq\n",
-    "48M     BRCA1_input_sample.fq.bz2\n",
-    "520M    BRCA1_input_sample.fq.fxi\n",
-    "68M     BRCA1_input_sample.fq.gz\n",
-    "522M    BRCA1_input_sample.fq.gz.fxi\n",
+    " 48M    BRCA1_input_sample.fq.bz2\n",
+    "511M    BRCA1_input_sample.fq.fxi\n",
+    " 68M    BRCA1_input_sample.fq.gz\n",
+    "513M    BRCA1_input_sample.fq.gz.fxi\n",
     "```"
    ]
   },
@@ -181,7 +181,7 @@
     "# Benchmark 3: filtering reads on quality\n",
     "\n",
     "This code creates a list of reads for which all bases are at least Q20.\n",
-    "The performance and usage in this section is quite similar to Benchmark 2."
+    "Usage in this section is similar to Benchmark 2, but performance is quite a bit faster following recent performance improvements in pyfastx."
    ]
   },
   {
@@ -240,20 +240,6 @@
     "print(f\"Kept {len(filt_reads)} reads after applying filter.\")\n",
     "del filt_reads"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -272,7 +258,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.5rc1"
+   "version": "3.8.2"
   }
  },
  "nbformat": 4,
 
@@ -31,8 +31,8 @@ statement.
 
 .. parsed-literal::
 
-    CPU times: user 51.3 s, sys: 993 ms, total: 52.3 s
-    Wall time: 52.3 s
+    CPU times: user 42.2 s, sys: 1.05 s, total: 43.3 s
+    Wall time: 43.4 s
     @140313_SN743_0432_AC3TTHACXX:4:1101:5633:2224:1#0/1
     CCCGTGGCCTTTTCCA
     +
@@ -81,8 +81,8 @@ FastqRead class.
 
 .. parsed-literal::
 
-    CPU times: user 1min 59s, sys: 174 ms, total: 1min 59s
-    Wall time: 1min 59s
+    CPU times: user 1min 35s, sys: 277 ms, total: 1min 35s
+    Wall time: 1min 35s
     Median average quality is 37.5
 
 
@@ -109,9 +109,7 @@ class.
 
 .. parsed-literal::
 
-    CPU times: user 58.8 s, sys: 848 ms, total: 59.7 s
-    Wall time: 59.7 s
+    CPU times: user 43 s, sys: 784 ms, total: 43.8 s
+    Wall time: 43.8 s
     Kept 3641762 reads after applying filter.
 
-
-
@@ -22,10 +22,10 @@ than the reads in this case:
 ::
 
    334M    BRCA1_input_sample.fq
-   48M     BRCA1_input_sample.fq.bz2
-   520M    BRCA1_input_sample.fq.fxi
-   68M     BRCA1_input_sample.fq.gz
-   522M    BRCA1_input_sample.fq.gz.fxi
+    48M    BRCA1_input_sample.fq.bz2
+   511M    BRCA1_input_sample.fq.fxi
+    68M    BRCA1_input_sample.fq.gz
+   513M    BRCA1_input_sample.fq.gz.fxi
 
 .. code:: ipython3
 
@@ -37,8 +37,8 @@ than the reads in this case:
 
 .. parsed-literal::
 
-    CPU times: user 32.1 s, sys: 18.6 s, total: 50.7 s
-    Wall time: 50.8 s
+    CPU times: user 9.1 s, sys: 1.05 s, total: 10.1 s
+    Wall time: 10.2 s
     <Read> 140313_SN743_0432_AC3TTHACXX:4:1101:5633:2224:1#0/1 with length of 16
     <Read> 140313_SN743_0432_AC3TTHACXX:4:1101:6580:2239:1#0/1 with length of 16
     <Read> 140313_SN743_0432_AC3TTHACXX:4:1101:6929:2242:1#0/1 with length of 16
@@ -62,8 +62,8 @@ doesn’t perform any extra computation or quality value conversion.
 
 .. parsed-literal::
 
-    CPU times: user 3.34 s, sys: 452 ms, total: 3.79 s
-    Wall time: 3.79 s
+    CPU times: user 2.59 s, sys: 312 ms, total: 2.9 s
+    Wall time: 2.9 s
     ('140313_SN743_0432_AC3TTHACXX:4:1101:5633:2224:1#0/1', 'CCCGTGGCCTTTTCCA', 'B@CFFFFFHHHHHJJJ')
     ('140313_SN743_0432_AC3TTHACXX:4:1101:6580:2239:1#0/1', 'TTTGGTAAAGGGTAAC', 'BBCFFDFFHHHHDHIJ')
     ('140313_SN743_0432_AC3TTHACXX:4:1101:6929:2242:1#0/1', 'AATAATGTATGTACCT', 'BC@FFFFEFHHHHJJJ')
@@ -89,8 +89,8 @@ statement.
 
 .. parsed-literal::
 
-    CPU times: user 39.7 s, sys: 757 ms, total: 40.5 s
-    Wall time: 40.5 s
+    CPU times: user 30.8 s, sys: 881 ms, total: 31.6 s
+    Wall time: 31.6 s
     @140313_SN743_0432_AC3TTHACXX:4:1101:5633:2224:1#0/1
     CCCGTGGCCTTTTCCA
     +
@@ -138,6 +138,14 @@ information is not provided.
     print(f"Median average quality is {median(read_quals)}")
     del read_quals
 
+
+.. parsed-literal::
+
+    CPU times: user 53.9 s, sys: 323 ms, total: 54.2 s
+    Wall time: 54.2 s
+    Median average quality is 37.5
+
+
 pyfastx without index
 ---------------------
 
@@ -154,8 +162,8 @@ processing the input file.
 
 .. parsed-literal::
 
-    CPU times: user 1min 12s, sys: 95.6 ms, total: 1min 12s
-    Wall time: 1min 12s
+    CPU times: user 55.9 s, sys: 15.4 ms, total: 55.9 s
+    Wall time: 56 s
     Median average quality is 37.5
 
 
@@ -175,17 +183,17 @@ FastqRead class.
 
 .. parsed-literal::
 
-    CPU times: user 1min 42s, sys: 119 ms, total: 1min 42s
-    Wall time: 1min 42s
+    CPU times: user 1min 23s, sys: 55.6 ms, total: 1min 23s
+    Wall time: 1min 23s
     Median average quality is 37.5
 
 
 Benchmark 3: filtering reads on quality
 =======================================
 
 This code creates a list of reads for which all bases are at least Q20.
-The performance and usage in this section is quite similar to Benchmark
-2.
+Usage in this section is similar to Benchmark 2, but performance is quite
+a bit faster following recent performance improvements in pyfastx.
 
 pyfastx with index
 ------------------
@@ -199,6 +207,14 @@ information is not provided.
     print(f"Kept {len(filt_reads)} reads after applying filter.")
     del filt_reads
 
+
+.. parsed-literal::
+
+    CPU times: user 6.17 s, sys: 360 ms, total: 6.53 s
+    Wall time: 6.53 s
+    Kept 3641707 reads after applying filter.
+
+
 pyfastx without index
 ---------------------
 
@@ -211,8 +227,8 @@ pyfastx without index
 
 .. parsed-literal::
 
-    CPU times: user 9.29 s, sys: 356 ms, total: 9.65 s
-    Wall time: 9.65 s
+    CPU times: user 7.24 s, sys: 620 ms, total: 7.86 s
+    Wall time: 7.87 s
     Kept 3641762 reads after applying filter.
 
 
@@ -232,9 +248,7 @@ class.
 
 .. parsed-literal::
 
-    CPU times: user 39.9 s, sys: 884 ms, total: 40.8 s
-    Wall time: 40.8 s
+    CPU times: user 31.2 s, sys: 660 ms, total: 31.9 s
+    Wall time: 31.9 s
     Kept 3641762 reads after applying filter.
 
-
-