diff --git a/src/FalcoConfig.cpp b/src/FalcoConfig.cpp index 1d855e3..6b13ab6 100644 --- a/src/FalcoConfig.cpp +++ b/src/FalcoConfig.cpp @@ -300,6 +300,7 @@ FalcoConfig::FalcoConfig(const int argc, char *argv[]) { is_bam = false; is_fastq = false; is_fastq_gz = false; + is_stdin = false; std::ostringstream ost; for (int i = 0; i < argc; ++i) { @@ -356,8 +357,18 @@ FalcoConfig::setup() { // Now check for the file format (FASTQ/SAM/BAM, compressed or not) define_file_format(); - // Get filename without absolute path - filename_stripped = std::filesystem::path(filename).filename().string(); + // If filename indicates stdin with prefix like "stdin:prefix", + // set is_stdin and derive filename_stripped from the prefix. + if (filename.rfind("stdin:", 0) == 0) { + is_stdin = true; + // strip the "stdin:" prefix and use remainder as stripped filename + const std::string prefix = filename.substr(6); + filename_stripped = prefix.empty() ? std::string{"stdin"} : prefix; + } + else { + // Get filename without absolute path + filename_stripped = std::filesystem::path(filename).filename().string(); + } // read which modules to run and the cutoffs for pass/warn/fail read_limits(); @@ -387,6 +398,12 @@ FalcoConfig::define_file_format() { if (format.empty()) { auto filename_lower(filename); to_lower(filename_lower); + // If input is from stdin (stdin:), assume plain FASTQ unless + // the user explicitly forced a format. 
+ if (filename.rfind("stdin:", 0) == 0) { + is_fastq = true; + return; + } if (endswith(filename, ".sam") || endswith(filename, ".sam_mapped")) { is_sam = true; } diff --git a/src/FalcoConfig.hpp b/src/FalcoConfig.hpp index 092b48d..d8fb3d3 100644 --- a/src/FalcoConfig.hpp +++ b/src/FalcoConfig.hpp @@ -104,6 +104,7 @@ struct FalcoConfig { // IO bool is_sam, is_bam, is_fastq, is_fastq_gz; + bool is_stdin; // whether input should be read from standard input std::string filename; std::string filename_stripped; diff --git a/src/StreamReader.cpp b/src/StreamReader.cpp index fb1f769..a77b700 100644 --- a/src/StreamReader.cpp +++ b/src/StreamReader.cpp @@ -36,6 +36,11 @@ get_tile_split_position(FalcoConfig &config) { // Count colons to know the formatting pattern size_t num_colon = 0; + // If reading from stdin (stdin:), we cannot open the file to + // inspect read names; assume no tile information. + if (filename.rfind("stdin:", 0) == 0) + return 0; + if (config.is_sam) { std::ifstream sam_file(filename); if (!sam_file) @@ -639,6 +644,11 @@ StreamReader::check_bytes_read(const size_t read_num) { /*******************************************************/ char get_line_separator(const std::string &filename) { + // If filename indicates stdin (stdin:) assume standard newline + if (filename.rfind("stdin:", 0) == 0) { + return '\n'; + } + FILE *fp = fopen(filename.c_str(), "r"); if (fp == NULL) throw std::runtime_error("bad input file: " + filename); @@ -663,6 +673,11 @@ FastqReader::FastqReader(FalcoConfig &_config, const size_t _buffer_size) : size_t get_file_size(const std::string &filename) { + // For stdin-mode inputs (stdin:) we cannot determine size; return 1 + if (filename.rfind("stdin:", 0) == 0) { + return 1; + } + FILE *fp = fopen(filename.c_str(), "r"); if (fp == NULL) throw std::runtime_error("bad input file: " + filename); @@ -677,6 +692,12 @@ get_file_size(const std::string &filename) { // Load fastq with zlib size_t FastqReader::load() { + // If filename 
indicates stdin (stdin:) then use stdin as input + if (filename.rfind("stdin:", 0) == 0) { + fileobj = stdin; + return 1; // unknown size + } + fileobj = fopen(filename.c_str(), "r"); if (fileobj == NULL) throw std::runtime_error("Cannot open FASTQ file : " + filename); @@ -691,7 +712,9 @@ FastqReader::is_eof() { FastqReader::~FastqReader() { delete[] filebuf; - fclose(fileobj); + // Only close if it's not stdin + if (fileobj && fileobj != stdin) + fclose(fileobj); } // Parses fastq gz by reading line by line into the gzbuf @@ -746,6 +769,16 @@ GzFastqReader::GzFastqReader(FalcoConfig &_config, const size_t _buffer_size) : // Load fastq with zlib size_t GzFastqReader::load() { + // We do not support reading compressed streams from stdin via this API. + // Users should decompress upstream (e.g. with zcat) and pass contents + // with the stdin: convention. If filename indicates stdin, throw + // an informative error. + if (filename.rfind("stdin:", 0) == 0) { + throw std::runtime_error( + "Compressed stdin not supported: pipe decompressed data (e.g. 
zcat) " + "and use stdin: as filename"); + } + fileobj = gzopen(filename.c_str(), "r"); if (fileobj == Z_NULL) throw std::runtime_error("Cannot open gzip FASTQ file : " + filename); diff --git a/src/falco.cpp b/src/falco.cpp index 30702a8..0f64201 100644 --- a/src/falco.cpp +++ b/src/falco.cpp @@ -581,15 +581,28 @@ main(int argc, char *argv[]) { // ADS: make sure all input files are non-empty unless user oks it if (!allow_empty_input) { for (const auto &fn : leftover_args) { - std::error_code ec; - const bool empty_file = std::filesystem::is_empty(fn, ec); - if (ec) { - std::cerr << "Error reading file: " << fn << " (" << ec.message() - << ")\n"; - return EXIT_FAILURE; + // Skip emptiness checks for stdin-mode inputs (stdin:) + if (fn.rfind("stdin:", 0) == 0) + continue; + + try { + std::error_code ec; + const bool empty_file = std::filesystem::is_empty(fn, ec); + if (ec) { + std::cerr << "Error reading file: " << fn << " (" << ec.message() + << ")\n"; + return EXIT_FAILURE; + } + else if (empty_file) { + std::cerr << "Input file is empty: " << fn << '\n'; + return EXIT_FAILURE; + } } - else if (empty_file) { - std::cerr << "Input file is empty: " << fn << '\n'; + catch (const std::filesystem::filesystem_error &e) { + // If it's a stdin-style argument, skip; otherwise report + if (fn.rfind("stdin:", 0) == 0) + continue; + std::cerr << "Error reading file: " << fn << " (" << e.what() << ")\n"; return EXIT_FAILURE; } } @@ -632,9 +645,12 @@ main(int argc, char *argv[]) { // check if all filenames exist bool all_files_exist = true; for (std::size_t i = 0; i < std::size(all_seq_filenames); ++i) { - if (!std::filesystem::exists(all_seq_filenames[i])) { - std::cerr << "ERROR! File does not exist: " << all_seq_filenames[i] - << '\n'; + const auto &fn = all_seq_filenames[i]; + // allow stdin: to represent stdin; skip exists check for it + if (fn.rfind("stdin:", 0) == 0) + continue; + if (!std::filesystem::exists(fn)) { + std::cerr << "ERROR! 
File does not exist: " << fn << '\n'; all_files_exist = false; } }