{
  "_id": "6a1f0e8eb401979e7341d388",
  "Package": "textpress",
  "Type": "Package",
  "Title": "A Lightweight and Versatile NLP Toolkit",
  "Version": "1.1.1",
  "Authors@R": "c(person(\"Jason\", \"Timm\", role = c(\"aut\", \"cre\"), email = \"JaTimm@salud.unm.edu\"))",
  "Maintainer": "Jason Timm <JaTimm@salud.unm.edu>",
  "Description": "An R toolkit for building text corpora and searching them.\nNo custom object classes, just plain data frames from start to\nfinish. Covers the full arc from URL to retrieved passage\nthrough a consistent four-step API: Fetch, Read, Process,\nSearch. Traditional tools (KWIC, BM25, dictionary matching) sit\nalongside modern ones (semantic search, LLM-ready chunking),\nall compatible with the native R pipe ('|>').",
  "License": "MIT + file LICENSE",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.3",
  "URL": "https://github.com/jaytimm/textpress,\nhttps://jaytimm.github.io/textpress/",
  "BugReports": "https://github.com/jaytimm/textpress/issues",
  "Config/pak/sysreqs": "libicu-dev libxml2-dev libssl-dev",
  "Repository": "https://jaytimm.r-universe.dev",
  "Date/Publication": "2026-03-18 16:29:04 UTC",
  "RemoteUrl": "https://github.com/jaytimm/textpress",
  "RemoteRef": "HEAD",
  "RemoteSha": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-17 08:50:05 UTC",
    "User": "root"
  },
  "Author": "Jason Timm [aut, cre]",
  "MD5sum": "32390fc04ebe4b2889953f05f77f9691",
  "_user": "jaytimm",
  "_type": "src",
  "_file": "textpress_1.1.1.tar.gz",
  "_fileid": "cb3e6674235d42a5ee17e19c15d878fb4c00fae52cb289a9add1d900ede67edd",
  "_filesize": 172104,
  "_sha256": "cb3e6674235d42a5ee17e19c15d878fb4c00fae52cb289a9add1d900ede67edd",
  "_created": "2026-05-17T08:50:05.000Z",
  "_published": "2026-06-02T17:10:38.261Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79128644321,
      "time": 170,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "WARNING",
      "artifact": "7040981771"
    },
    {
      "job": 79128644135,
      "time": 164,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "WARNING",
      "artifact": "7040981278"
    },
    {
      "job": 79128644530,
      "time": 136,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "WARNING",
      "artifact": "7040975197"
    },
    {
      "job": 79128644194,
      "time": 133,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "WARNING",
      "artifact": "7040974320"
    },
    {
      "job": 79128643299,
      "time": 158,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7040959906"
    },
    {
      "job": 79128643394,
      "time": 108,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7364710516"
    },
    {
      "job": 79128644053,
      "time": 79,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "WARNING",
      "artifact": "7040970403"
    },
    {
      "job": 79128644412,
      "time": 73,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "WARNING",
      "artifact": "7040969518"
    },
    {
      "job": 79128644404,
      "time": 95,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "WARNING",
      "artifact": "7040972357"
    }
  ],
  "_buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/jaytimm/textpress",
  "_commit": {
    "id": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
    "author": "jaytimm <jason.timm@zohomail.com>",
    "committer": "jaytimm <jason.timm@zohomail.com>",
    "message": "vignette edits\n",
    "time": 1773851344
  },
  "_maintainer": {
    "name": "Jason Timm",
    "email": "jatimm@salud.unm.edu"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5",
      "role": "Depends"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "httr",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "role": "Imports"
    },
    {
      "package": "rvest",
      "role": "Imports"
    },
    {
      "package": "stringi",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "xml2",
      "role": "Imports"
    },
    {
      "package": "pbapply",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Imports"
    },
    {
      "package": "lubridate",
      "role": "Imports"
    },
    {
      "package": "SnowballC",
      "version": ">= 0.7.0",
      "role": "Suggests"
    },
    {
      "package": "DT",
      "role": "Suggests"
    },
    {
      "package": "dplyr",
      "role": "Suggests"
    }
  ],
  "_owner": "jaytimm",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2026-08",
      "n": 4
    },
    {
      "week": "2026-09",
      "n": 33
    },
    {
      "week": "2026-10",
      "n": 10
    },
    {
      "week": "2026-12",
      "n": 11
    }
  ],
  "_tags": [],
  "_topics": [
    "corpus-search",
    "nlp",
    "web-scraping"
  ],
  "_stars": 3,
  "_contributors": [
    {
      "user": "jaytimm",
      "count": 109,
      "uuid": 25044244
    }
  ],
  "_userbio": {
    "uuid": 25044244,
    "type": "user",
    "name": "Jason Timm"
  },
  "_downloads": {
    "count": 498,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/textpress"
  },
  "_devurl": "https://github.com/jaytimm/textpress",
  "_pkgdown": "https://jaytimm.github.io/textpress/",
  "_searchresults": 6,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/textpress.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/jaytimm/textpress",
  "_realowner": "jaytimm",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2024-10-14"
    },
    {
      "version": "1.1.0",
      "date": "2026-02-23"
    },
    {
      "version": "1.1.1",
      "date": "2026-03-17"
    }
  ],
  "_exports": [
    "abbreviations",
    "dict_generations",
    "dict_political",
    "fetch_urls",
    "fetch_wiki_refs",
    "fetch_wiki_urls",
    "nlp_cast_tokens",
    "nlp_index_tokens",
    "nlp_roll_chunks",
    "nlp_split_paragraphs",
    "nlp_split_sentences",
    "nlp_tokenize_text",
    "read_urls",
    "search_dict",
    "search_index",
    "search_regex",
    "search_vector",
    "util_fetch_embeddings"
  ],
  "_help": [
    {
      "page": "abbreviations",
      "title": "Common abbreviations for NLP",
      "topics": [
        "abbreviations"
      ]
    },
    {
      "page": "dict_generations",
      "title": "Demo dictionary of generation-name variants for NER",
      "topics": [
        "dict_generations"
      ]
    },
    {
      "page": "dict_political",
      "title": "Demo dictionary of political / partisan term variants for NER",
      "topics": [
        "dict_political"
      ]
    },
    {
      "page": "fetch_urls",
      "title": "Fetch URLs from a search engine",
      "topics": [
        "fetch_urls"
      ]
    },
    {
      "page": "fetch_wiki_refs",
      "title": "Fetch external citation URLs from Wikipedia article(s)",
      "topics": [
        "fetch_wiki_refs"
      ]
    },
    {
      "page": "fetch_wiki_urls",
      "title": "Fetch Wikipedia page URLs by search query",
      "topics": [
        "fetch_wiki_urls"
      ]
    },
    {
      "page": "nlp_cast_tokens",
      "title": "Convert token list to data frame",
      "topics": [
        "nlp_cast_tokens"
      ]
    },
    {
      "page": "nlp_index_tokens",
      "title": "Build a BM25 index for ranked keyword search",
      "topics": [
        "nlp_index_tokens"
      ]
    },
    {
      "page": "nlp_roll_chunks",
      "title": "Roll units into fixed-size chunks with optional context",
      "topics": [
        "nlp_roll_chunks"
      ]
    },
    {
      "page": "nlp_split_paragraphs",
      "title": "Split text into paragraphs",
      "topics": [
        "nlp_split_paragraphs"
      ]
    },
    {
      "page": "nlp_split_sentences",
      "title": "Split text into sentences",
      "topics": [
        "nlp_split_sentences"
      ]
    },
    {
      "page": "nlp_tokenize_text",
      "title": "Tokenize text into a clean token stream",
      "topics": [
        "nlp_tokenize_text"
      ]
    },
    {
      "page": "read_urls",
      "title": "Read content from URLs",
      "topics": [
        "read_urls"
      ]
    },
    {
      "page": "search_dict",
      "title": "Exact phrase / MWE matcher",
      "topics": [
        "search_dict"
      ]
    },
    {
      "page": "search_index",
      "title": "Search the BM25 index",
      "topics": [
        "search_index"
      ]
    },
    {
      "page": "search_regex",
      "title": "Search corpus by regex",
      "topics": [
        "search_regex"
      ]
    },
    {
      "page": "search_vector",
      "title": "Semantic search by cosine similarity",
      "topics": [
        "search_vector"
      ]
    },
    {
      "page": "util_fetch_embeddings",
      "title": "Fetch embeddings from a Hugging Face inference endpoint",
      "topics": [
        "util_fetch_embeddings"
      ]
    }
  ],
  "_readme": "https://github.com/jaytimm/textpress/raw/HEAD/README.md",
  "_rundeps": [
    "askpass",
    "cli",
    "cpp11",
    "curl",
    "data.table",
    "generics",
    "glue",
    "httr",
    "jsonlite",
    "lattice",
    "lifecycle",
    "lubridate",
    "magrittr",
    "Matrix",
    "mime",
    "openssl",
    "pbapply",
    "pillar",
    "pkgconfig",
    "R6",
    "rlang",
    "rvest",
    "selectr",
    "stringi",
    "stringr",
    "sys",
    "tibble",
    "timechange",
    "utf8",
    "vctrs",
    "xml2"
  ],
  "_score": 3.7781512503836434,
  "_indexed": true,
  "_nocasepkg": "textpress",
  "_universes": [
    "jaytimm"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.1.1",
      "date": "2026-05-17T08:52:52.000Z",
      "distro": "noble",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "a9701a6683ab1d3acb31a41d4baf1f5b70477befa7f72d9c45fd2369fe6eb590",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.1.1",
      "date": "2026-05-17T08:52:48.000Z",
      "distro": "noble",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "6e40fdcfd38e72c83be9f3e214f7a326fb36b33d6c92c011f1eb271b8751e718",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.1.1",
      "date": "2026-05-17T08:52:06.000Z",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "435c53e4154e9726f15362a4768ff54f5e63656fe2592e11238b1b5798f3265b",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.1.1",
      "date": "2026-05-17T08:51:58.000Z",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "e80addb1d3136b9728cfec368db6a6f3bb168b6004c8efa7bc0502b35304d709",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.1.1",
      "date": "2026-05-17T08:51:21.000Z",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "b90612cd7179adae0e66cd73337e3a6be479bb4b322a915e23f6c379f0e60322",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.1.1",
      "date": "2026-05-17T08:51:12.000Z",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "77f0bbde3298a99f0ffa551e34a9d60f61aeab63051edd728e2001695148d2b5",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.1.1",
      "date": "2026-05-17T08:51:32.000Z",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "6073147a5e3d6339a68b2e6ec313a06fa0e69144b402bc240380402b8f878151",
      "status": "success",
      "check": "WARNING",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.1.1",
      "date": "2026-06-02T17:10:21.000Z",
      "commit": "33c73e76ae5d72cd498dd2d0683c532dd1021757",
      "fileid": "e98e94497af7621533c5bd2788eae4699d7e2a06f260f384570c8246abf15b74",
      "status": "success",
      "buildurl": "https://github.com/r-universe/jaytimm/actions/runs/25986261745"
    }
  ]
}