src: Add accumulated native code

Taeyeon Mori 3 years ago
parent 8832b455f5
commit 29d095cccc
  1. 8
  2. 80
  3. 141
  4. 187
  5. 429
  6. 68
  7. 692
  8. 78
  9. 110
  10. 241
  11. 81
  12. 68
  13. 418
  14. 90
  15. 95
  16. 431
  17. 192
  18. 591
  19. 11

src/.gitignore vendored

@ -0,0 +1,8 @@

@ -0,0 +1,80 @@
Random Sources
Playground for random utilities, mostly unprivileged single-user linux namespaces
### chome
Bind mount a different directory on top of $HOME to (partially) isolate a process
### fakensudo
Pretend to be root (uid 0) by running in a single-user namespace mapping one's own UID to 0
### keepassxc-print
Retrieve passwords from KeePassXC on the commandline via the browser interface.
### overlayns
Run a command in a custom mount namespace. Like `unshare -mUc` with the added possibility of setting up custom mounts in the namespace before running the target application
### ssh-overlay-kiosk
Create an ephemeral home directory for each invocation.
### steamns
Isolate steam (and other 32-bit apps) in an unprivileged single-user-namespace "chroot"
### keepassxc-browser.hpp
Very simple library for interacting with KeePassXC's browser interface from native code
Depends on libsodium, jsoncpp, ko::proc
### ko::fd
Convenient wrapper around Linux APIs with dirfd support
kofd\_pipe.hpp adds a class for working with pairs of uni-directional pipes
Depends on ko::fs
### ko::fs
Misc. filesystem utilities
- cpath: Type that is trivially convertible to const char\* and from std::string and std::filesystem::path
- dir\_ptr: Convenient iterator-based wrapper around the dirent API
### ko::ns
Utilities for working with Linux Namespaces (unshare, clone, setns)
Depends on ko::util, ko::fd, ko::os
- ko::ns::idmap: Functions for writing /proc/$$/Xidmap
- ko::ns::mount: Functions for setting up mount namespaces
- ko::ns::clone: Helpers for spawning processes in new namespaces (kons\_clone.hpp, requires ko::proc)
### ko::os
Misc. OS helpers
Depends on ko::fs
- get\_home()
- is\_mountpoint()
### ko::proc
Utilities for spawning and managing child processes
Depends on pthread, ko::fd
- popen[p]: Spawn subprocess and communicate via pipes
- sync::semapair: Synchronization across processes
- child\_ref: Child process reference with cleanup
- [s]vclone: Wrappers around linux clone(CLONE\_VM)
- simple\_spawn: Trivial fork(); execvp() primitive
### ko::util
Misc. utilities
- str: Type-safe-ly concatenate all arguments
- cvshort: Short-circuit continuation using C-Style return codes

@ -0,0 +1,141 @@
#include <iostream>
#include <filesystem>
#include <unordered_set>
#include <fstream>
#include "koutil.hpp"
#include "kofd.hpp"
#include "koos.hpp"
namespace fs = std::filesystem;
/**
 * Print the command-line help text to standard output.
 * @param prog The program name shown in the usage line (normally argv[0])
 */
void usage(const char *prog) {
    // A single flush at the very end is enough; the intermediate lines use '\n'.
    std::cout << "Usage: " << prog << " [option...] <newhome> [prog] [arg...]" << '\n'
              << " (c) 2019 Taeyeon Mori" << '\n'
              << '\n'
              << " This program allows confining an application to its own home directory" << '\n'
              << " without changing the literal home directory path." << '\n'
              << '\n'
              << "Options:" << '\n'
              << " -h Display this help text" << '\n'
              << " -H HOME Override the home directory path" << '\n'
              << " -w Don't make / read-only" << '\n'
              << " -W Preserve working directory" << '\n'
              //<< " -s Make (the rest of) /home inaccessible" << '\n'
              //<< " -S Make /media and /mnt inaccessible as well (implies -s)" << '\n'
              //<< " -x PATH Make path inaccessible" << '\n'
              << '\n'
              << "Parameters:" << '\n'
              << " newhome The new home directory path" << '\n'
              << " prog The executable to run (defaults to $SHELL)" << '\n'
              << " arg... The executable parameters" << std::endl;
}
struct params {
fs::path home, newhome;
bool rw = false,
nohome = false,
nomnt = false,
pwd = true;
std::unordered_set<std::string> hide;
const char *const *argv = nullptr;
int bindfile(const params &p, fs::path path) {
auto opath = p.home / path;
if (fs::exists(opath)) {
auto npath = p.newhome / path;
if (!fs::exists(npath)) {
if (fs::is_directory(opath))
else {
auto touch = std::ofstream(npath);
if(ko::os::bind(opath, npath, 0))
return -1;
return ko::os::bind(npath, npath, MS_REMOUNT|MS_RDONLY);
return 0;
int pmain(params p) {
auto uid = getuid(),
gid = getgid();
auto [e, eloc] = ko::util::cvshort()
.then("unshare", ::unshare, CLONE_NEWUSER|CLONE_NEWNS)
.then("bind Xauthority", bindfile, p, ".Xauthority")
.then("bind pulse cookie", bindfile, p, ".config/pulse/cookie")
.then("bind home", ko::os::bind, p.newhome, p.home, MS_REC)
.ifthen("make / ro", !, ko::os::bind, "/", "/", MS_REMOUNT|MS_RDONLY)
.ifthen("chdir", p.pwd, ::chdir, p.home.c_str())
.then([uid,gid]() -> ko::util::cvresult {
auto dir = ko::fd::opendir("/proc/self");
if (!dir)
return {-1, "open /proc/self"};
if (!ko::fd::dump("deny", "setgroups", 0644, dir))
return {-1, "write setgroups"};
if (!ko::fd::dump(ko::util::str(gid, " ", gid, " 1\n"), "gid_map", 0644, dir))
return {-1, "write gid_map"};
if (!ko::fd::dump(ko::util::str(uid, " ", uid, " 1\n"), "uid_map", 0644, dir))
return {-1, "write uid_map"};
return {0, nullptr};
.then("setresgid", ::setresgid, gid, gid, gid)
.then("setresuid", ::setresuid, uid, uid, uid)
.then("exec", ::execvp, p.argv[0], const_cast<char *const *>(p.argv));
return e;
int main(int argc, char **argv) {
static const char *exec_argv[] = {getenv("SHELL"), nullptr};
params p{
.home = ko::os::get_home(),
.argv = exec_argv
constexpr auto spec = "+hH:wsSxW";
while (true) {
auto opt = getopt(argc, const_cast<char *const *>(argv), spec);
if (opt == -1)
else if (opt == '?' || opt == 'h') {
return opt == 'h' ? 0 : 1;
else if (opt == 'H')
p.home = ::optarg;
else if (opt == 'w') = true;
else if (opt == 'W')
p.pwd = false;
else if (opt == 's' || opt == 'S') {
if (opt == 'S') {
else if (opt == 'x')
if (argc == ::optind) {
std::cout << "Error: missing mandatory newhome argument, see `" << argv[0] << " -h`" << std::endl;
return 2;
p.newhome = argv[::optind++];
if (argc > ::optind)
p.argv = &argv[::optind];
return pmain(p);

@ -0,0 +1,187 @@
// Fake sudo using user namespace; Similar to fakeroot
// (c) 2020 Taeyeon Mori <taeyeon at>
#include "kons.hpp"
#include <getopt.h>
#include <grp.h>
#include <pwd.h>
#include <unistd.h>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <iostream>
// Helpers
/**
 * Report the current errno on stderr and turn it into a negative return code.
 * @param desc Message prefix, printed in the style of perror(3)
 * @return -errno, using the value errno had when the function was entered
 */
int xerror(const char *desc) {
    const int saved = errno;  // perror() is allowed to modify errno itself
    std::perror(desc);
    return -saved;
}
/**
 * Print a message to stderr and terminate the process.
 * @param r   The process exit status
 * @param msg The message to print
 */
[[noreturn]] void die(int r, const char *msg) {
    std::cerr << msg << std::endl;
    // A [[noreturn]] function must not return to its caller.
    std::exit(r);
}
/**
 * Print a message followed by the description of the current errno to stderr
 * and terminate the process.
 * @param r   The process exit status
 * @param msg Message prefix, printed in the style of perror(3)
 */
[[noreturn]] void die_errno(int r, const char *msg) {
    std::perror(msg);
    std::exit(r);
}
// ========================================================
// Main
// ========================================================
void usage(const char *prog) {
std::cout << "Usage:" << std::endl
<< " " << prog << " -h | -K | -k | -V" << std::endl
<< " " << prog << " -v [-k] [-u user] [-g group]" << std::endl
<< " " << prog << " -e [-k] [-u user] [-g group] [--] file" << std::endl
<< " " << prog << " [-bEHPk] [-u user] [-g group] [-i|-s] [--] command" << std::endl
<< std::endl
<< "General Options:" << std::endl
<< " -h Display this help text" << std::endl
<< std::endl;
struct config {
const char *const *exec_argv = nullptr;
bool background = false,
preserve_env = false,
editor = false,
login = false,
set_home = false,
preserve_groups = false,
run_shell = false;
uid_t uid = 0;
gid_t gid = 0;
template <typename F, typename R, typename T>
R get_pwd(F f, R std::remove_pointer_t<std::invoke_result_t<F,T>>::*fld, T nam) {
auto s = f(nam);
if (!s)
die(20, "Could not resolve user or group");
return s->*fld;
// Parse commandline arguments
// returns -1 on success, exit code otherwise
int parse_cmdline(config &conf, int argc, const char *const *argv) {
constexpr auto spec = "+hbEeg:HiKkPpsu:Vv";
constexpr option longspec[] = {{"help",0,nullptr,'h'},
while (true) {
auto opt = getopt_long(argc, const_cast<char *const *>(argv), spec, longspec, nullptr);
if (opt == -1)
else if (opt == '?' || opt == 'h') {
return opt == 'h' ? 0 : 1;
else if (opt == 'V') {
std::cout << "fakensudo Namespace fake sudo version 0.1" << std::endl
<< "(c) 2020 Taeyeon Mori" << std::endl;
return 0;
else if (opt == 'b') conf.background = true;
else if (opt == 'E') conf.preserve_env = true; // XXX: ignores the optinal list
else if (opt == 'e') conf.editor = true;
else if (opt == 'g') conf.gid = get_pwd(getgrnam, &group::gr_gid, optarg);
else if (opt == 'H') conf.set_home = true;
else if (opt == 'i') conf.login = true;
else if (opt == 'K') return 0; // XXX: check for clashes
else if (opt == 'k') /* pass */;
else if (opt == 'P') conf.preserve_groups = true;
else if (opt == 'p') /* pass */;
else if (opt == 's') conf.run_shell = true;
else if (opt == 'u') conf.uid = get_pwd(getpwnam, &passwd::pw_uid, optarg);
else if (opt == 'v') return 0; // XXX: properly check options
else die(10, "Unknown option encountered");
// Check sanity
bool good = true;
if (conf.run_shell || conf.login) {
if (conf.run_shell && conf.login)
good = false;
if (conf.editor)
good = false;
} else if (::optind >= argc)
good = false;
if (!good) {
return 5;
// Rest is child cmnd
if (argc > ::optind)
conf.exec_argv = &argv[::optind];
return -1;
int main(int argc, char **argv) {
// Set defaults
auto conf = config{};
// Parse commandline
auto perr = parse_cmdline(conf, argc, argv);
if (perr != -1)
return perr;
auto uerr = ko::ns::unshare_single(conf.uid, conf.gid, CLONE_NEWUSER);
if (uerr != 0)
die_errno(31, "unshare");
// Drop Permissions
setresgid(conf.gid, conf.gid, conf.gid);
setresuid(conf.uid, conf.uid, conf.uid);
auto exec_argv = conf.exec_argv;
if (conf.run_shell) {
auto shell = getenv("SHELL");
if (shell == nullptr)
die(41, "Could not get SHELL from environment");
if (conf.exec_argv == nullptr || *conf.exec_argv == nullptr)
exec_argv = new char*[]{shell, nullptr};
die(200, "-s not fully implemented");
} else if (conf.login) {
auto shell = get_pwd(getpwuid, &passwd::pw_shell, conf.uid);
if (shell == nullptr)
die(41, "Could not get SHELL from passwd record");
if (conf.exec_argv == nullptr || *conf.exec_argv == nullptr)
exec_argv = new char*[]{shell, "-l", nullptr};
die(200, "-i not fully implemented");
} else if (conf.editor) {
die(200, "-e not implemented");
// Exec
execvpe(exec_argv[0], const_cast<char *const*>(exec_argv), environ);
die_errno(33, "exec");

@ -0,0 +1,429 @@
// Simple Client-Library for the KeePassXC-Browser API
// (c) 2019 Taeyeon Mori
// Depends on: libsodium, jsoncpp
// NOTE: Users must make sure to initialize libsodium!
// WARNING: This currently does nothing to protect the keys in memory.
// Such measures could be added to crypto::, but as the key material
// is stored in a plain file on disk anyway, that seems to be a lot of useless work.
// This applies especially for small, short-lived cli utilities.
// WARNING: With a plain secrets file, the 'Never ask before accessing credentials' option in
// in KeePassXC becomes an even bigger security risk!
#pragma once
#include "koproc.hpp"
#include <json/json.h>
#include <sodium.h>
#include <string>
#include <iostream>
#include <sstream>
#include <optional>
#include <utility>
#include <memory>
#include <cstring>
#include <variant>
#include <unordered_map>
#include <unistd.h>
namespace keepassxc {
using string = std::string;
using data = std::basic_string<uint8_t>;
// Hack, but libsodium insists on unsigned char
// The result of this is cleaner than having individual
// casts all over the place and as a side benefit, it
// tends to prevent thoughtlessly trying to put binary
// data into json directly.
/// View a string as binary data without copying.
/// NOTE(review): this reinterprets std::string as std::basic_string<uint8_t>;
/// it relies on both specializations sharing one layout, which the standard
/// does not guarantee.
/// `inline` because this is defined in a header and may be included by several
/// translation units.
inline const data &data_cast(const string &s) {
    return *reinterpret_cast<const data*>(&s);
}
/// View binary data as a string without copying (inverse of data_cast).
/// NOTE(review): same layout assumption as data_cast — std::basic_string<uint8_t>
/// is reinterpreted as std::string, which the standard does not guarantee.
/// `inline` because this is defined in a header and may be included by several
/// translation units.
inline const string &nodata_cast(const data &d) {
    return *reinterpret_cast<const string*>(&d);
}
* Cryptography goes here
namespace crypto {
data generate_nonce() {
auto nonce = data(crypto_box_NONCEBYTES, 0);
::randombytes(, crypto_box_NONCEBYTES);
return nonce;
/// Return [[public_key, secret_key]]
std::optional<std::pair<data, data>> generate_keypair() {
auto seckey = data(crypto_box_SECRETKEYBYTES, 0);
auto pubkey = data(crypto_box_PUBLICKEYBYTES, 0);
if (::crypto_box_keypair(, == 0)
return {{pubkey, seckey}};
return {};
data encrypt(const data &plain, const data &nonce, const data &pubkey, const data &seckey) {
auto cipher = data(plain.size() + crypto_box_MACBYTES, 0);
const auto ok = crypto_box_easy(,, plain.size(),,, == 0;
return ok ? cipher : data();
data decrypt(const data &cipher, const data &nonce, const data &pubkey, const data &seckey) {
auto plain = data(cipher.size() - crypto_box_MACBYTES, 0);
const auto ok = crypto_box_open_easy(,, cipher.size(),,, == 0;
return ok ? plain : data();
string b64encode(const data &dec) {
auto enc = string(sodium_base64_ENCODED_LEN(dec.size(), sodium_base64_VARIANT_ORIGINAL), 0);
::sodium_bin2base64(, enc.size(),, dec.size(), sodium_base64_VARIANT_ORIGINAL);
return enc;
std::optional<data> b64decode(const string &enc) {
auto dec = data(enc.size() * 3 / 4 + 1, 0);
size_t data_len = 0;
if (::sodium_base642bin(, dec.size(),, enc.size(),
nullptr, &data_len, nullptr, sodium_base64_VARIANT_ORIGINAL) == 0) {
return dec;
return {};
void increment(data &n) {
::sodium_increment(, n.size());
* The keepassxc client configuration
struct config {
static constexpr auto CONF_PUBKEY = "public_key",
CONF_PRIVKEY = "private_key",
CONF_DATABASES = "databases";
data public_key, private_key;
std::unordered_map<string, string> dbs;
* Create a new configuration
* @note This creates the persistent key pair
static std::optional<config> create() {
auto keys = crypto::generate_keypair();
if (!keys)
return {};
auto [public_key, private_key] = keys.value();
return config{
.public_key = public_key,
.private_key = private_key,
.dbs = {},
* Load configuration from a JSON object
* @param conf The JSON object
static std::optional<config> load(const Json::Value &conf) {
if (!conf.isMember(CONF_PUBKEY) || !conf.isMember(CONF_PRIVKEY))
return std::nullopt;
auto public_key = crypto::b64decode(conf[CONF_PUBKEY].asString());
if (!public_key)
return std::nullopt;
auto private_key = crypto::b64decode(conf[CONF_PRIVKEY].asString());
if (!private_key)
return std::nullopt;
return config{
.public_key = public_key.value(),
.private_key = private_key.value(),
.dbs = [&conf]() {
auto ids = std::unordered_map<string, string>{};
if (!conf.isMember(CONF_DATABASES))
return ids;
for (auto it = conf[CONF_DATABASES].begin(); it != conf[CONF_DATABASES].end(); it++)
ids.emplace(, it->asString());
return ids;
* Write the configuration into a JSON object
void serialize(Json::Value &conf) const {
conf[CONF_PUBKEY] = crypto::b64encode(this->public_key);
conf[CONF_PRIVKEY] = crypto::b64encode(this->private_key);
conf[CONF_DATABASES] = Json::objectValue;
for (auto [dbhash, id] : this->dbs)
conf[CONF_DATABASES][dbhash] = id;
* Dump the configuration as a JSON object
Json::Value serialize() const {
Json::Value json(Json::objectValue);
return json;
* Simple, blocking client for interacting with KeePassXC
class client {
config conf;
data conn_pubkey = {},
conn_privkey = {},
remote_pubkey = {};
string conn_id = {},
remote_dbhash = {};
pid_t pid = -1;
std::unique_ptr<ko::fd::pipe> pipe = {};
std::array<const char *, 2> proc_cmd = {"keepassxc-proxy", nullptr};
const std::unique_ptr<Json::StreamWriter> dumper{[](){
auto builder = Json::StreamWriterBuilder();
builder["indentation"] = "";
return builder.newStreamWriter();
const std::unique_ptr<Json::CharReader> loader{Json::CharReaderBuilder().newCharReader()};
inline string dumps(const Json::Value &v) {
auto s = std::ostringstream();
this->dumper->write(v, &s);
return s.str();
inline std::variant<Json::Value, string> loads(const string &json) {
auto v = Json::Value();
auto err = std::string();
if (this->loader->parse(, + json.size(), &v, &err))
return v;
return err;
client(config conf) :
const config &get_config() const {
return this->conf;
void set_command(const char *cmd) {
this->proc_cmd[0] = cmd;
bool is_connected() {
return this->pid > 0 && this->pipe; // XXX check pipe is connected
bool is_associated() {
return !this->remote_pubkey.empty() && !this->remote_dbhash.empty();
* Start the KeePassXC process
* @note This generates necessary ephemeral keys and ids
* XXX Should move the key pair into associate()?
bool connect() {
auto keys_opt = crypto::generate_keypair();
if (!keys_opt)
return false;
std::tie(this->conn_pubkey, this->conn_privkey) = keys_opt.value();
std::tie(this->pid, this->pipe) = ko::proc::popenp(this->;
this->conn_id = crypto::b64encode(crypto::generate_nonce());
return is_connected();
Json::Value jerror(string reason) {
auto err = Json::Value{Json::objectValue};
err["action"] = "client-error";
err["success"] = "false";
err["errorCode"] = -1;
err["error"] = reason;
return err;
Json::Value send_message(const Json::Value &msg) {
auto &pipe = *(this->pipe);
auto msg_s = this->dumps(msg);
auto sz_opt = pipe.read_bin<uint32_t>();
if (!sz_opt)
return jerror(string{"Could not read result size: "} + strerror(errno));
auto reply =;
if (reply.size() < sz_opt.value())
return jerror(string{"Could not read result: "} + strerror(errno));
auto result_err = this->loads(reply);
if (result_err.index())
return jerror(string{"Could not parse message: "} + std::get<string>(result_err));
//std::cerr << "Conversation: " << msg_s << " -> (" << sz_opt.value() << ") " << reply << std::endl;
return std::get<0>(result_err);
Json::Value send_message_enc(const Json::Value &msg) {
auto nonce = crypto::generate_nonce();
auto msg_enc = crypto::encrypt(data_cast(this->dumps(msg)), nonce, this->remote_pubkey, this->conn_privkey);
auto wrap = Json::Value(Json::objectValue);
wrap["action"] = msg["action"];
wrap["nonce"] = crypto::b64encode(nonce);
wrap["clientID"] = this->conn_id;
wrap["message"] = crypto::b64encode(msg_enc);
auto res = this->send_message(wrap);
if (res.isMember("error"))
return res;
if (res.get("nonce", "").asString() != crypto::b64encode(nonce))
return this->jerror("Invalid response nonce");
auto cipher_opt = crypto::b64decode(res["message"].asString());
if (!cipher_opt)
return this->jerror("Malformed ciphertext");
auto data = crypto::decrypt(cipher_opt.value(), nonce, this->remote_pubkey, this->conn_privkey);
auto result_err = this->loads(nodata_cast(data));
if (result_err.index())
return this->jerror(string{"Could not parse inner message: "} + std::get<1>(result_err));
return std::get<0>(result_err);
// ----------------------------------------------------------
// Message types
inline Json::Value msg_skeleton(const string &action) {
auto msg = Json::Value{Json::objectValue};
msg["action"] = action;
return msg;
Json::Value send_change_public_keys() {
auto msg = this->msg_skeleton("change-public-keys");
msg["publicKey"] = crypto::b64encode(this->conn_pubkey);
msg["clientID"] = this->conn_id;
msg["nonce"] = crypto::b64encode(crypto::generate_nonce());
return this->send_message(msg);
Json::Value send_get_databasehash() {
return this->send_message_enc(this->msg_skeleton("get-databasehash"));
Json::Value send_associate() {
auto msg = this->msg_skeleton("associate");
msg["key"] = crypto::b64encode(this->conn_pubkey);
msg["idKey"] = crypto::b64encode(this->conf.public_key);
return this->send_message_enc(msg);
Json::Value send_test_associate(const string &id) {
auto msg = this->msg_skeleton("test-associate");
msg["key"] = crypto::b64encode(this->conf.public_key);
msg["id"] = id;
return this->send_message_enc(msg);
Json::Value send_get_logins(const string &url, const string &submitUrl=string(), bool httpAuth=false) {
auto msg = this->msg_skeleton("get-logins");
msg["url"] = url;
if (!submitUrl.empty())
msg["submitUrl"] = submitUrl;
if (httpAuth)
msg["httpAuth"] = httpAuth;
msg["keys"] = Json::Value{Json::arrayValue};
msg["keys"][0] = Json::Value{Json::objectValue};
msg["keys"][0]["id"] = crypto::b64encode(this->conf.public_key);
msg["keys"][0]["key"] = crypto::b64encode(this->conn_pubkey);
return this->send_message_enc(msg);
// ----------------------------------------------------------
// Composite functions
* Try to associate with KeePassXC using existing IDs
* @return A non-empty error message on failure
string try_associate() {
// Exchange pubkeys
auto res = this->send_change_public_keys();
if (res.isMember("error"))
return res["error"].asString();
if (!res.isMember("publicKey"))
return "publicKey not in change-public-keys reply";
this->remote_pubkey = crypto::b64decode(res["publicKey"].asString()).value();
// Get the dbhash
res = this->send_get_databasehash();
if (res.isMember("error"))
return res["error"].asString();
this->remote_dbhash = res["hash"].asString();
// Look up database
auto f = conf.dbs.find(this->remote_dbhash);
if (f == conf.dbs.end())
return "Not associated with database";
// Verify association
res = this->send_test_associate(f->second);
if (res.get("success", "false") != "true")
return "Key appears to have been revoked";
return {};
* Try to associate with KeePassXC using either existing or new IDs
* @return A non-empty error message on failure
string associate() {
auto err = try_associate();
if (err.empty())
return {};
auto res = this->send_associate();
if (res.isMember("error"))
return res["error"].asString();
if (res.get("success", "false") != "true")
return "Unknown error";
this->conf.dbs.emplace(this->remote_dbhash, res["id"].asString());
return {};

@ -0,0 +1,68 @@
#include "keepassxc-browser.hpp"
#include <fstream>
#include <filesystem>
#include <cstdlib>
namespace fs = std::filesystem;
/**
 * Print all message parts to stderr (concatenated, followed by a newline)
 * and terminate the process.
 * @param code The process exit status
 * @param msg  The message parts; each must be streamable to std::ostream
 */
template <typename... Args>
[[noreturn]] void die(int code, const Args &... msg) {
    (std::cerr << ... << msg) << std::endl;
    std::exit(code);
}
int main(int argc, char **argv) {
if (::sodium_init() < 0)
die(-44, "Error: Could not initialize libsodium");
if (argc < 2)
die(-1, "Usage: ", argv[0], " <url>");
// Try to make the cli emulate pass at some point
auto config_path = fs::path{getenv("HOME")} / ".config/keepassxc-pass.json";
auto conf = [&config_path]() {
if (!fs::exists(config_path)) {
auto opt = keepassxc::config::create();
if (!opt)
die(-6, "Error: Could not initialize secrets");
return opt.value();
} else {
auto s = std::ifstream(config_path);
auto v = Json::Value{};
s >> v;
auto opt = keepassxc::config::load(v);
if (!opt)
die(-5, "Error: Could not load secrets from config");
return opt.value();
auto client = keepassxc::client(conf);
if (!client.connect())
die(-2, "Error: Could not popen keepass");
// Hide new association behind a flag?
auto err = client.associate();
if (!err.empty())
die(-3, "Error: Could not associate with keepass: ", err);
auto s = std::ofstream(config_path);
s << client.get_config().serialize();
fs::permissions(config_path, fs::perms::owner_read|fs::perms::owner_write);
auto res = client.send_get_logins(argv[1]);
if (res["success"] != "true")
die(-4, "Error: Could not get logins: ", res["error"].asString());
if (res["count"] == "0")
die(1, "No logins found");
std::cout << res["entries"][0]["password"].asString() << std::endl;
return 0;

@ -0,0 +1,692 @@
// ============================================================================
// kofd.hpp
// ko::fd
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// File descriptor functions
#pragma once
#include "kofs.hpp"
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sendfile.h>
#include <sys/ioctl.h>
#include <sys/mount.h> // linux/fs.h includes linux/mount.h which overrides some of the things from sys/mount.h
#include <linux/fs.h>
#include <unistd.h>
#include <cstring>
#include <string>
#include <utility>
#include <optional>
// ==================================================================
namespace ko::fd {
// ------------------------------------------------------------------
// Working with file descriptors
/**
 * Auto-close move-only filedescriptor wrapper
 *
 * Owns at most one file descriptor and closes it on destruction.
 * A negative value means "no descriptor".
 */
class fd {
    int _fd;

public:
    /// Create an empty wrapper
    fd() noexcept : _fd(-1) {}

    /// Take ownership of a raw file descriptor
    fd(int fd) noexcept : _fd(fd) {}

    fd(fd const &) = delete;
    fd &operator=(fd const &) = delete;

    /// Take over the descriptor owned by \p o, leaving \p o empty
    fd(fd &&o) noexcept : _fd(o.move()) {}

    /// Close the current descriptor (if any) and take ownership of a raw one
    fd &operator=(int fd) {
        // Re-assigning the descriptor already held must not close it
        if (_fd >= 0 && _fd != fd)
            ::close(_fd);
        _fd = fd;
        return *this;
    }

    /// Close the current descriptor (if any) and take over the one owned by \p o
    fd &operator=(fd &&o) {
        if (this != &o) {
            if (_fd >= 0)
                ::close(_fd);
            _fd = o.move();
        }
        return *this;
    }

    ~fd() {
        if (_fd >= 0)
            ::close(_fd);
    }

    /**
     * Boolean operator
     * @note This differs from a raw int fd
     * @note The non-const overload exists because `operator int() &` would
     *       otherwise be selected for non-const lvalues in boolean contexts,
     *       turning an empty wrapper (-1) into `true`.
     */
    operator bool() const & {
        return _fd >= 0;
    }
    operator bool() & {
        return _fd >= 0;
    }

    /**
     * Negation operator
     * @note This differs from a raw int fd
     */
    bool operator !() const {
        return _fd < 0;
    }

    // Comparison
    bool operator ==(int i) const {
        return _fd == i;
    }
    bool operator !=(int i) const {
        return _fd != i;
    }
    bool operator <(int i) const {
        return _fd < i;
    }
    bool operator >(int i) const {
        return _fd > i;
    }
    bool operator <=(int i) const {
        return _fd <= i;
    }
    bool operator >=(int i) const {
        return _fd >= i;
    }

    /**
     * Get the raw int fd
     * @note This is not allowed on temporaries
     * @note Use move() instead to transfer ownership.
     * @see move()
     */
    operator int() & {
        return _fd;
    }

    /**
     * Disown this object
     * @return The raw descriptor; the caller becomes responsible for closing it
     */
    int move() {
        auto tmp = _fd;
        _fd = -1;
        return tmp;
    }

    /**
     * Close the file descriptor early
     * @return true if a descriptor was held and closing it succeeded
     *         (EBADF counts as success)
     * @note The descriptor is forgotten even when close(2) reports an error:
     *       on Linux it is released regardless, and closing it again later
     *       could hit an unrelated descriptor that reused the number.
     */
    bool close() {
        if (_fd < 0) return false;
        const bool ok = ::close(_fd) == 0 || errno == EBADF;
        _fd = -1;
        return ok;
    }

    /**
     * Copy the file descriptor
     * @return A wrapper owning a dup(2) of this descriptor (empty on failure)
     */
    fd dup() const {
        return ::dup(_fd);
    }
};
// Opening file descriptors
// @{
* Open a file descriptor
* @param path The path
* @param flags The open(2) flags
* @param dirfd The directory fd \p path may be relative to
* @param cloexec Add O_CLOEXEC to \p flags
* @return A \c fd file descriptor
fd open(const fs::cpath &path, long flags, int dirfd=AT_FDCWD, bool cloexec=true) {
return ::openat(dirfd, path, flags | (cloexec ? O_CLOEXEC : 0));
* Open a file descriptor, creating the file if it doesn't exist
* @param path The path
* @param flags The open(2) flags
* @param mode The file mode to create with
* @param dirfd The directory fd \p path may be relative to
* @param cloexec Add O_CLOEXEC to \p flags
* @return A \c fd file descriptor
fd open_creat(const fs::cpath &path, long flags, mode_t mode, int dirfd=AT_FDCWD, bool cloexec=true) {
return ::openat(dirfd, path, O_CREAT | flags | (cloexec ? O_CLOEXEC : 0), mode);
* Open a directory file descriptor
* @param path The directory path
* @param dirfd The directory fd \p path may be relative to
* @return A \c fd directory file descriptor
fd opendir(const fs::cpath &path, int dirfd=AT_FDCWD) {
return ::openat(dirfd, path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
* Open a directory file descriptor with custom flags
* @param path The directory path
* @param flags The flags to pass to open(2)
* @param dirfd The directory fd \p path may be relative to
* @return A directory \c fd
fd opendir2(const fs::cpath &path, long flags, int dirfd=AT_FDCWD) {
return ::openat(dirfd, path, flags|O_DIRECTORY);
// @}
// Checking properties
// @{
* Check if a path exists
* @param path The path
* @param dirfd The directory fd \p path may be relative to
* @return true if path exists
bool exists(const fs::cpath &path, int dirfd=AT_FDCWD) {
return !::faccessat(dirfd, path, F_OK, 0);
* Check if a path is a directory
* @param path The path
* @param dirfd The directory fd \p path may be relative to
* @return true if path is a directory
bool is_dir(const fs::cpath &path, int dirfd=AT_FDCWD) {
struct stat st;
if (::fstatat(dirfd, path, &st, 0))
return false;
return S_ISDIR(st.st_mode);
* Read the target of a symbolic link
* @param path The symlink path
* @param dirfd The directory fd \p path may be relative to
* @return A fs::path. It is empty on error
fs::path readlink(const fs::cpath &path, int dirfd=AT_FDCWD) {
constexpr auto static_bufsize = 4096;
char buf[static_bufsize];
auto sz = ::readlinkat(dirfd, path, buf, static_bufsize);
if (sz < 0)
return {};
if (sz < static_bufsize)
return {buf, buf + sz};
struct stat st;
if (::fstatat(dirfd, path, &st, AT_SYMLINK_NOFOLLOW))
return {};
auto extbuf = std::make_unique<char[]>(st.st_size);
sz = ::readlinkat(dirfd, path, extbuf.get(), sz);
if (sz < 0)
return {};
return {&extbuf[0], &extbuf[sz]};
* Get the target if a file is a symbolic link or return the path as-is if it is something else
* @param path The path
* @param dirfd The directory fd \p path may be relative to
* @param notexist_ok Whether or not to return the path as-is if it doesn't exist (default=true)
* @return A fs::path, possibly relative to dirfd. It may be empty on error
fs::path readlink_or_path(const fs::path &path, int dirfd=AT_FDCWD, bool notexist_ok=true) {
auto target = readlink(path, dirfd);
if (target.empty()) {
if (errno == EINVAL || (errno == ENOENT && notexist_ok))
return path;
return {};
// Make (relative) returned value relative to dirfd
if (target.is_relative())
return path.parent_path() / target;
return target;
* Check if a directory is empty
bool is_dir_empty(const fs::cpath &path, int dirfd=AT_FDCWD) {
auto fd = opendir(path, dirfd);
if (!fd)
return false;
auto dir = fs::dir_ptr(fd);
if (!dir)
return false;
errno = 0;
while (true) {
auto res = dir.readdir();
if (res == nullptr)
return errno == 0;
if (strcmp(".", res->d_name) && strcmp("..", res->d_name))
return false;
// @}
// Creating files and directories
// @{
* Create a symbolic link
* @param target The link target.
* @param path The path of the new symlink
* @param dirfd The directory fd \p path may be relative to
* @return 0 on success
* @note target is relative to the directory containing the link, NOT dirfd
int symlink(const fs::cpath &target, const fs::cpath &path, int dirfd=AT_FDCWD) {
return ::symlinkat(target, dirfd, path);
* Create a directory
* @param path The new directory path
* @param mode The permissions to assign
* @param dirfd The directory fd \p path may be relative to
* @return 0 on success
int mkdir(const fs::cpath &path, mode_t mode=0755, int dirfd=AT_FDCWD) {
return ::mkdirat(dirfd, path, mode);
* Create all parent directories
* @param path The path of the innermost directory to create
* @param mode The permissions to assign
* @param dirfd The directory fd \p path may be relative to
* @return The number of directories created, or -1 on error
int makedirs(const fs::path &path, mode_t mode=0755, int dirfd=AT_FDCWD) {
struct stat st;
// Treat empty path as .
// Check if exists
if (!fstatat(dirfd, path.empty() ? "." : path.c_str(), &st, 0)) {
// If directory, we're fine.
if (S_ISDIR(st.st_mode))
return 0;
// Else, this is an error
errno = ENOTDIR;
return -1;
// Propagate any error other than ENOENT
if (errno != ENOENT || path.empty())
return -1;
// Ensure parents
auto parents = makedirs(path.parent_path(), mode, dirfd);
// Actually create directory
if (mkdir(path, mode, dirfd))
return -1;
return parents + 1;
* Create a file if it doesn't exist
* @param path The path of the file
* @param mode The permissions to assign if it has to be created
* @param dirfd The directory fd \p path may be relative to
* @return 0 on success
int touch(const fs::cpath &path, mode_t mode=0755, int dirfd=AT_FDCWD) {
auto fd = open_creat(path, O_WRONLY, mode, dirfd);
return fd ? 0 : -1;
* Remove a file
* @param path The path of the file to remove
* @param dirfd The directory fd \p may be relative to
* @return 0 on success
int unlink(const fs::cpath &path, int dirfd=AT_FDCWD) {
return ::unlinkat(dirfd, path, 0);
* Remove a directory
* @param path The path of the directory to remove
* @param dirfd The directory fd \p may be relative to
* @return 0 on success
int rmdir(const fs::cpath &path, int dirfd=AT_FDCWD) {
return ::unlinkat(dirfd, path, AT_REMOVEDIR);
* Copy a symbolic link
* @param from The source symbolic link path
* @param to The target symbolic link path (must not exist)
* @param from_dirfd The directory fd \p from may be relative to
* @param dirfd The directory fd \p to may be relative to
* @return 0 on success
int copy_symlink(const fs::cpath &from, fs::cpath to,
int from_dirfd=AT_FDCWD, int dirfd=AT_FDCWD) {
auto target = readlink(from, from_dirfd);
return ::symlinkat(target.c_str(), dirfd, to);
// @}
// File descriptor I/O
// @{
// Read
/**
 * Read until \p size bytes have been read or an error has been encountered
 * @param fd A file descriptor
 * @param dest The destination buffer
 * @param size The desired number of bytes read
 * @return The actual number of bytes read
 * @note If returned value != \p size, errno will be set. errno == 0 indicates EOF
 */
inline size_t read(int fd, char *dest, size_t size) {
    size_t have = 0;
    while (have < size) {
        // The explicit void* keeps this call bound to read(2) even where this
        // overload itself is visible under the global name.
        const auto got = ::read(fd, static_cast<void *>(dest + have), size - have);
        if (got == 0) {
            // End of file: stop and signal it through errno == 0
            errno = 0;
            break;
        }
        if (got < 0)
            break;  // errno was set by read(2)
        have += static_cast<size_t>(got);
    }
    return have;
}
/**
 * Read until \p size bytes have been read or an error has been encountered
 * @param fd A file descriptor
 * @param size The desired number of bytes read
 * @return The resulting string
 * @note If returned string.size() != \p size, errno will be set. errno == 0 indicates EOF
 */
std::string read(int fd, size_t size) {
    // Read straight into a string of the requested size, then shrink it
    // to the number of bytes that actually arrived.
    auto buf = std::string(size, '\0');
    buf.resize(read(fd, buf.data(), size));
    return buf;
}
/**
 * Read until \p size bytes have been read, an error has been encountered, or the timeout is hit
 * @param fd A file descriptor
 * @param dest The destination buffer
 * @param size The desired number of bytes read
 * @param timeout The timeout that must not be exceeded between chunk reads
 * @return The actual number of bytes read
 * @note If returned value != \p size, errno will be set. errno == 0 indicates EOF.
 *       Timeout is indicated by ETIMEDOUT.
 */
size_t read(int fd, char *dest, size_t size, timeval timeout) {
    size_t have = 0;

    while (have < size) {
        // select(2) can only watch descriptors below FD_SETSIZE
        if (fd < 0 || fd >= FD_SETSIZE) {
            errno = EBADF;
            break;
        }

        // select(2) may modify both the set and the timeout, so each chunk
        // gets a fresh copy; otherwise later chunks would only be granted
        // whatever time the earlier ones left over.
        fd_set fds;
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        timeval wait = timeout;

        const auto rv = ::select(fd + 1, &fds, nullptr, nullptr, &wait);
        if (rv == 0) {
            errno = ETIMEDOUT;
            break;
        }
        if (rv < 0)
            break; // errno was set by select(2)

        const auto got = ::read(fd, static_cast<void *>(dest + have), size - have);
        if (got == 0) {
            errno = 0; // EOF
            break;
        }
        if (got < 0)
            break; // errno was set by read(2)

        have += static_cast<size_t>(got);
    }

    return have;
}
* Read until \p size bytes have been read, an error has been encoutnered, or the timeout is hit
* @param fd A file descriptor
* @param size The desired number of bytes read
* @param timeout The timeout that must not be exceeded between chunk reads
* @return The resulting string
* @note If returned value != \p size, errno will be set. errno == 0 indicates EOF
* Timeout is indicated by ETIMEDOUT.
std::string read(int fd, size_t size, timeval timeout) {
auto buf = std::string(size, 0);
buf.resize(read(fd,, size, timeout));
return buf;
/**
 * Read a POD type from a file descriptor
 * @tparam T The type (must be default-constructible and safe to fill byte-wise)
 * @param fd The file descriptor
 * @return The object on success, std::nullopt on failure
 * @note If std::nullopt is returned, errno will be set.
 */
template <typename T>
std::optional<T> read_bin(int fd) {
    // Read directly into a real T so the storage is correctly aligned;
    // viewing a plain char array through a T* is not guaranteed to be.
    T value;
    if (read(fd, reinterpret_cast<char *>(&value), sizeof(T)) == sizeof(T))
        return value;
    return std::nullopt;
}
// Write
/**
 * Write all bytes to a file descriptor unless an error occurs (blocking)
 * @param fd The file descriptor
 * @param buf The source buffer
 * @param size The number of bytes to write
 * @return The number of bytes written
 * @note If returned value != \p size, errno will be set.
 */
size_t write(int fd, const char *buf, size_t size) {
    size_t have = 0;

    while (have < size) {
        // The explicit void* makes sure the libc write(2) is selected and
        // never this wrapper, regardless of where it is declared.
        const auto got = ::write(fd, static_cast<const void *>(buf + have), size - have);

        if (got == 0) {
            // Nothing was accepted and no error was reported; stop instead of spinning
            errno = 0;
            break;
        }
        if (got < 0)
            break; // errno was set by write(2)

        have += static_cast<size_t>(got);
    }

    return have;
}
/**
 * Write all bytes to a file descriptor unless an error occurs (blocking)
 * @param fd The file descriptor
 * @param s A string to write
 * @return The number of bytes written
 * @note If returned value != \p s.size(), errno will be set.
 */
size_t write(int fd, const std::string &s) {
    return write(fd, s.data(), s.size());
}
/**
 * Write a POD object to a file descriptor
 * @tparam T The POD type
 * @param fd The file descriptor
 * @param v The object
 * @return The number of bytes written
 * @note If returned value != sizeof(T), errno will be set.
 */
template <typename T>
size_t write_bin(int fd, const T &v) {
    // The object is sent as its raw in-memory bytes
    return write(fd, reinterpret_cast<const char*>(&v), sizeof(v));
}
// Shortcuts
* Read a file from disk
* @param path The file path
* @param dirfd The directory fd \p path may be relative to
* @param max The maximum number of bytes to read
* @return A pair of (data read, errno)
* @note If data.size() == max, more data may be available.
std::pair<std::string, int> cat(const fs::cpath &path, int dirfd=AT_FDCWD, size_t max=1024) {
auto fd = open(path, O_RDONLY, dirfd);
if (!fd)
return {};
auto r = read(fd, max);
if (r.size() < max)
return {r, errno};
return {r, 0};
* Write a file to disk
* @param s The data to write
* @param path The path to write to
* @param mode The mode to create the file with, if neccessary
* @param dirfd The directory fd \p path may be relative to
bool dump(const std::string &s, const fs::cpath &path, mode_t mode, int dirfd=AT_FDCWD) {
auto fd = open_creat(path, O_WRONLY, mode, dirfd);
if (!fd)
return -1;
return write(fd, s) == s.size();
// @}
// Copying Files
// @{
/**
 * Naively copy data between file descriptors
 * @param fs The source file descriptor
 * @param fd The destination file descriptor
 * @param len The number of bytes to copy
 * @return false if reading or writing failed; true otherwise, including
 *         when the source reached EOF before \p len bytes were copied
 */
bool fcopy_raw(int fs, int fd, size_t len) {
    constexpr size_t bufsz = 8192;
    char buf[bufsz];

    do {
        const size_t target = std::min(len, bufsz);
        const size_t nread = read(fs, buf, target);

        // Short read with errno set: a real error (errno == 0 would be EOF)
        if (nread < target && errno != 0)
            return false;

        // Flush whatever was read, even if it was the final partial chunk
        const size_t written = write(fd, buf, nread);
        if (written < nread)
            return false;

        // Short read without error: source is exhausted
        if (nread < target)
            return true;

        len -= nread;
    } while (len > 0);

    return true;
}
* Copy data between file descriptors
* @param fs The source file descriptor
* @param fd The destination file descriptor
* @param len The number of bytes to copy
* @return false on failure with errno set
* @note This attempts to use copy_file_range(2) and sendfile(2)
* before falling back to fcopy_raw
bool fcopy(int fs, int fd, size_t len) {
while (len > 0) {
auto r = ::copy_file_range(fs, NULL, fd, NULL, len, 0);
if (r < 0) {
if (errno == ENOSYS || errno == EXDEV || errno == EINVAL)
return fcopy_raw(fs, fd, len);
len -= r;
while (len > 0) {
auto r = ::sendfile(fd, fs, NULL, len);
if (r < 0)
return fcopy_raw(fs, fd, len);
len -= r;
return true;
* Copy a file
* @param src The path to copy from
* @param dst The path to copy to
* @param src_dir The directory fd \p src may be relative to
* @param dst_dir The directory fd \p dst may be relative to
* @return false on failure with errno set
* @note This variant will only try to preserve the file mode, no other attributes
* @note Note that this function takes two separate directory fds
* @note This will use reflink/FICLONE if supported.
bool copy0(const fs::cpath &src, const fs::cpath &dst, int src_dir=AT_FDCWD, int dst_dir=AT_FDCWD) {
struct stat st;
if (::fstatat(src_dir, src, &st, 0))
return false;
auto fs = open(src, O_RDONLY, src_dir);
if (!fs)
return false;
auto fd = open_creat(dst, O_WRONLY, st.st_mode, dst_dir);
if (!fd)
return false;
// Try reflink
#ifdef FICLONE
int ret = ::ioctl(fd, FICLONE, (int)fs);
if (ret != -1)
return ret == st.st_size;
return fcopy(fs, fd, st.st_size);
// @}
} // namespace ko::fd

@ -0,0 +1,78 @@
// ============================================================================
// kofd_pipe.hpp
// ko::fd::pipe
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// bi-directional pipe implementation
#pragma once
#include "kofd.hpp"
#include <optional>
namespace ko::fd {
// ------------------------------------------------------------------
* Represents a bi-directional pair of file descriptors
class pipe {
int rfd, wfd;
pipe(fd &&fd) :
rfd(fd.move()), wfd(rfd)
pipe(fd &&rfd, fd &&wfd) :
rfd(rfd.move()), wfd(wfd.move())
explicit pipe(int rfd, int wfd) :
rfd(rfd), wfd(wfd)
~pipe() {
if (this->wfd != this->rfd)
// IO Functions, see namespace fd
inline size_t read(char *dest, size_t size) {
return ::ko::fd::read(this->rfd, dest, size);
inline std::string read(size_t size) {
return ::ko::fd::read(this->rfd, size);
inline size_t read(char *dest, size_t size, timeval timeout) {
return ::ko::fd::read(this->rfd, dest, size, timeout);
inline std::string read(size_t size, timeval timeout) {
return ::ko::fd::read(this->rfd, size, timeout);
inline size_t write(const char *buf, size_t size) {
return ::ko::fd::write(this->wfd, buf, size);
inline size_t write(const std::string &s) {
return ::ko::fd::write(this->wfd, s);
template <typename T>
inline size_t write_bin(const T &v) {
return ::ko::fd::write_bin<T>(this->wfd, v);
template <typename T>
inline std::optional<T> read_bin() {
return ::ko::fd::read_bin<T>(this->rfd);
} // namespace ko::fd

@ -0,0 +1,110 @@
// ============================================================================
// kofs.hpp
// ko::fs
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// Misc. Filesystem functions
#pragma once
#include <dirent.h>
#include <unistd.h>
#include <string>
#include <filesystem>
namespace ko::fs {
using namespace std::filesystem;
* Helper struct for functions that require a c-string path
struct cpath {
const char *path;
inline cpath(const char *path) : path(path) {}
inline cpath(const fs::path &path) : path(path.c_str()) {}
inline cpath(const std::string &path) : path(path.c_str()) {}
inline operator const char *() const {
return path;
class dir_ptr {
DIR *ptr;
dir_ptr(int fd) {
ptr = ::fdopendir(fd);
dir_ptr(const cpath &path) {
ptr = ::opendir(path);
bool operator !() {
return ptr == nullptr;
~dir_ptr() {
dirent const *readdir() {
return ::readdir(ptr);
// Iterator
class iterator {
dir_ptr &dir;
dirent const *here = nullptr;
bool done = false;
int error = 0;
friend class dir_ptr;
iterator(dir_ptr &dir, bool done) : dir(dir), done(done) {
iterator &operator ++() {
if (!done) {
auto errno_bak = errno;
here = dir.readdir();
if (here == nullptr) {
done = true;
if (errno != errno_bak)
error = errno;
return *this;
dirent const &operator *() {
return *here;
operator bool() {
return !done;
bool operator ==(iterator const &other) const {
return dir.ptr == other.dir.ptr && (here == || done == other.done);
int get_errno() {
return error;
iterator begin() {
return iterator(*this, false);
iterator end() {
return iterator(*this, true);
} // namespace ko::fs

@ -0,0 +1,241 @@
* Small Linux Namespace utility header
* (c) 2019 Taeyeon Mori <taeyeon AT>
#pragma once
#include "koutil.hpp"
#include "kofd.hpp"
#include "koos.hpp"
#include <sched.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <array>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
namespace ko::ns {
* idmap namespace
* Contains functions for setting /proc/pid/[ug]id_map
namespace idmap {
/// An entry in [ug]id_map
struct entry {
uid_t start;
uid_t host_start;
unsigned long count;
/// Write the idmap <map> to <path>
template <typename Container>
inline bool set(fs::path path, Container map) {
auto stream = std::ofstream(path);
for (entry &e : map)
stream << e.start << ' ' << e.host_start << ' ' << e.count << '\n';
return stream.good();
/// Disable setgroups() syscall for process <pid>
/// This is required for unprivileged user namespaces
bool disable_setgroups(pid_t pid) {
auto stream = std::ofstream(util::str("/proc/", pid, "/setgroups"));
stream << "deny";
return stream.good();
/// Return an idmap mapping a single ID
inline constexpr std::array<entry, 1> single(uid_t id, uid_t host_id) {
return {{ {id, host_id, 1} }};
/// Get the path to the <map_type> map for <pid>
/// <map_type> may be "uid" or "pid"
inline fs::path path(pid_t pid, const char *map_type) {
return util::str("/proc/", pid, "/", map_type, "_map");
* namespace ko::ns::mount
* Stuff related to setting up the mount namespace
namespace mount {
using os::mount;
using os::bind;
/// Mount all the basic filesystems below <root>
/// <root> itself is bind-mounted onto itself unless it already is a mount point;
/// proc, sys, dev, tmp and run are only set up if the directory exists in <root>.
/// Returns (0, nullptr) on success, otherwise (error, name of the failed step).
inline util::cvresult mount_core(const fs::path &root) {
    return util::cvshort()
        .ifthen("mount_root", !os::is_mountpoint(root), bind, root, root, 0)
        .ifthen("mount_proc", fs::exists(root / "proc"), mount, "proc", root / "proc", "proc", 0, nullptr)
        .ifthen("mount_sys", fs::exists(root / "sys"), bind, "/sys", root / "sys", MS_REC)
        .ifthen("mount_dev", fs::exists(root / "dev"), bind, "/dev", root / "dev", MS_REC)
        .ifthen("mount_tmp", fs::exists(root / "tmp"), mount, "tmp", root / "tmp", "tmpfs", 0, nullptr)
        .ifthen("mount_run", fs::exists(root / "run"), mount, "run", root / "run", "tmpfs", 0, nullptr);
}
/// Write-protect path to mitigate broken filesystem permissions in single-user ns
/// Done in two steps: bind the path onto itself, then remount that bind read-only.
inline util::cvresult protect_path(const fs::path &path) {
    return util::cvshort()
        .then("bind_protect", bind, path, path, MS_REC)
        .then("bind_protect_ro", bind, path, path, MS_REC|MS_REMOUNT|MS_RDONLY);
}
/// Bind in additional locations required by GUI programs
/// Some of these are serious isolation breaches!
/// Note that home and rundir must be relative and will be interpreted against both '/' and $root
// NOTE(review): several statements of this function (array elements, branch
// bodies, closing braces, the final return) are not present in this copy of
// the source; the notes below mark the spots that look incomplete.
util::cvresult mount_gui(const fs::path &root, const fs::path &home, const fs::path &rundir) {
// Resolve a path from environment variable `name`: an optional `prefix` is
// stripped and a leading '/' is dropped (making the result relative);
// otherwise `dflt` is used.
auto path_from_env = [](const char *name, fs::path dflt, const char *prefix=nullptr) -> fs::path {
auto var = getenv(name);
if (var != nullptr) {
if (prefix != nullptr) {
auto plen = strlen(prefix);
if (!strncmp(var, prefix, plen))
var += plen;
if (var[0] == '/')
return var+1;
return dflt;
// Join `to` with the value of environment variable `name`, or with `dflt` if it is unset
auto path_from_env_rel = [](fs::path to, const char *name, const char *dflt) -> fs::path {
auto var = getenv(name);
if (var != nullptr)
return to / var;
return to / dflt;
// Bind-mount various paths required to get GUI apps to communicate with system services
// X11, DBus (both buses, Steam does use the system bus), PulseAudio
// NOTE(review): the array is declared with 9 elements but only 7 initializers
// are visible; frags[0] is used below as the X11 socket directory, so the
// leading entries appear to be missing here - confirm against upstream.
auto frags = std::array<fs::path, 9>{
"run/udev", // Udev database for correct ENV entries e.g. ID_INPUT_JOYSTICK markers
//"etc/machine-id", // Pulseaudio will only connect with same machine-id by default. See below
path_from_env("XAUTHORITY", home / ".Xauthority"),
home / ".config/pulse/cookie",
path_from_env("DBUS_SESSION_BUS_ADDRESS", rundir / "bus", "unix:path="),
rundir / "pulse",
rundir / "pipewire-0",
path_from_env_rel(rundir, "WAYLAND_DISPLAY", "wayland-0"),
// /tmp/.X11-unix must be owned by user or root for wlroots xwayland to work (e.g. gamescope)
// behaviour can be overridden by env var KONS_BIND_X11=all
// The lambda narrows frags[0] down to the single socket "X<display>" when
// $DISPLAY is ":<digits>" (or just digits); it returns false for any other value.
if (![&frags, root]() {
auto x11_mount_mode = getenv("KONS_BIND_X11");
if (x11_mount_mode && !strcasecmp("all", x11_mount_mode))
return true;
auto display = getenv("DISPLAY");
if (!display)
return false;
if (display[0] == ':')
display += 1;
for (char *c = display; *c; c++)
if (!isdigit(*c))
return false;
auto dirname = root / frags[0];
// NOTE(review): the directory is chmod'ed here but its creation is not visible - confirm
::chmod(dirname.c_str(), 01777);
auto sockname = frags[0] / util::str("X", display);
fd::touch(root / sockname);
frags[0] = sockname;
return true;
}()) {
std::cerr << "Warn: Invalid $DISPLAY value; falling back to bind-mounting /tmp/.X11-unix whole" << std::endl;
// Pulseaudio will by default only connect to the server published in the X11 root window properties if the machine-ids match.
// Either we bind-mount /etc/machine-id or we need to set PULSE_SERVER in the environment. Both are suboptimal hacks:
// /etc/machine-id shoudn't be the same across two rootfs' but it might be acceptable since we're not running init.
// OTOH, setting PULSE_SERVER can break with nonstandard configurations if they're not manually set in ENV. X11 publish is not enough.
auto pulse = util::str("unix:/", rundir.c_str(), "/pulse/native");
setenv("PULSE_SERVER", pulse.c_str(), 0); // Don't overwrite, assume that there's a reason it's set. May be TCP.
// If custom unix socket path, it could fail either way as it may not be included above.
// NOTE that exec[vlp]e() must be used to make setenv() work.
// Bind every fragment that exists on the host to the same relative location below root
auto sh = util::cvshort();
auto host_root = fs::path("/");
for (auto frag : frags) {
auto hpath = host_root / frag;
if (fs::exists(hpath)) {
auto path = root / frag;
if (!fs::exists(path)) {
// NOTE(review): the directory branch has no visible body; presumably the
// mount point directory is created here - confirm.
if (fs::is_directory(hpath))
else {
// Opening an ofstream creates an empty file to serve as the bind target
auto touch = std::ofstream(path);
// Stop at the first failing bind and hand the error to the caller
if (!(sh = sh.then("mount_gui", bind, hpath, path, 0)))
return sh;
/// Pivot the root to $new_root, optionally keeping the old one at $old_root.
/// Note that the $old_root directory is required in the process either way.
/// $old_root is interpreted relative to $new_root and is created if missing.
inline util::cvresult pivot_root(const fs::path &new_root, const fs::path &old_root, bool keep_old=true) {
    auto path = new_root / old_root;
    // pivot_root(2) needs an existing directory to park the old root on
    if (!fs::exists(path))
        fs::create_directories(path);
    return util::cvshort()
        .then("pivot_root", syscall, SYS_pivot_root, new_root.c_str(), path.c_str())
        .then("chdir_root", chdir, "/")
        .ifthen("umount_oldroot", !keep_old, umount2, old_root.c_str(), MNT_DETACH);
}
} // namespace mount
* Unshare (at least) new single-user namespace
* @param uid The uid inside the userns
* @param gid The gid inside the userns
* @param flags The unshare(2)/clone(2) flags (CLONE_NEWUSER implied)
* @return Zero on success, -1 + errno on failure
inline int unshare_single(uid_t uid, uid_t gid, long flags) {
auto euid = geteuid();
auto egid = getegid();
auto r = ::unshare(flags | CLONE_NEWUSER);
if (r != 0)
return r;
if (!idmap::set("/proc/self/uid_map", idmap::single(uid, euid)))
return -1;
if (!idmap::disable_setgroups(getpid()))
return -1;
if (!idmap::set("/proc/self/gid_map", idmap::single(gid, egid)))
return -1;
return 0;
/**
 * Enter the namespace referred to by a namespace file (e.g. /proc/<pid>/ns/mnt)
 * @param path The path of the namespace file
 * @param type The namespace type to accept, see setns(2)
 * @param dirfd The directory fd \p path may be relative to
 * @return errno if the file cannot be opened, otherwise the result of setns(2)
 */
inline int setns(const fs::cpath &path, int type, int dirfd=AT_FDCWD) {
    auto fd = ::openat(dirfd, path, O_RDONLY);
    if (fd < 0)
        return errno;
    auto res = ::setns(fd, type);
    // The descriptor is not needed once the namespace was joined (or refused);
    // keep setns()'s errno intact across the close.
    const auto saved_errno = errno;
    ::close(fd);
    errno = saved_errno;
    return res;
}
} // namespace ko::ns

@ -0,0 +1,81 @@
// ============================================================================
// kons_clone.hpp
// ko::ns::clone namespace
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// Small Linux namespace utility header
// Clone-related functions
#pragma once
#include "kons.hpp"
#include "koproc.hpp"
* clone namespace
* Useful wrappers around the clone(2) syscall
namespace ko::ns::clone {
namespace detail {
// Entry point handed to clone by uvclone(): `arg` points at the tuple
// (callable, argument tuple, sync handle) that uvclone() allocated, and
// ArgP is the pointer type of that tuple.
template <typename ArgP>
int uvclone_entry(void *arg) {
// The triple is copied out of the shared allocation
auto [f, args, sync] = *reinterpret_cast<ArgP>(arg);
// NOTE(review): `sync` is unpacked but not used in the visible code; presumably
// the child synchronises with the parent here (so the id maps are written
// before `f` runs) - confirm against upstream.
return std::apply(f, args);
* Spawn a process in a new user namespace
* @param uidmap The uid map for the user namespace
* @param gidmap The gid map for the user namespace
* @param fn The function to call in the child process
* @param stacksize The size of the process stack
* @param flags The clone(2) flags (SIGCHLD|CLONE_VM|CLONE_NEWUSER implied)
* @param args The function arguments
// Returns the child handle together with 0 on success or an errno-style code
// describing why the id maps could not be installed.
template <typename U, typename G, typename F, typename... Args>
std::pair<proc::child_ref, int> uvclone(U uidmap, G gidmap, F fn, size_t stacksize, long flags, Args... args) {
// One half of the semaphore pair stays here, the other travels to the child inside `data`
auto [sync, sync_c] = proc::sync::make_semapair(false);
auto data = new std::tuple{fn, std::tuple{std::forward<Args>(args)...}, sync_c};
auto proc = proc::detail::do_clone(detail::uvclone_entry<decltype(data)>, stacksize, CLONE_NEWUSER | CLONE_VM | flags, data);
// Stays EINVAL when the clone itself failed
auto res = EINVAL;
if (proc) {
// Wait for child
// NOTE(review): the statement performing this wait is not visible here - confirm
// Set maps
// NOTE(review): the initializer is missing in this copy; presumably the child's pid - confirm
auto pid =;
// Order matters: uid_map, then deny setgroups, then gid_map
if (idmap::set(idmap::path(pid, "uid"), uidmap)) {
if (idmap::disable_setgroups(pid)) {
if (idmap::set(idmap::path(pid, "gid"), gidmap)) {
res = 0;
// Any failed step above leaves res non-zero; report the errno it left behind
if (res)
res = errno;;
return {std::move(proc), res};
* Spawn a process in a new single-user user namespace
* @param uid The uid inside the user namespace
* @param gid The gid inside the user namespace
* @param fn The function to call in the child process
* @param stacksize The size of the process stack
* @param flags The clone(2) flags (SIGCHLD|CLONE_VM|CLONE_NEWUSER implied)
* @param args The function arguments
template <typename F, typename... Args>
inline std::pair<proc::child_ref, int> uvclone_single(uid_t uid, gid_t gid, F fn, size_t stacksize, long flags, Args... args) {
return uvclone(idmap::single(uid, getuid()), idmap::single(gid, getgid()), fn, stacksize, flags, args...);
} // namespace ko::ns::clone

@ -0,0 +1,68 @@
// ============================================================================
// koos.hpp
// ko::os
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// Misc. OS interfaces
#pragma once
#include "kofs.hpp"
#include <mntent.h>
#include <pwd.h>
#include <sys/mount.h>
namespace ko::os {
/// Try to get the current user home directory
/// Order of preference: $HOME, the passwd entry of the current uid, then "/"
inline fs::path get_home() {
    const char *home_env = getenv("HOME");
    if (home_env)
        return fs::path(home_env);

    auto pwd = getpwuid(getuid());
    if (pwd)
        return fs::path(pwd->pw_dir);

    return fs::path("/");
}
// ------------------------------------------------------------------
// Mounting filesystems
/// Mount a filesystem, see mount(2)
/// Returns 0 on success and the errno value (not -1) on failure
inline int mount(const fs::cpath &src, const fs::cpath &dst, const char *type, long flags=0, void *args=nullptr) {
    auto res = ::mount(src, dst, type, flags, args);
    if (res)
        return errno;
    return 0;
}
/// Bind-mount <src> onto <dst>; MS_BIND is added to <flags>
/// Returns 0 on success and the errno value on failure, like mount()
inline int bind(const fs::cpath &src, const fs::cpath &dst, long flags=0) {
    return mount(src, dst, nullptr, MS_BIND | flags, nullptr);
}
/// Check if a path is a mount point
/// The path is compared textually against the mount directories listed in
/// /proc/self/mounts; false is also returned if that table cannot be read.
inline bool is_mountpoint(const fs::cpath &path) {
    auto fp = setmntent("/proc/self/mounts", "r");
    if (!fp) {
        return false;
    }

    bool found = false;
    while (auto ent = getmntent(fp)) {
        if (!strcmp(path, ent->mnt_dir)) {
            found = true;
            break;
        }
    }

    // Every stream from setmntent() has to be released again
    endmntent(fp);
    return found;
}
} // namespace ko::os

@ -0,0 +1,418 @@
// ============================================================================
// ko::proc koproc.hpp
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// Managing child processes
#pragma once
#include <sched.h>
#include <semaphore.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include "kofd.hpp"
#include "kofd_pipe.hpp"
#include <atomic>
#include <array>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <tuple>
#include <utility>
namespace ko::proc {
// ------------------------------------------------------------------
// Simple popen implementation
using popen_result_t = std::pair<pid_t, std::unique_ptr<fd::pipe>>;
* Spawn a process and connect it's stdin and stdout to a pipe
* @param exec_fn An exec-style function to call in the new process.
* @param args The arguments to exec_fn
* @return The PID and a pipe object
* @warning As this uses vfork(), exec_fn must actually call some kind of exec
* before the parent process can resume.
template <typename F, typename... Args>
inline popen_result_t popen_impl(F exec_fn, Args... exec_args) {
// Open 2 pipes
auto pipefd = std::array<std::array<int, 2>, 2>{};
if (::pipe2(pipefd[0].data(), 0))
return {-1, nullptr};
if (::pipe2(pipefd[1].data(), 0)) {
return {-1, nullptr};
// Fork
auto pid = vfork();
if (pid == 0) {
// Close parent ends
// Set up stdin and stdout
if (::dup2(pipefd[0][0], 0) != 0 || ::dup2(pipefd[1][1], 1) != 1)
// Close superfluous child ends
// exec
// Close child ends
// Abort if fork failed
if (pid < 0) {
return {pid, nullptr};
// return stuff
return {pid, std::make_unique<fd::pipe>(pipefd[1][0], pipefd[0][1])};
* Spawn a process and connect it's stdin and stdout to a pipe
* @param argv The process argv
* @return The PID and a pipe object
* @note argv[0] is the process image path
popen_result_t popen(const char **argv) {
return popen_impl(::execv, const_cast<char*>(argv[0]), const_cast<char**>(argv));
* Spawn a process and connect it's stdin and stdout to a pipe
* @param argv The process argv
* @return The PID and a pipe object
* @note argv[0] is the process image name or path
popen_result_t popenp(const char **argv) {
return popen_impl(::execvp, const_cast<char*>(argv[0]), const_cast<char**>(argv));
// ------------------------------------------------------------------
/// Process Synchronization
namespace sync {
    namespace detail {
        /// Reference-counted pair of POSIX semaphores, optionally placed in
        /// shared memory so it stays usable across processes.
        class semaphore_pair {
            std::atomic_int refs;
            bool shared;
            sem_t sems[2];

        public:
            semaphore_pair(bool shared) :
                refs(0), shared(shared)
            {
                int bshared = shared ? 1 : 0;
                sem_init(&sems[0], bshared, 0);
                sem_init(&sems[1], bshared, 0);
            }

            ~semaphore_pair() {
                sem_destroy(&sems[0]);
                sem_destroy(&sems[1]);
            }

            /// Allocate a pair; returns nullptr if shared memory cannot be mapped
            static semaphore_pair *create(bool shared = false) {
                if (shared) {
                    void *mem = mmap(nullptr, sizeof(semaphore_pair), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
                    if (mem == MAP_FAILED)
                        return nullptr;
                    return new (mem) semaphore_pair(true);
                } else {
                    return new semaphore_pair(false);
                }
            }

            /// Add a reference
            semaphore_pair *retain() {
                refs++;
                return this;
            }

            /// Drop a reference; the last one destroys the pair
            void release() {
                auto v = refs.fetch_sub(1);
                if (v == 1) {
                    if (shared) {
                        // Placement-new'd into the mapping: destroy by hand, then unmap
                        this->~semaphore_pair();
                        munmap(this, sizeof(semaphore_pair));
                    } else {
                        delete this;
                    }
                }
            }

            /// Semaphore n, wrapping around so that n+1 names "the other one"
            sem_t *sem(int n) {
                return &sems[n%2];
            }
        };
    } // namespace detail

    /**
     * Contains a set of semaphores for bidirectional synchronization
     *
     * Each side waits on its own semaphore and posts the other side's.
     */
    class semapair {
        detail::semaphore_pair *sems;
        int offset;

        semapair(detail::semaphore_pair *sems, int offset) :
            sems(sems ? sems->retain() : nullptr), offset(offset)
        {}

        friend std::array<semapair, 2> make_semapair(bool);

    public:
        semapair(const semapair &o) :
            sems(o.sems ? o.sems->retain() : nullptr), offset(o.offset)
        {}

        semapair(semapair &&o) :
            sems(o.sems), offset(o.offset)
        {
            o.sems = nullptr;
        }

        ~semapair() {
            if (sems)
                sems->release();
        }

        /// Block until the other side posts
        inline void wait() {
            sem_wait(sems->sem(offset));
        }

        /// Wake the other side
        inline void post() {
            sem_post(sems->sem(offset + 1));
        }

        /// Wake the other side, then block until it posts back
        inline void yield() {
            post();
            wait();
        }
    };

    /// Create the two connected ends of a semaphore pair
    inline std::array<semapair, 2> make_semapair(bool shared) {
        auto stuff = detail::semaphore_pair::create(shared);
        return {{ {stuff, 0}, {stuff, 1} }};
    }
} // namespace sync (ko::proc::sync)
// ------------------------------------------------------------------
// Clone wrappers
/// A plain C callback together with the argument it is to be called with
using void_callback_t = std::pair<void(*)(void *), void *>;

/**
 * Represents a cloned child process with potential cleanup
 *
 * The cleanup callback (if any) is run once, as soon as the child has been
 * waited for. A negative or zero pid marks an invalid reference.
 */
class child_ref {
    pid_t _pid = -1;
    std::optional<void_callback_t> cleanup = {};
    bool _done = false;
    int _status = -1;

    inline void _check_clean() {
        if (cleanup)
            std::cerr << "Warning: ko::proc::child_ref with cleanup destroyed without waiting" << std::endl;
    }

    /// Run and drop the cleanup callback, if one is still pending
    inline void _run_cleanup() {
        if (cleanup) {
            auto [f, arg] = cleanup.value();
            cleanup = {};
            f(arg);
        }
    }

public:
    child_ref(pid_t pid) :
        _pid(pid)
    {}

    child_ref(pid_t pid, void_callback_t cleanup_cb) :
        _pid(pid), cleanup(cleanup_cb)
    {}

    child_ref(child_ref &&o) :
        _pid(o._pid), cleanup(o.cleanup),
        _done(o._done), _status(o._status)
    {
        o._pid = -1;
        o.cleanup = {};
    }

    child_ref &operator =(child_ref &&o) {
        // Self-assignment must not wipe the reference
        if (this != &o) {
            _check_clean();
            _pid = o._pid;
            cleanup = std::move(o.cleanup);
            _done = o._done;
            _status = o._status;
            o._pid = -1;
            o.cleanup = {};
        }
        return *this;
    }

    ~child_ref() {
        _check_clean();
    }

    /// True if this refers to an actual child process
    operator bool() {
        return _pid > 0;
    }

    /// Block until the child has terminated; returns its exit status
    int wait() {
        if (!_done) {
            // waitpid() gives pid <= 0 a different meaning (any child / a
            // process group), so an invalid reference must never reach it.
            if (_pid > 0)
                waitpid(_pid, &_status, 0);
            _run_cleanup();
            _done = true;
        }
        return WEXITSTATUS(_status);
    }

    /// Non-blocking wait; returns (finished, exit status)
    std::pair<bool, int> poll() {
        if (!_done) {
            if (_pid > 0 && waitpid(_pid, &_status, WNOHANG) == 0)
                return {false, 0};
            _run_cleanup();
            _done = true;
        }
        return {true, WEXITSTATUS(_status)};
    }

    pid_t pid() {
        return _pid;
    }

    /// Exit status as of the last wait()/poll()
    int status() {
        return WEXITSTATUS(_status);
    }

    bool waited() {
        return _done;
    }
};
namespace detail {
// Cleanup
struct cleanup_data {
uint8_t *stack = nullptr;
size_t stack_size;
void *args_copy = nullptr;
template <typename ArgP>
void cleanup(void *d) {
auto data = reinterpret_cast<cleanup_data*>(d);
if (data->args_copy)
delete reinterpret_cast<ArgP>(data->args_copy);
if (data->stack)
munmap(data->stack, data->stack_size);
delete data;
template <typename ArgP>
inline void_callback_t make_cleanup_cb(uint8_t *stack, size_t stack_size, ArgP data) {
return { cleanup<ArgP>, new cleanup_data{stack, stack_size, data} };
// Entrypoints
template <typename ArgP>
int vclone_entry(void *arg) {
// XXX Does this work with non-movable types?
auto [f, args] = std::move(*reinterpret_cast<ArgP>(arg));
return std::apply(f, args);
// Common work function
template <typename D>
inline child_ref do_clone(int(*entry)(void*), size_t stacksize, int flags, D *data) {
// Allocate stack
auto stack = reinterpret_cast<uint8_t*>(
mmap(nullptr, stacksize, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
if (stack == MAP_FAILED)
return {-ENOMEM};
// Clone
// SIGCHLD is required for child_ref and cleanup to work.
auto pid = ::clone(entry, stack + stacksize, SIGCHLD | flags, data);
// Discard everything if failed
if (pid < 0) {
if (data)
delete data;
if (stack)
munmap(stack, stacksize);
return {pid};
// Return child_ref with cleanup
return {pid, make_cleanup_cb(stack, stacksize, data)};
* Spawn a process sharing the same virtual memory
* @param fn The function to call in the new process
* @param stacksize The size of the new process stack
* @param flags The clone(2) flags (SIGCHLD|CLONE_VM implied)
* @param args The function arguments
template <typename F, typename... Args>
child_ref vclone(F fn, size_t stacksize, long flags, Args... args) {
auto data = new std::pair{fn, std::tuple{std::forward<Args>(args)...}};
return detail::do_clone(detail::vclone_entry<decltype(data)>, stacksize, CLONE_VM | flags, data);
* Spawn a process sharing the same virtual memory with synchronization primitives
* @param fn The function to call in the new process [int fn(ko::proc::sync::semapair, args...)]
* @param stacksize The size of the new process stack
* @param flags The clone(2) flags (SIGCHLD|CLONE_VM implied)
* @param args The function arguments
template <typename F, typename... Args>
std::pair<child_ref, sync::semapair> svclone(F fn, size_t stacksize, long flags, Args... args) {
auto [sem_a, sem_b] = sync::make_semapair(false);
auto data = new std::pair{fn, std::tuple{sem_b, std::forward<Args>(args)...}};
return {detail::do_clone(detail::vclone_entry<decltype(data)>, stacksize, CLONE_VM | flags, data), sem_a};
* Spawn a child process and immediately execvp() a new image
* @param argv The argument list for the new process.
* @note argv[0] is used as the image name/path
child_ref simple_spawn(const char *const *argv) {
auto pid = ::fork();
if (pid == 0)
::_exit(::execvp(argv[0], const_cast<char *const*>(argv)));
return {pid};
} // namespace ko::proc (ko::proc)

@ -0,0 +1,90 @@
// ============================================================================
// ko::util koutil.hpp
// (c) 2019 Taeyeon Mori <taeyeon at>
// ============================================================================
// Miscellaneous utilities
#pragma once
#include <cstring>
#include <sstream>
#include <string>
#include <utility>
#include <tuple>
namespace ko::util {
// ------------------------------------------------------------------
// Misc.
/// Build a string from fragments using ostringstream
/// Every argument is streamed with operator<< in the order given;
/// no separators are inserted.
template <typename... Args>
inline std::string str(const Args &... args) {
    auto sstream = std::ostringstream();
    (sstream <<...<< args);
    return sstream.str();
}
// ------------------------------------------------------------------
// Cvresult
/// A more verbose result type with a very terse error location indicator
/// first: 0 on success, otherwise the error value; second: name of the failed step
using cvresult = std::pair<int, const char *>;

/// Allows short-circuiting c-style return values
/// Steps are chained; once one of them returns non-zero, all later steps are
/// skipped and the failing step's value and name are kept.
struct cvshort {
    int _state = 0;
    const char *_where = nullptr;

    /// Run a step that itself returns a cvresult
    template <typename F, typename... Args>
    inline cvshort &then(F fn, Args... args) {
        if (_state == 0)
            std::tie(_state, _where) = fn(args...);
        return *this;
    }

    /// Run a step returning an int; <name> is recorded if it fails
    template <typename F, typename... Args>
    inline cvshort &then(const char *name, F fn, Args... args) {
        if (_state == 0) {
            _state = fn(args...);
            if (_state != 0)
                _where = name;
        }
        return *this;
    }

    /// Like then(fn, args...), but the step only runs if <cond> holds
    template <typename F, typename... Args>
    inline cvshort &ifthen(bool cond, F fn, Args... args) {
        if (_state == 0 && cond)
            std::tie(_state, _where) = fn(args...);
        return *this;
    }

    /// Like then(name, fn, args...), but the step only runs if <cond> holds
    template <typename F, typename... Args>
    inline cvshort &ifthen(const char *name, bool cond, F fn, Args... args) {
        if (_state == 0 && cond) {
            _state = fn(args...);
            if (_state != 0)
                _where = name;
        }
        return *this;
    }

    /// True as long as no step has failed
    operator bool() const {
        return _state == 0;
    }

    int state() const {
        return _state;
    }

    const char *where() const {
        return _where;
    }

    operator cvresult() const {
        return {_state, _where};
    }
};
} // namespace ko::util

@ -0,0 +1,95 @@
# ===================================================================
# Taeyeon's miscellaneous applications
# (c) 2019 Taeyeon Mori
# ===================================================================
PROGS = keepassxc-print steamns chome overlayns
PROGS_ALL = fakensudo ssh-overlay-kiosk overlayns-static
# Compiler config
CXX = clang++
CXXFLAGS = -std=c++20 -Wall
OPTIMIZE = -O3 -flto
# Install config
INSTALL_PATH ?= ~/.local/bin
# -------------------------------------------------------------------
# Common targets
# None of these names a file that gets built, so they are all phony
.PHONY: all most install install-fake-sudo clean

# Default target: the regular set of programs
most: $(PROGS)

# Everything, including the programs needing special setup
all: $(PROGS) $(PROGS_ALL)

# Copy every built program from PROGS into INSTALL_PATH
install: $(PROGS)
	@echo "Installing to INSTALL_PATH = $(INSTALL_PATH)"
	@mkdir -p $(INSTALL_PATH)
	@bash -c 'for prog in $(PROGS); do test -e $$prog && echo "Install $$prog -> $(INSTALL_PATH)" && install -m755 $$prog $(INSTALL_PATH); done'

# Install fakensudo in place of sudo
install-fake-sudo: fakensudo
	install -m755 $< /usr/local/bin/sudo
# ------------------------------------------------------------------
# Dependencies
dep_koutil = koutil.hpp
flg_koutil =
dep_kofs = kofs.hpp
flg_kofs =
dep_koos = koos.hpp $(dep_kofs)
flg_koos = $(flg_kofs)
dep_kofd = kofd.hpp $(dep_kofs)
flg_kofd = $(flg_kofs)
dep_kofd_pipe = kofd_pipe.hpp $(dep_kofd)
flg_kofd_pipe = $(flg_kofd)
dep_koproc = koproc.hpp $(dep_kofd_pipe)
flg_koproc = -pthread $(flg_kofd_pipe)
dep_kons = kons.hpp $(dep_koutil) $(dep_kofd) $(dep_koos)
flg_kons = $(flg_koutil) $(flg_kofd) $(flg_koos)
dep_kons_clone = kons_clone.hpp $(dep_kons) $(dep_koproc)
flg_kons_clone = $(flg_kons) $(flg_koproc)
dep_keepassxc = keepassxc-browser.hpp $(dep_koproc)
flg_keepassxc = $(shell pkg-config --cflags --libs libsodium jsoncpp) $(flg_koproc)
# -------------------------------------------------------------------
# Applications
# Each program is a single translation unit; the headers it uses are listed
# as prerequisites so that a header change triggers a rebuild.
keepassxc-print: keepassxc-print.cpp $(dep_keepassxc)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_keepassxc) -o $@ $<

steamns: steamns.cpp $(dep_kons_clone)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_kons_clone) -o $@ $<

MOverlay2-nsexec: MOverlay2-nsexec.cpp $(dep_kons_clone)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_kons_clone) -o $@ $<

chome: chome.cpp $(dep_koutil) $(dep_kofd) $(dep_koos)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_koutil) $(flg_kofd) $(flg_koos) -o $@ $<

fakensudo: fakensudo.cpp $(dep_kons)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_kons) -o $@ $<

# The binary is made setuid root right after building (needs sudo)
ssh-overlay-kiosk: ssh-overlay-kiosk.cpp $(dep_koutil) $(dep_kofd) $(dep_koos)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_koutil) $(flg_kofd) $(flg_koos) -o $@ $<
	@echo Setting $@ setuid root
	sudo chown root $@
	sudo chmod u+s $@

overlayns: overlayns.cpp $(dep_kons) $(dep_koproc)
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_kons) $(flg_koproc) -o $@ $<

# Statically linked variant; unused sections are dropped to keep it small
overlayns-static: overlayns.cpp $(dep_kons) $(dep_koproc) makefile
	$(CXX) $(CXXFLAGS) $(OPTIMIZE) $(flg_kons) $(flg_koproc) -static -fdata-sections -ffunction-sections -Wl,--gc-sections -o $@ $<

@ -0,0 +1,431 @@
// overlayns
// (c) 2021 Taeyeon Mori
#include <string>
#include <string_view>
#include <vector>
#include <unordered_map>
#include <list>
#include <variant>
#include <span>
#include <algorithm>
#include <numeric>
#include <spawn.h>
#include "kons_clone.hpp"
using namespace ko;
using namespace std::literals::string_literals;
using namespace std::literals::string_view_literals;
// Program version as a string_view.
// NOTE(review): presumably the value substituted into the "overlayns %s"
// banner printed by usage() — confirm against the full file.
static constexpr auto vers = "0.5"sv;
// Print the command-line synopsis and the mount/overlay spec reference with
// printf. The format string has two %s placeholders: one after "Synopsis: "
// and one after "overlayns " in the final banner line.
// NOTE(review): the printf argument list and the end of the function are not
// visible here — presumably `prog` and the version string are passed; confirm.
// NOTE(review): the help text calls upperdir=/workdir= mandatory, while
// parse_overlay_spec only rejects specs where exactly one of the two is given.
void usage(char const * prog) {
printf("Synopsis: %s [-h] [-o ovl-spec]... [-m mnt-spec]... <command...>\n"
"Run a command in it's own mount namespace\n"
"Spec options:\n"
" -m mnt-spec Add a mount to the namespace\n"
" -o ovl-spec Add an overlay to the namespace\n"
"Mount spec:\n"
" A mount specification takes the following format:\n"
" -m <fstype>,<device>,<mountpoint>[,<option>...]\n"
" see mount(8) for more information on options.\n"
" Some options may not match exactly however.\n"
" Shortcuts are in place for bind mounts:\n"
" `-m bind,/a,/b` is equivalent to `-m ,/a,/b,bind`\n"
" `-m rbind,/a,/b` is equivalent to `-m ,/a,/b,bind,rec`\n"
"Overlay spec:\n"
" An overlay specification takes the following form:\n"
" -o <mountpoint>,<option>...\n"
" Avaliable options are (in addition to standard mount options):\n"
" lowerdir=<path> Mandatory, see mount(8)\n"
" upperdir=<path> Mandatory, see mount(8)\n"
" workdir=<path> Mandatory, see mount(8)\n"
" shadow Replaces lowerdir=; Use mountpoint as lowerdir\n"
" and shadow it's content\n"
" tmp Replaces upperdir= and workdir=;\n"
" Use a (new) temporary directory for both\n"
" copyfrom=<path> Copy contents of <path> to upperdir before mounting\n"
"overlayns %s (c) 2021 Taeyeon Mori\n"
// Split `s` at every unescaped occurrence of the separator `c`.
//
// A separator that is directly preceded by a single backslash counts as
// escaped and does not end the current part; if that backslash is itself
// preceded by another backslash the separator is treated as a real one.
// Backslashes are NOT removed from the returned parts.
//
// Returns a vector of views into `s` (so `s` must outlive the result). The
// text after the last separator is always emitted, which means the result
// is never empty: an empty `s` yields one empty part.
auto str_split(std::string_view s, char c) {
    std::vector<std::string_view> parts;
    size_t start = 0;
    size_t next = s.find(c, start);
    while (next != s.npos) {
        const bool escaped = next > 0 && s[next-1] == '\\' && (next < 2 || s[next-2] != '\\');
        if (escaped) {
            // Skip this separator and look for the next candidate. If there
            // is none the loop ends with `start` untouched, so the remainder
            // (including the escaped separator) becomes the final part.
            next = s.find(c, next + 1);
            continue;
        }
        parts.push_back(s.substr(start, next - start));
        start = next + 1;
        next = s.find(c, start);
    }
    // Trailing segment; `start` is at most s.size() here, so substr is valid.
    parts.push_back(s.substr(start));
    return parts;
}
// Join the string views in `ss` into a single std::string, using `c` as the
// separator. An empty span yields an empty string.
// NOTE(review): `size` (sum of all part lengths plus one separator between
// each pair) is computed but not used in the lines visible here, and the body
// of the for_each lambda is not visible — presumably the result is reserved
// to `size` and each remaining part is appended after a `c`; confirm.
auto str_join(const std::span<std::string_view> &ss, char c) -> std::string {
if (ss.empty())
return {};
auto sizes = std::vector<size_t>{};
std::ranges::transform(ss, std::back_inserter(sizes), [](const auto& s) {return s.size();});
auto size = ss.size() - 1 + std::reduce(sizes.begin(), sizes.end());
auto result = std::string();
// Start with the first part; the loop below handles parts 1..n-1.
result = ss[0];
std::for_each(ss.begin()+1, ss.end(), [&result, c](const auto &s) {
return result;
// Description of one mount to perform inside the namespace.
// All string members are non-owning views; the text they point to must stay
// alive for as long as the spec is used.
struct mount_spec {
// Policy for creating a missing mountpoint before mounting.
// NOTE(review): the enumerator list is not visible here; the rest of the
// struct refers to never, maybe_this, maybe_all, require_this, require_all.
enum class mkdir_mode {
// Filesystem type; parse() sets it to "" for the bind/rbind shortcuts.
std::string_view type;
std::string_view device;
std::string_view mountpoint;
// MS_* flag bits, OR-ed together by parse() and apply_options().
uint64_t flags = 0;
// Options joined with ',' by execute() and handed to mount as its data argument.
std::vector<std::string_view> args;
mkdir_mode mkdir = mkdir_mode::never;
// A single parser diagnostic.
struct parse_error {std::string_view msg;};
// Apply a list of textual mount options to this spec.
//  - "mkdir=never|maybe|require" selects the mkdir policy; any other
//    mkdir= argument is recorded as an error.
//  - names found in the table below OR their MS_* bit into `flags`.
// Returns the list of errors collected while processing `opts`.
// NOTE(review): the body of the final `else` branch (options that are neither
// mkdir= nor a known flag name) is not visible — presumably such options are
// appended to `args`; confirm.
auto apply_options(std::span<std::string_view> opts) -> std::list<parse_error> {
static const std::unordered_map<std::string_view, uint64_t> flagnames = {
{"remount", MS_REMOUNT},
{"move", MS_MOVE},
{"bind", MS_BIND},
{"rec", MS_REC},
// propagation
{"shared", MS_SHARED},
{"private", MS_PRIVATE},
{"unbindable", MS_UNBINDABLE},
{"slave", MS_SLAVE},
// read
// "rw" maps to 0: it is accepted but, since flags are only OR-ed, it does
// not clear an MS_RDONLY bit set by an earlier option.
{"rw", 0},
{"ro", MS_RDONLY},
// atime
{"noatime", MS_NOATIME},
{"nodiratime", MS_NODIRATIME},
{"relatime", MS_RELATIME},
{"strictatime", MS_STRICTATIME},
// filetypes
{"nodev", MS_NODEV},
{"noexec", MS_NOEXEC},
{"nosuid", MS_NOSUID},
// misc
{"dirsync", MS_DIRSYNC},
{"lazytime", MS_LAZYTIME},
{"silent", MS_SILENT},
{"synchronous", MS_SYNCHRONOUS},
{"mandlock", MS_MANDLOCK},
std::list<parse_error> errors;
for (const std::string_view &opt : opts) {
if (opt.starts_with("mkdir=")) {
// 6 == length of "mkdir="
auto arg = opt.substr(6);
if (arg == "never") {
mkdir = mkdir_mode::never;
} else if (arg == "maybe") {
mkdir = mkdir_mode::maybe_all;
} else if (arg == "require") {
mkdir = mkdir_mode::require_all;
} else {
errors.push_back({"Unknown mkdir= argument"});
} else if (auto f = flagnames.find(opt); f != flagnames.end()) {
flags |= f->second;
} else {
return errors;
// Parse a `-m` mount specification of the form
//   <fstype>,<device>,<mountpoint>[,<option>...]
//
// The first three comma-separated fields are mandatory. The shortcut types
// "bind" and "rbind" are translated into MS_BIND (plus MS_REC for rbind)
// with an empty filesystem type. All remaining fields are handed to
// apply_options().
//
// Returns the parsed spec together with the list of parse errors; the spec
// is only meaningful when that list is empty. The spec holds views into `s`.
static auto parse(std::string_view s) -> std::pair<mount_spec, std::list<parse_error>> {
    auto parts = str_split(s, ',');
    // Validate the number of fields, not the number of characters: a short
    // spec such as "a,b" is three characters long but has only two fields,
    // and indexing parts[2] below would be out of bounds.
    if (parts.size() < 3) {
        std::cerr << "Incomplete mount spec: " << s << std::endl;
        return {{}, {{"Incomplete mount spec (need at least type,device,mountpoint)"}}};
    }

    mount_spec spec = {
        .type = parts[0],
        .device = parts[1],
        .mountpoint = parts[2],
    };

    // Bind-mount shortcuts: the type field carries the flag instead of a fs type
    if (spec.type == "bind") {
        spec.flags |= MS_BIND;
        spec.type = "";
    } else if (spec.type == "rbind") {
        spec.flags |= MS_BIND | MS_REC;
        spec.type = "";
    }

    // Everything after the mountpoint is an option
    auto errors = spec.apply_options(std::span(parts).subspan(3));
    return {spec, errors};
}
// Perform the mount described by this spec.
// Returns 0 on success, 41 when the mountpoint precondition implied by
// `mkdir` is violated, or the non-zero result of os::mount on failure.
// NOTE(review): several lines are not visible here — the bodies of the two
// directory-creating branches, and the declarations of `dev` and `dest`
// (presumably std::string copies of `device` and `mountpoint`, declared
// between `fstype` and `margs`); confirm against the full file.
int execute() {
if (!fs::exists(mountpoint)) {
if (mkdir == mkdir_mode::maybe_all || mkdir == mkdir_mode::require_all) {
} else if (mkdir == mkdir_mode::maybe_this || mkdir == mkdir_mode::require_this) {
} else {
// NOTE(review): unlike the mount failure message below, this message and
// the next one are written without a trailing newline.
std::cerr << "Mountpoint doesn't exist: " << mountpoint;
return 41;
} else if (mkdir == mkdir_mode::require_this || mkdir == mkdir_mode::require_all) {
std::cerr << "Mountpoint exists but was required to be created: " << mountpoint;
return 41;
// string_views are copied into std::strings before .c_str() is taken
std::string fstype{type},
margs = str_join(args, ',');
//std::cerr << "Mount -t " << fstype << " " << dev << " " << dest << " -o " << margs << " -f " << flags << std::endl;
// An empty option string is passed as nullptr data
auto res = os::mount(dev, dest, fstype.c_str(), flags, (void*)(margs.empty() ? nullptr : margs.c_str()));
if (res) {
std::cerr << "Failed mounting " << dev << " on " << dest << std::endl;
return res;
return 0;
// Recipe step that recursively copies `source` to `dest`.
struct copy_spec {
std::string_view source;
std::string_view dest;
// Run the copy using the non-throwing (error_code) overload of fs::copy.
// Prints a diagnostic on failure and returns the error code value
// (0 when the copy succeeded).
int execute() {
std::error_code ec;
fs::copy(source, dest, fs::copy_options::recursive, ec);
if (ec.value())
std::cerr << "Could not copy " << source << " to " << dest << ": " << ec.message() << std::endl;
return ec.value();
// Everything collected from the command line.
struct config {
// A step is either a mount or a copy; main() runs the recipe in order.
using step = std::variant<mount_spec, copy_spec>;
std::list<step> recipe;
// Paths that main() removes recursively after the child has exited.
std::list<fs::path> cleanup;
// Points into argv at the first non-option argument (the child command line).
char const * const * cmdline;
// Null coalescing helper
// Returns `a` when it is non-null and `dflt` otherwise.
template <typename T>
T *nc(T *a, T *dflt) {
return a ? a : dflt;
// Global backing storage for option strings assembled at parse time.
// parse_overlay_spec() emplaces strings here and keeps std::string_view
// references to them; std::list does not invalidate references to existing
// elements when new ones are added.
std::list<std::string> strings_g;
// Parse a `-o` overlay specification of the form <mountpoint>,<option>...
// and add the resulting steps to `cfg`.
// Returns the list of parse errors (empty on success).
// NOTE(review): several lines of this function are not visible here, among
// them the code that turns the collected options into the overlay mount step
// and the closing of most blocks; the comments below only describe what the
// visible lines do.
auto parse_overlay_spec(std::string_view s, config &cfg) -> std::list<mount_spec::parse_error> {
auto parts = str_split(s, ',');
if (parts.size() < 1)
return {{"Incomplete overlay spec"}};
// type "overlay", device "overlay", mountpoint = first field
mount_spec mspec = {"overlay", "overlay", parts[0]};
// Overlay-specific options picked out of the option list
struct {
std::string_view lowerdir;
std::string_view upperdir;
std::string_view workdir;
bool tmp = false, shadow = false;
std::string_view copy_from;
} x;
auto options = std::vector<std::string_view>{};
// copy_if doubles as a filter: the predicate records overlay-specific options
// in `x` and returns true only for the remaining ones, which end up in `options`.
std::copy_if(parts.begin()+1, parts.end(), std::back_inserter(options), [&x](const auto &opt) {
// lowerdir/upperdir/workdir keep their "key=" prefix
if (opt.starts_with("lowerdir=")) {
x.lowerdir = opt;
} else if (opt.starts_with("upperdir=")) {
x.upperdir = opt;
} else if (opt.starts_with("workdir=")) {
x.workdir = opt;
} else if (opt.starts_with("copyfrom=")) {
// 9 == length of "copyfrom="; only the path is kept
x.copy_from = opt.substr(9);
} else if (opt == "tmp") {
x.tmp = true;
} else if (opt == "shadow") {
x.shadow = true;
} else {
return true;
return false;
static constexpr auto lowerdir_opt = "lowerdir="sv;
if (x.shadow) {
// lowerdir == mountpoint
// Builds "lowerdir=<mountpoint>" and, if a lowerdir= option was given,
// appends ":<its value>" after the mountpoint.
auto& s = strings_g.emplace_back();
s.reserve(x.lowerdir.empty() ? lowerdir_opt.size() + mspec.mountpoint.size() : x.lowerdir.size() + mspec.mountpoint.size());
s = lowerdir_opt;
s += mspec.mountpoint;
if (!x.lowerdir.empty()) {
s += ":";
s += x.lowerdir.substr(lowerdir_opt.size());
x.lowerdir = s;
static constexpr auto upperdir_opt = "upperdir="sv;
static constexpr auto upperdir_name = "/upper"sv;
static constexpr auto workdir_opt = "workdir="sv;
static constexpr auto workdir_name = "/work"sv;
if (x.tmp) {
// Base directory: $TMPDIR if set, otherwise /tmp
auto tmpdir = std::string{nc((const char*)getenv("TMPDIR"), "/tmp")};
// NOTE(review): the mkdtemp call below is garbled in this view (its argument
// is missing) and no template suffix is visibly appended to `tmpdir` —
// confirm against the full file.
if (!mkdtemp( {
return {{"Could not create temporary directory for 'tmp' overlay option"sv}};
// upperdir=<tmpdir>/upper
auto& upperdir = strings_g.emplace_back();
upperdir.reserve(upperdir_opt.size() + tmpdir.size() + upperdir_name.size());
upperdir = upperdir_opt;
upperdir += tmpdir;
upperdir += upperdir_name;
x.upperdir = upperdir;
// workdir=<tmpdir>/work
auto& workdir = strings_g.emplace_back();
workdir.reserve(workdir_opt.size() + tmpdir.size() + workdir_name.size());
workdir = workdir_opt;
workdir += tmpdir;
workdir += workdir_name;
x.workdir = workdir;
// Validation: lowerdir is required; upperdir and workdir must be given
// together or not at all.
std::list<mount_spec::parse_error> errors;
if (x.lowerdir.empty()) {
errors.push_back({"Missing lowerdir option"sv});
} else {
if (x.upperdir.empty() != x.workdir.empty()) {
errors.push_back({"Must specify upperdir and workdir both or neither"sv});
} else if (!x.upperdir.empty()) {
if (!errors.empty()) {
return errors;
// copyfrom: schedule a copy into the upperdir path (option prefix stripped)
if (!x.copy_from.empty()) {
cfg.recipe.emplace_back(copy_spec{x.copy_from, x.upperdir.substr(upperdir_opt.size())});
return errors;
// Entry point of overlayns: parse -o/-m/-h options into a recipe, clone a
// child into a new mount namespace, run the recipe there and exec the command,
// then wait for the child and remove the paths registered for cleanup.
// Exit codes visible here: 1 for an unknown option, 33 for a spec parse
// error, 22 for a missing command, otherwise the clone error or the child's
// wait result.
// NOTE(review): several lines (usage() calls, storing the parsed mount spec,
// block closers) are not visible in this view.
int main(int argc, char*const* argv) {
config cfg;
// Commandline parsing
// Leading '+' makes getopt stop at the first non-option argument
constexpr auto argspec = "+ho:m:";
for (auto opt = ::getopt(argc, argv, argspec); opt != -1; opt = ::getopt(argc, argv, argspec)) {
if (opt == 'h' || opt == '?') {
return opt == '?' ? 1 : 0;
} else if (opt == 'o') {
auto err = parse_overlay_spec(::optarg, cfg);
if (!err.empty()) {
std::cerr << "Error parsing overlay spec: " << ::optarg << std::endl;
for (const auto &e : err) {
std::cerr << " " << e.msg << std::endl;
return 33;
} else if (opt == 'm') {
auto [spec, err] = mount_spec::parse(::optarg);
if (!err.empty()) {
std::cerr << "Error parsing mount spec: " << ::optarg << std::endl;
for (const auto &e : err) {
std::cerr << " " << e.msg << std::endl;
return 33;
} else {
// Everything after the options is the child command line
cfg.cmdline = &argv[::optind];
if (!cfg.cmdline[0]) {
std::cerr << "Missing child commandline" << std::endl;
return 22;
// Unshare
uid_t uid = getuid();
gid_t gid = getgid();
// The lambda runs in the cloned child (CLONE_NEWNS, 102400 passed as stack size)
auto [child, ret] = ns::clone::uvclone_single(uid, gid, [&cfg](){
// Execute recipe
// Stops at the first step that reports a non-zero result
for (auto &step : cfg.recipe) {
int res = 0;
std::visit([&res](auto &spec) {
res = spec.execute();
}, step);
if (res)
return res;
return ::execvp(cfg.cmdline[0], const_cast<char*const*>(cfg.cmdline));
}, 102400, CLONE_NEWNS);
if (ret)
return ret;
// free memory
// execute child
ret = child.wait();
// Remove everything registered for cleanup once the child is done
std::ranges::for_each(cfg.cleanup, [](const auto& p) {fs::remove_all(p);});
return ret;

@ -0,0 +1,192 @@
// (c) 2020 Taeyeon Mori
#include "koutil.hpp"
#include "kofs.hpp"
#include "kofd.hpp"
#include "koos.hpp"
#include <cstdlib>
#include <sstream>
#include <iostream>
#include <filesystem>
#include <unistd.h>
#include <pwd.h>
#include <mntent.h>
// Command-line parameters of ssh-overlay-kiosk.
struct params {
// File shown on login (-m); empty when not given
std::filesystem::path motd;
// NOTE(review): presumably selects the "make fs readonly" step in main();
// the expression using it there is garbled in this view — confirm.
bool ro = true;
// Enables the "protect self" step in main()
bool protect = true;
// Command to execute; nullptr means main() falls back to the passwd shell
char *const *argv = nullptr;
// Print the usage text for `prog` to stdout.
// NOTE(review): the end of the statement and of the function is not visible
// in this view.
void usage(const char *prog) {
std::cout << "Usage: " << prog << " [-m MOTD] [ARGV...]" << std::endl
<< std::endl
<< "Options:" << std::endl
<< " -m MOTD Specify a file to be displayed on login" << std::endl
<< " ARGV Specify the shell executable and arguments" << std::endl
<< " By default, the shell from /etc/passwd is used with argument -l" << std::endl
// Parse the command line into a params value.
// -h and unknown options terminate the process (status 0 for -h, 1
// otherwise); -m sets the motd path; remaining arguments become p.argv.
// NOTE(review): the statement following `if (opt == -1)` (presumably a
// `break`) and a likely usage() call before exit() are not visible here.
params parse_args(int argc, char **argv) {
params p{};
// Leading '+' makes getopt stop at the first non-option argument
constexpr auto spec = "+hm:";
while (true) {
auto opt = getopt(argc, const_cast<char *const *>(argv), spec);
if (opt == -1)
else if (opt == '?' || opt == 'h') {
exit(opt == 'h' ? 0 : 1);
} else if (opt == 'm') {
p.motd = ::optarg;
// Anything left after the options is the command to run
if (argc > ::optind)
p.argv = const_cast<char *const *>(&argv[::optind]);
return p;
// Helpers
// Turn the current errno into a negative return code.
// NOTE(review): `desc` is unused in the lines visible here — presumably it is
// reported (e.g. via perror) on a line not shown; confirm.
int xerror(const char *desc) {
return -errno;
// Print `msg` to stderr and terminate the process with exit status `r`.
[[noreturn]] void die(int r, const char *msg) {
    std::cerr << msg << std::endl;
    // A [[noreturn]] function must never return to its caller, so end the
    // process here with the requested status.
    std::exit(r);
}
// Fatal-error helper declared [[noreturn]].
// NOTE(review): the body is not visible in this view — presumably it reports
// `msg` together with the current errno and exits with status `r`; confirm.
[[noreturn]] void die_errno(int r, const char *msg) {
// Thin wrapper around the setmntent/getmntent API for reading a mount table.
struct mntent_context {
// Stream returned by setmntent; nullptr when opening failed
FILE *mounts;
// Open `fname` for reading
mntent_context(char const *fname) {
mounts = setmntent(fname, "r");
// True when the table was opened successfully
operator bool() {
return mounts != nullptr;
// NOTE(review): the statement guarded by this check is not visible —
// presumably endmntent(mounts); confirm.
~mntent_context() {
if (mounts != nullptr)
// Return the next entry as produced by getmntent
mntent *next() {
return getmntent(mounts);
// Remount every entry listed in /proc/mounts read-only.
// Returns 0 on success and 1 if the mount table cannot be opened or any
// remount fails (processing stops at the first failure).
int ro_all_mounts() {
    // Change all current mounts to readonly
    auto mounts = mntent_context("/proc/mounts");
    if (!mounts)
        return 1;

    // Walk the table entry by entry; next() yields nullptr once it is exhausted
    mntent *ent;
    while ((ent = mounts.next()) != nullptr) {
        if (ko::os::bind(ent->mnt_dir, ent->mnt_dir, MS_REMOUNT|MS_RDONLY))
            return 1;
    }
    return 0;
}
// Bind-mount /dev/null over this program's own executable.
// Returns 1 when the executable path cannot be resolved, otherwise the result
// of ko::os::bind.
int protect_self() {
// Hide self by bind-mounting /dev/null on top. Make it harder to exploit any vulns, just in case
// Though this does give away the name of the executable...
auto path = ko::fd::readlink("/proc/self/exe");
if (path.empty())
return 1;
return ko::os::bind("/dev/null", path);
// Entry point of ssh-overlay-kiosk.
// Runs a chain of named steps through ko::util::cvshort: verify that the
// effective uid is root, look up the calling user, unshare the mount
// namespace, optionally remount everything read-only and hide the executable,
// mount a tmpfs on /tmp, overlay the user's home directory with upper/work
// directories below /tmp/.home, optionally print the motd, drop privileges and
// exec the requested command (or the user's login shell with -l).
// NOTE(review): presumably the chain stops at the first failing step — the
// semantics of cvshort are not visible here. The closers of the lambdas and
// any error reporting after the chain are also not visible.
int main(int argc, char **argv) {
params p = parse_args(argc, argv);
uid_t ruid, euid, suid;
gid_t rgid, egid, sgid;
passwd *passwd;
std::string options;
const char *default_shell_argv[] = {nullptr, "-l", nullptr}; // gets executable name from user passwd record
// Use shell from passwd if no command is given in argv
if (p.argv == nullptr)
p.argv = const_cast<char *const*>(default_shell_argv);
auto [e, eloc] = ko::util::cvshort()
.then("getresuid", getresuid, &ruid, &euid, &suid)
.then("getresgid", getresgid, &rgid, &egid, &sgid)
.then("getpwuid", [ruid,euid,&passwd]() {
// Check root perms
if (euid != 0)
die(3, "Must be suid root");
// Retrieve user info
// errno is cleared first so that a failing lookup can be told apart from
// "no such user" (nullptr with errno still 0)
errno = 0;
passwd = getpwuid(ruid);
if (errno != 0)
return 5;
else if (passwd == nullptr)
die(4, "Calling user ID not known to system");
return 0;
.then("setegid", ::setegid, 0)
.then("unshare", ::unshare, CLONE_NEWNS)
.then("make ns slave", ko::os::mount, "", "/", "", MS_REC|MS_SLAVE, nullptr)
// NOTE(review): the condition argument of the next step is missing in this
// view — presumably p.ro, by analogy with the step after it; confirm.
.ifthen("make fs readonly",, ro_all_mounts)
.ifthen("protect self", p.protect, protect_self)
.then("mount tmp", ko::os::mount, "tmpfs", "/tmp", "tmpfs", MS_NOEXEC|MS_NODEV|MS_NOSUID, nullptr)
.then([ruid,rgid,suid,&options,passwd,&default_shell_argv]() -> ko::util::cvresult {
// Create directories
// /tmp is handed to the real user, then the overlay directories are created
// with the user's effective ids before the saved uid is restored
auto d = ko::fd::opendir("/tmp");
auto r = ko::util::cvshort()
.then("fchown tmp", ::fchown, (int)d, ruid, rgid)
.then("setegid", ::setegid, rgid)
.then("seteuid", ::seteuid, ruid)
.then("mkdir .home", ko::fd::mkdir, ".home", 0750, (int)d)
.then("mkdir work", ko::fd::mkdir, ".home/work", 0750, (int)d)
.then("mkdir top", ko::fd::mkdir, ".home/top", 0750, (int)d)
.then("seteuid root", ::seteuid, suid);
if (r) {
// Build option string
options = ko::util::str("lowerdir=", passwd->pw_dir, ",upperdir=/tmp/.home/top,workdir=/tmp/.home/work");
// Use shell from passwd
default_shell_argv[0] = passwd->pw_shell;
return r;
// NOTE(review): passwd->pw_dir and options.c_str() appear as plain arguments
// here, so they are evaluated when the chain expression is built — possibly
// before the steps that assign `passwd` and `options` have run, depending on
// how cvshort invokes its steps; verify.
.then("mount overlay", ko::os::mount, "overlay", passwd->pw_dir, "overlay", 0, (void*)options.c_str())
.ifthen("show motd", !p.motd.empty(), [&p]() {
// Copy the whole motd file to stdout
auto f = ko::fd::open(p.motd, O_RDONLY);
if (!f)
return 1;
struct stat st;
if (::fstat(f, &st))
return 1;
return ko::fd::fcopy(f, STDOUT_FILENO, st.st_size) ? 0 : 1;
.then("chdir home", ::chdir, passwd->pw_dir)
// Group ids are dropped before user ids
.then("drop gid", ::setresgid, rgid, rgid, rgid)
.then("drop uid", ::setresuid, ruid, ruid, ruid)
.then("exec", ::execvp, p.argv[0], p.argv);
return e;

@ -0,0 +1,591 @@
// Isolate steam in a namespace
// (c) 2019 Taeyeon mori <taeyeon at>
#include "kons_clone.hpp"
#include <cstdlib>
#include <sstream>
#include <iostream>
#include <fstream>
#include <filesystem>
#include <unistd.h>
#include <sys/signalfd.h>
namespace fs = std::filesystem;
// Root directory of the container; main() joins it onto the user's home directory
constexpr auto ROOT_DIR = ".local/steam";
// Command used when no argv is given on the command line (nullptr-terminated)
constexpr auto DEFAULT_CMD = (const char*[]){"/bin/bash", nullptr};
// User name used for the dummy /etc/passwd entry and for run/media/<user>
constexpr auto STEAM_USER = "steamuser";
// Helpers
// Turn the current errno into a negative return code.
// NOTE(review): `desc` is unused in the lines visible here — presumably it is
// reported (e.g. via perror) on a line not shown; confirm.
int xerror(const char *desc) {
return -errno;
// Print `msg` to stderr and terminate the process with exit status `r`.
[[noreturn]] void die(int r, const char *msg) {
    std::cerr << msg << std::endl;
    // A [[noreturn]] function must never return to its caller, so end the
    // process here with the requested status.
    std::exit(r);
}
// Fatal-error helper declared [[noreturn]].
// NOTE(review): the body is not visible in this view — presumably it reports
// `msg` together with the current errno and exits with status `r`; confirm.
[[noreturn]] void die_errno(int r, const char *msg) {
// Report to parent process
// One-shot value hand-off between processes: a semaphore plus a value slot,
// placed in an anonymous mmap region by create()/create_private().
// NOTE(review): the semaphore operations in the destructor, wait() and post()
// are not visible in this view (presumably sem_destroy, sem_wait and
// sem_post), and neither is any access specifier; confirm against the full
// file.
template <typename T>
class proc_future {
sem_t ready;
T value;
// `shared` is forwarded as the pshared argument of sem_init; the semaphore
// starts at 0
proc_future(int shared) {
sem_init(&ready, shared, 0);
~proc_future() {
// Return the stored value
T wait() {
return value;
// Store `v` as the value
void post(const T &v) {
value = v;
// Allocate in a MAP_SHARED anonymous mapping with a process-shared semaphore.
// Returns nullptr when mmap fails.
static proc_future<T> *create() {
auto shm = mmap(nullptr, sizeof(proc_future<T>), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
if (shm == MAP_FAILED)
return nullptr;
return new (shm) proc_future<T>(1);
// in shared VM, unmap() must be skipped
// Same as create(), but with a MAP_PRIVATE mapping and a non-shared semaphore.
static proc_future<T> *create_private() {
auto shm = mmap(nullptr, sizeof(proc_future<T>), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if (shm == MAP_FAILED)
return nullptr;
return new (shm) proc_future<T>(0);
// Waiting process must call destroy()
// NOTE(review): as visible here destroy() and unmap() are identical; a
// destructor call in destroy() may be among the missing lines.
void destroy() {
munmap(this, sizeof(proc_future<T>));
// Posting process must call unmap() instead
void unmap() {
munmap(this, sizeof(proc_future<T>));
// ========================================================
// Namespace spawn process
// ========================================================
namespace nsproc {
// Settings handed to the process that runs inside the namespace.
struct config {
// root_path: directory that becomes the new root; home_path: the user's
// home directory; pwd: working directory to enter (empty = use home)
fs::path root_path, home_path, pwd;
char *const *exec_argv; // must be nullptr-terminated
// Ids the application is switched to before exec
uid_t uid, gid;
bool mounts, gui_mounts, system_ro, keep_root, dummy_mode, pid_ns;
// Optional program spawned by exec_app() before the application itself
std::optional<fs::path> setup_exec;
// Directory fd handed to ko::ns::setns when joining existing namespaces
int ns_path_fd;
// Idle loop used in dummy mode: block SIGCHLD, receive it through a signalfd,
// reap exited children, and return 0 once at most one numeric entry is
// counted in /proc.
// Returns the negative errno from xerror() if a system call fails.
// NOTE(review): several lines are not visible in this view — `mask` and `fds`
// are not visibly cleared (sigemptyset / FD_ZERO) before use, the loop exits
// and the `count` increment are missing, and so are all block closers;
// confirm against the full file.
int pid1() {
sigset_t mask;
sigaddset(&mask, SIGCHLD);
if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1)
return xerror("sigprocmask");
int sfd = signalfd(-1, &mask, 0);
if (sfd == -1)
return xerror("signalfd");
fd_set fds;
// Wake up at least once a minute even without signals
struct timeval const tv = {.tv_sec = 60,.tv_usec = 0};
while (true) {
FD_SET(sfd, &fds);
// select() may modify the timeout, so pass a fresh copy every round
struct timeval _tv = tv;
int retval = select(sfd + 1, &fds, NULL, NULL, &_tv);
if (retval < 0)
return xerror("select");
else if (retval) {
struct signalfd_siginfo si;
int s = read(sfd, &si, sizeof(si));
if (s != sizeof(si))
return xerror("signalfd_read");
if (si.ssi_signo != SIGCHLD) {
std::cerr << "Warn: Got signal != SIGCHLD" << std::endl;
// Reap children
while (true) {
pid_t w = ::waitpid(-1, NULL, WNOHANG);
if (w == -1) {
if (errno == ECHILD)
// Check if there are still processes in namespace
// Only /proc entries whose name starts with a digit are considered
auto dir = ko::fs::dir_ptr("/proc");
int count = 0;
for (auto ent : dir) {
if (!isdigit(ent.d_name[0]))
if (count <= 1)
return 0;
// Prepare the environment inside the namespace and exec the application.
// Only returns if exec fails (negative errno via xerror()).
// NOTE(review): the lines that use `home` and `proc` after they are obtained
// are not visible in this view (presumably a chdir to home and a wait on the
// setup process); confirm.
// NOTE(review): the results of setresgid/setresuid are not checked in the
// visible lines.
int exec_app(const config &conf) {
if (conf.pwd.empty()) {
// Go to home is_directory
auto home = ko::os::get_home();
} else if (chdir(conf.pwd.c_str())) {
die_errno(50, "Could not preserve working directory (Maybe -k is required?)");
// Env.
setenv("TMPDIR", "/tmp", 1); // Any subfolders may not exist
// Last argument 0: an existing PULSE_SERVER value is kept
setenv("PULSE_SERVER", ko::util::str("unix:/run/user/", conf.uid, "/pulse/native").c_str(), 0);
// Run provided setup cmd
if (conf.setup_exec) {
const char* argv[2] = {conf.setup_exec.value().c_str(), nullptr};
auto proc = ko::proc::simple_spawn(argv);
// Drop Permissions
// Group ids first, then user ids
setresgid(conf.gid, conf.gid, conf.gid);
setresuid(conf.uid, conf.uid, conf.uid);
// Exec
execvpe(conf.exec_argv[0], conf.exec_argv, environ);
return xerror("exec");
// Body of the process cloned into fresh namespaces.
// When conf.mounts is set it builds the mount tree below conf.root_path and
// pivots into it, reports the outcome through `report` (1 on failure, 0 on
// success; `report` may be null), and then either execs the application or,
// in dummy mode, runs the pid1() idle loop.
// NOTE(review): some lines are not visible in this view, including the start
// of the .ifthen(...) call that mounts the GUI paths and most closers.
int nsproc_create(const config &conf, proc_future<int> *report) {
// Mount Namespace
if (conf.mounts) {
// Slightly hacky
// NOTE(review): getenv("USER") is passed on without a visible null check.
auto run_media_path = ko::util::str("/run/media/", getenv("USER"));
auto [err, where] = ko::util::cvshort()
// Mount base system: /, /proc, /sys, /dev, /tmp, /run
.then(ko::ns::mount::mount_core, conf.root_path)
// Mount /usr readonly because file permissions are useless in a single-uid namespace
.ifthen(conf.system_ro && fs::exists(conf.root_path / "usr"),
ko::ns::mount::protect_path, conf.root_path / "usr")
.ifthen(conf.system_ro && fs::exists(conf.root_path / "etc"),
ko::ns::mount::protect_path, conf.root_path / "etc")
// Recursively bind in /media and /run/media/$USER for games
.ifthen("bind_media", fs::exists("/media") && fs::exists(conf.root_path / "media"),
ko::os::bind, "/media", conf.root_path / "media", MS_REC)
.ifthen("bind_run_media", fs::exists(run_media_path), [&conf, &run_media_path] () {
// The host's /run/media/$USER appears as run/media/<STEAM_USER> inside the root
auto target_path = conf.root_path / "run/media" / STEAM_USER;
std::error_code ec;
fs::create_directories(target_path, ec);
if (ec)
return 1;
return ko::os::bind(run_media_path, target_path, MS_REC);
// Mount different things required by gui apps
ko::ns::mount::mount_gui, conf.root_path, conf.home_path.relative_path(), ko::util::str("run/user/", conf.uid))
// Add a dummy user to /etc/passwd
.then("bind_passwd", [&conf]() {
// A copy of etc/passwd with one appended entry is bind-mounted over the original
auto etc_passwd = conf.root_path / "etc/passwd";
auto tmp_passwd = conf.root_path / "tmp/passwd";
if (fs::exists(etc_passwd)) {
fs::copy(etc_passwd, tmp_passwd);
auto s = std::fstream(tmp_passwd, std::fstream::out | std::fstream::app);
s << std::endl << STEAM_USER << ":x:" << conf.uid << ":" << conf.gid << ":Steam Container User:" << conf.home_path.native() << ":/bin/bash" << std::endl;
return ko::os::bind(tmp_passwd, etc_passwd);
return 0;
// Finally, pivot_root
.then(ko::ns::mount::pivot_root, conf.root_path, "mnt", conf.keep_root);
if (err) {
if (report) report->post(1);
// errno is set from the chain result so that xerror() reports it
errno = err;
return xerror(where);
if (report) report->post(0);
// Run Application
if (!conf.dummy_mode)
return exec_app(conf);
return pid1();
// Joining Existing
// Must associate pid namespace in parent process first!
// Parent-side half of joining existing namespaces: enter the user and pid
// namespaces referenced by conf.ns_path_fd.
// Returns 0 on success, otherwise the result of xerror() for the failing step.
// NOTE(review): unlike nsproc_create, errno is not assigned from `err` before
// calling xerror() here — verify which of the two is intended.
int nsproc_join_parent(const config &conf) {
auto [err, where] = ko::util::cvshort()
.then("setns_p_user", ko::ns::setns, "user", CLONE_NEWUSER, conf.ns_path_fd)
.then("setns_p_pid", ko::ns::setns, "pid", CLONE_NEWPID, conf.ns_path_fd);
if (err)
return xerror(where);
return 0;
// Child-side half of joining existing namespaces: enter the mount namespace
// referenced by conf.ns_path_fd, then run the application via exec_app().
// Returns the result of xerror() if setns fails, otherwise whatever
// exec_app() returns.
int nsproc_join_child(const config &conf) {
auto [err, where] = ko::util::cvshort()
//.then("setns_c_user", ko::ns::setns, "user", CLONE_NEWUSER, conf.ns_path_fd)
.then("setns_c_mnt", ko::ns::setns, "mnt", CLONE_NEWNS, conf.ns_path_fd);
if (err)
return xerror(where);
return exec_app(conf);
// ========================================================
// Main
// ========================================================
// Print the full usage text for `prog` to stdout, grouped into general,
// namespace sharing, joining and creation options.
void usage(const char *prog) {
std::cout << "Usage:" << std::endl
<< " " << prog << " -h" << std::endl
<< " " << prog << " [-rMGk] [-p <path>] [-e <path>] [--] <argv...>" << std::endl
<< " " << prog << " -c <path> [-MGk] [-p <path>] [-e <path>] [--] <argv...>" << std::endl
<< " " << prog << " -j <path> [-e <path>] [--] <argv...>" << std::endl
<< std::endl
<< "General Options:" << std::endl
<< " -h Display this help text" << std::endl
<< std::endl
<< "Namespace Sharing Options:" << std::endl
<< " -c <path> Create joinable namespace" << std::endl
<< " -j <path> Join namespaces identified by path" << std::endl
<< "Note: Passing the single-character '-' will use '$root_path/.namespace'" << std::endl
<< std::endl
<< "Namespace Joining Options:" << std::endl
<< " -p <path> The path to use for '-j-'" << std::endl
<< " -D Automatically spawn a instance of '" << prog << " -Dc'" << std::endl
<< " into the background if the ns path doesn't exist." << std::endl
<< "Note: -D can be combined with most options from the NS Creation section below" << std::endl
<< " but those options are ignored unless the ns must be created" << std::endl
<< std::endl
<< "Namespace Creation Options:" << std::endl
<< " -r Run in fakeroot mode (implies -W)" << std::endl
<< " -p <path> Use custom root path" << std::endl
<< " -M Don't set up mouts (implies -G)" << std::endl
<< " -G Don't set up GUI-related mounts" << std::endl
<< " -W Don't make system paths read-only (/usr, /etc)" << std::endl
<< " -k Keep the original root filesystem at /mnt" << std::endl
<< " -w Preserve working directory (may require -k)" << std::endl
<< " -e <path> Exceute a file during namespace setup" << std::endl
<< " -D Don't run any program, but idle to keep the namespace active." << std::endl
<< " This also takes care of reaping Zombies if it is PID 1." << std::endl;
// Command-line configuration of steamns, filled in by parse_cmdline().
struct config {
// Directory that becomes the root of the namespace
fs::path root_path;
// Command to run; defaults to DEFAULT_CMD
const char *const *exec_argv = DEFAULT_CMD;
// One flag per option: -r, -M, -G, -k, -w, -D; pid_ns has no option in
// parse_cmdline; ns_create is set by -c; system_ro is cleared by -W and -r
bool fakeroot = false,
mounts = true,
gui_mounts = true,
keep_root = false,
keep_pwd = false,
dummy_mode = false,
pid_ns = true,
ns_create = false,
system_ro = true;
// Path given with -c or -j
// NOTE(review): the declaration ends with a comma, so at least one further
// member is missing from this view — the rest of the file refers to
// ns_setup_exec; confirm.
std::optional<fs::path> ns_path,
// Parse commandline arguments
// returns -1 on success, exit code otherwise
// Visible exit codes: 0 for -h, 1 for an unknown option, 5 when the
// combination of options fails the sanity checks below.
// NOTE(review): the statement after `if (opt == -1)` (presumably `break`),
// a likely usage() call for -h/-?, and the block closers are not visible in
// this view.
int parse_cmdline(config &conf, int argc, const char *const *argv) {
// Leading '+' makes getopt stop at the first non-option argument
constexpr auto spec = "+hp:rkwWMGe:c:j:D";
bool custom_root_path = false;
std::optional<fs::path> create_path, join_path;
while (true) {
auto opt = getopt(argc, const_cast<char *const *>(argv), spec);
if (opt == -1)
else if (opt == '?' || opt == 'h') {
return opt == 'h' ? 0 : 1;
else if (opt == 'r') {
// fakeroot also disables the read-only system paths
conf.fakeroot = true;
conf.system_ro = false;
else if (opt == 'p') {
conf.root_path = ::optarg;
custom_root_path = true;
else if (opt == 'M')
conf.mounts = false;
else if (opt == 'G')
conf.gui_mounts = false;
else if (opt == 'W')
conf.system_ro = false;
else if (opt == 'k')
conf.keep_root = true;
else if (opt == 'w')
conf.keep_pwd = true;
else if (opt == 'e')
conf.ns_setup_exec = ::optarg;
else if (opt == 'c')
create_path = ::optarg;
else if (opt == 'j')
join_path = ::optarg;
else if (opt == 'D')
conf.dummy_mode = true;
// Check sanity
// All problems are reported before failing, hence the flag instead of early returns
bool good = true;
if (join_path) {
if (create_path) {
std::cerr << "Error: -c and -j cannot be combined" << std::endl;
good = false;
// NOTE: let -p slip by to facilitate '-p<path> -j-' use-case
if (!conf.dummy_mode && (!conf.mounts || !conf.gui_mounts || conf.keep_root)) {
std::cerr << "Error: -j cannot be combined with any namespace setup options (-MGk) unless -D is given" << std::endl;
good = false;
conf.ns_path = join_path;
if (create_path) {
conf.ns_path = create_path;
conf.ns_create = true;
if (conf.ns_path) {
// This is somewhat arbitrary but should prevent accidentally entering a fakeroot ns using -j
if (conf.fakeroot) {
std::cerr << "Error: -r cannot be combined with -c or -j" << std::endl;
good = false;
// - Special default in -j and -c
if (*conf.ns_path == "-")
conf.ns_path = conf.root_path / ".namespace";
} else if (conf.dummy_mode) {
std::cerr << "Error: -D must be combined with -c or -j" << std::endl;
good = false;
if (!good) {
return 5;
// Rest is child cmnd
if (argc > ::optind)
conf.exec_argv = &argv[::optind];
return -1;
// Re-root `p` from `prefix` onto `replace`.
// Computes `p` relative to `prefix`; if that relative path contains a ".."
// component an empty path is returned, otherwise `replace / <relative path>`.
fs::path transpose_prefix(const fs::path &p, const fs::path &prefix, const fs::path &replace) {
static const auto up = fs::path{".."};
auto rel = fs::relative(p, prefix);
for (auto &c : rel) {
if (c == up)
return {};
return replace / rel;
// Translate the host path `p` into the path it has inside the namespace.
//
// Paths below conf.root_path map onto "/", paths below /media stay below
// /media. Anything else is only reachable through the original root kept at
// /mnt (see -k), so it is mapped to /mnt/<p>.
fs::path convert_path(const config &conf, const fs::path &p) {
    // Built on every call: the table depends on conf.root_path, so caching it
    // in a function-local static would pin the value from the first call.
    const auto mounts = std::array<std::pair<fs::path,fs::path>, 2>{
        std::pair{conf.root_path, "/"},
        std::pair{"/media", "/media"}
    };
    for (const auto &pr : mounts) {
        auto res = transpose_prefix(p, pr.first, pr.second);
        if (!res.empty())
            return res;
    }
    // Appending an absolute path replaces the left-hand side, so strip the
    // root from `p` first; otherwise an absolute `p` would come back unchanged
    // instead of being placed below /mnt.
    return fs::path{"/mnt"} / p.relative_path();
}
// Entry point of steamns.
// Parses the command line, validates the namespace reference path (-c/-j),
// optionally double-forks a background instance that creates the namespace
// (-D), then either joins existing namespaces or clones a child into new
// user/mount/pid namespaces, forwards SIGTERM to the child and waits for it.
// NOTE(review): many lines of this function are missing from this view (block
// closers, several statements, and the tail after the last `if`), and two
// expressions are garbled (marked below). The comments describe only what the
// visible lines establish.
int main(int argc, char **argv) {
auto home = ko::os::get_home();
auto uid = getuid();
auto gid = getgid();
// Set defaults
auto conf = config{
.root_path = home / ROOT_DIR,
// Parse commandline
// -1 means "continue"; any other value is the process exit code
auto perr = parse_cmdline(conf, argc, argv);
if (perr != -1)
return perr;
// FIXME should lock something. Not sure what though. this can currently race
if (conf.ns_path) {
// symlink_status inspects the link itself, status follows it
auto st = fs::symlink_status(*conf.ns_path);
if (fs::exists(st)) {
if (conf.ns_create) {
std::cerr << "Error: File exists: " << *conf.ns_path << std::endl;
return -EEXIST;
} else {
auto tgt = fs::status(*conf.ns_path);
if (!fs::exists(tgt)) {
std::cerr << "Warning: Cleaning up stale ns link " << *conf.ns_path << " to " << fs::read_symlink(*conf.ns_path) << std::endl;
} else if (!conf.ns_create && !conf.dummy_mode) {
std::cerr << "Error: No such file: " << *conf.ns_path << std::endl;
return -ENOENT;
// Auto-create dummy instance
// Applies to -j with -D when the ns path does not exist yet
auto parent_future = [&conf]() ->proc_future<int>* {
if (!conf.ns_create && conf.dummy_mode && !fs::exists(*conf.ns_path)) {
// Fork twice while communicating child pid
auto f = proc_future<int>::create();
if (!f)
die_errno(31, "Could not allocate future for dummy process");
auto vpid = ::vfork();
if (vpid < 0)
die_errno(32, "Could not spawn dummy process (-Dc)");
else if (vpid == 0) {
auto pid = ::fork();
if (pid < 0)
else if (pid > 0)
// Daemon process here
// Switch to creation mode
conf.ns_create = true;
return f;
} else {
// Parent process here
// check if second fork failed
int st = 0;
waitpid(vpid, &st, 0);
if (WEXITSTATUS(st) != 0)
die(33, "Could not spawn dummy process (-Dc); double fork failed");
// Wait for ns creation
// The value received is used as the pid of the namespace process
auto pid = f->wait();
if (pid < 0)
die(34, "Could not spawn dummy process (-Dc); reported failure");
conf.ns_path = ko::util::str("/proc/", pid, "/ns");
return nullptr;
// -1 when no namespace path is in use
ko::fd::fd ns_path_fd = conf.ns_path ? ko::fd::opendir(*conf.ns_path) : ko::fd::fd(-1);
// Create nsproc config
auto nsconf = nsproc::config{
.root_path = conf.root_path,
.home_path = home,
.pwd = conf.keep_pwd ? convert_path(conf, fs::current_path()) : fs::path{},
.exec_argv = const_cast<char *const *>(conf.exec_argv),
// fakeroot maps to uid/gid 0 inside the namespace
.uid = conf.fakeroot ? 0 : uid,
.gid = conf.fakeroot ? 0 : gid,
.mounts = conf.mounts,
.gui_mounts = conf.gui_mounts,
.system_ro = conf.system_ro,
.keep_root = conf.keep_root,
.dummy_mode = conf.dummy_mode,
.pid_ns = conf.pid_ns,
.setup_exec = conf.ns_setup_exec,
.ns_path_fd = ns_path_fd
constexpr auto stacksize = 1024*1024;
// clone
auto ns_future = conf.ns_create ? proc_future<int>::create_private() : nullptr;
// Join path: setns in this process first, then clone the child that enters
// the mount namespace. Otherwise: clone into new mount and pid namespaces.
auto [proc, res] = conf.ns_path && !conf.ns_create ?
[&nsconf]() -> std::pair<ko::proc::child_ref, int> {
int e = nsproc::nsproc_join_parent(nsconf);
if (e) return {-1, e};
auto child = ko::proc::vclone(nsproc::nsproc_join_child, stacksize, 0, nsconf);
return {std::move(child), 0};
}() :
ko::ns::clone::uvclone_single(nsconf.uid, nsconf.gid, nsproc::nsproc_create, stacksize, CLONE_NEWNS|CLONE_NEWPID, nsconf, ns_future);
if (proc) {
// Child should handle signals and then return
// NOTE(review): the initializer of _pid is missing in this view — presumably
// the pid of `proc`; confirm.
static int _pid =;
signal(SIGINT, SIG_IGN); // assume sent to whole session
// Captureless lambda, so the child pid is passed through the static above
signal(SIGTERM, [](int sig){
kill(_pid, sig);
// Create ns_reference
if (conf.ns_create) {
if (ns_future) {
// TODO consider the return value?
// TODO move this out so it's independent of ns_create. But create_symlink can throw which would
// lead to a locked-up parent.
if (parent_future) {
// NOTE(review): the middle argument of str() is missing in this view —
// presumably the child's pid; confirm.
fs::create_directory_symlink(ko::util::str("/proc/",, "/ns"), *conf.ns_path);
// Wait for child
res = proc.wait();
// Clean up ns path
if (conf.ns_create)
return res;
} else {
if (parent_future)
@ -0,0 +1,11 @@
"folders": [
"path": "."
"settings": {
"C_Cpp.default.cppStandard": "c++20",
"C_Cpp.default.cStandard": "c17"