Compare commits

...

8 commits

Author SHA1 Message Date
Gavin Hayes 9c12b053e2
Merge ecd3af1ac2 into 130fd66f9e 2024-05-03 12:51:48 -04:00
Justine Tunney 130fd66f9e
Release Cosmopolitan v3.3.4 2024-05-03 09:25:02 -07:00
Justine Tunney 8a44f913ae
Delete flaky tests
Signals are extremely difficult to unit test reliably. This is why
functions like sigsuspend() exist. When testing something else and
portably it becomes impossible without access to kernel internals.

OpenMP flakes in QEMU on one of my workstations. I don't think the
support is production worthy, because there's been issues on MacOS
additionally. It works great for every experiment I've used it for
though. However a flaky test is worse than no test at all. So it's
removed until someone takes an interest in productionizing it.
2024-05-03 09:11:04 -07:00
Gautham 5488f0b2ca
Remove zlib namespacing (#1142)
We have an optimized version of zlib from the Chromium project.
We need it for a lot of our libc services. It would be nice to export
this to user applications if we can, since projects like llamafile are
already depending on it under the private namespace, to avoid
needing to link zlib twice.
2024-05-03 08:07:25 -07:00
Gavin Hayes deff138e7e
recvfrom: don't convert address if addrsize is 0 (#1153) 2024-05-03 08:03:57 -07:00
Gavin Hayes b6e40a3a58
Add /dev/(u)random on NT (#1163) 2024-05-03 07:59:51 -07:00
Cadence Ember 8f6bc9dabc
Let signals interrupt fgets unless SA_RESTART set (#1152) 2024-05-03 07:49:41 -07:00
Gavin Hayes ecd3af1ac2 fix nt accept not initializing with SO_UPDATE_ACCEPT_CONTEXT 2024-05-03 01:48:18 -04:00
32 changed files with 179 additions and 532 deletions

View file

@ -332,7 +332,8 @@ textwindows int sys_fcntl_nt(int fd, int cmd, uintptr_t arg) {
if (__isfdkind(fd, kFdFile) || //
__isfdkind(fd, kFdSocket) || //
__isfdkind(fd, kFdConsole) || //
__isfdkind(fd, kFdDevNull)) {
__isfdkind(fd, kFdDevNull) || //
__isfdkind(fd, kFdDevRandom)) {
if (cmd == F_GETFL) {
rc = g_fds.p[fd].flags & (O_ACCMODE | _O_APPEND | _O_DIRECT |
_O_NONBLOCK | _O_RANDOM | _O_SEQUENTIAL);

View file

@ -104,6 +104,7 @@ textwindows int sys_fstat_nt(int fd, struct stat *st) {
return ebadf();
case kFdConsole:
case kFdDevNull:
case kFdDevRandom:
return sys_fstat_nt_special(g_fds.p[fd].kind, st);
case kFdSocket:
return sys_fstat_nt_socket(g_fds.p[fd].kind, st);

View file

@ -59,6 +59,8 @@ textwindows int sys_fstatat_nt(int dirfd, const char *path, struct stat *st,
return sys_fstat_nt_special(kFdConsole, st);
} else if (!strcmp(path + 5, "null")) {
return sys_fstat_nt_special(kFdDevNull, st);
} else if (!strcmp(path + 5, "random") || !strcmp(path + 5, "urandom")) {
return sys_fstat_nt_special(kFdDevRandom, st);
} else if (!strcmp(path + 5, "stdin")) {
return sys_fstat_nt(STDIN_FILENO, st);
} else if (!strcmp(path + 5, "stdout")) {

View file

@ -24,7 +24,6 @@
#include "libc/calls/termios.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/serialize.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/strace.internal.h"
#include "libc/intrin/weaken.h"
@ -42,6 +41,7 @@
#include "libc/nt/winsock.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/serialize.h"
#include "libc/sock/internal.h"
#include "libc/sock/struct/ifconf.h"
#include "libc/sock/struct/ifreq.h"
@ -66,7 +66,7 @@ static struct HostAdapterInfoNode {
struct sockaddr netmask;
struct sockaddr broadcast;
short flags;
} *__hostInfo;
} * __hostInfo;
static int ioctl_default(int fd, unsigned long request, void *arg) {
int rc;
@ -107,8 +107,9 @@ static int ioctl_fionread(int fd, uint32_t *arg) {
*arg = MAX(0, bytes);
return 0;
} else if (g_fds.p[fd].kind == kFdDevNull) {
*arg = 1;
return 0;
return enotty();
} else if (g_fds.p[fd].kind == kFdDevRandom) {
return einval();
} else if (GetFileType(handle) == kNtFileTypePipe) {
uint32_t avail;
if (PeekNamedPipe(handle, 0, 0, 0, &avail, 0)) {

View file

@ -55,6 +55,7 @@ bool32 ischardev(int fd) {
}
} else {
return __isfdkind(fd, kFdConsole) || __isfdkind(fd, kFdDevNull) ||
__isfdkind(fd, kFdDevRandom) ||
(__isfdkind(fd, kFdFile) &&
GetFileType(g_fds.p[fd].handle) == kNtFileTypeChar);
}

View file

@ -62,7 +62,7 @@ static textwindows int64_t Seek(struct Fd *f, int64_t offset, int whence) {
}
textwindows int64_t sys_lseek_nt(int fd, int64_t offset, int whence) {
if (__isfdkind(fd, kFdDevNull)) {
if (__isfdkind(fd, kFdDevNull) || __isfdkind(fd, kFdDevRandom)) {
return offset;
} else if (__isfdkind(fd, kFdFile)) {
struct Fd *f = g_fds.p + fd;

View file

@ -159,6 +159,15 @@ static textwindows int sys_open_nt_special(int fd, int flags, int mode,
return fd;
}
static textwindows int sys_open_nt_no_handle(int fd, int flags, int mode,
int kind) {
g_fds.p[fd].kind = kind;
g_fds.p[fd].mode = mode;
g_fds.p[fd].flags = flags;
g_fds.p[fd].handle = -1;
return fd;
}
static textwindows int sys_open_nt_dup(int fd, int flags, int mode, int oldfd) {
int64_t handle;
if (!__isfdopen(oldfd)) {
@ -211,6 +220,8 @@ textwindows int sys_open_nt(int dirfd, const char *file, uint32_t flags,
rc = sys_open_nt_special(fd, flags, mode, kFdConsole, u"CONIN$");
} else if (!strcmp(file + 5, "null")) {
rc = sys_open_nt_special(fd, flags, mode, kFdDevNull, u"NUL");
} else if (!strcmp(file + 5, "urandom") || !strcmp(file + 5, "random")) {
rc = sys_open_nt_no_handle(fd, flags, mode, kFdDevRandom);
} else if (!strcmp(file + 5, "stdin")) {
rc = sys_open_nt_dup(fd, flags, mode, STDIN_FILENO);
} else if (!strcmp(file + 5, "stdout")) {

View file

@ -72,7 +72,8 @@ ssize_t pread(int fd, void *buf, size_t size, int64_t offset) {
rc = sys_pread(fd, buf, size, offset, offset);
} else if (__isfdkind(fd, kFdSocket)) {
rc = espipe();
} else if (__isfdkind(fd, kFdFile) || __isfdkind(fd, kFdDevNull)) {
} else if (__isfdkind(fd, kFdFile) || __isfdkind(fd, kFdDevNull) ||
__isfdkind(fd, kFdDevRandom)) {
rc = sys_read_nt(fd, (struct iovec[]){{buf, size}}, 1, offset);
} else {
rc = ebadf();

View file

@ -40,6 +40,8 @@ static const char *__fdkind2str(int x) {
return "kFdZip";
case kFdEpoll:
return "kFdEpoll";
case kFdDevRandom:
return "kFdRandom";
default:
return "kFdWut";
}

View file

@ -65,7 +65,8 @@ ssize_t pwrite(int fd, const void *buf, size_t size, int64_t offset) {
rc = sys_pwrite(fd, buf, size, offset, offset);
} else if (__isfdkind(fd, kFdSocket)) {
rc = espipe();
} else if (__isfdkind(fd, kFdFile) || __isfdkind(fd, kFdDevNull)) {
} else if (__isfdkind(fd, kFdFile) || __isfdkind(fd, kFdDevNull) ||
__isfdkind(fd, kFdDevRandom)) {
rc = sys_write_nt(fd, (struct iovec[]){{(void *)buf, size}}, 1, offset);
} else {
return ebadf();

View file

@ -786,6 +786,10 @@ textwindows ssize_t ReadBuffer(int fd, void *data, size_t size, int64_t offset,
return 0;
}
if (f->kind == kFdDevRandom) {
return ProcessPrng(data, size) ? size : __winerr();
}
if (f->kind == kFdConsole) {
return ReadFromConsole(f, data, size, waitmask);
}

View file

@ -29,6 +29,7 @@ textwindows ssize_t sys_readv_nt(int fd, const struct iovec *iov, int iovlen) {
case kFdFile:
case kFdConsole:
case kFdDevNull:
case kFdDevRandom:
return sys_read_nt(fd, iov, iovlen, -1);
case kFdSocket:
return _weaken(sys_recv_nt)(fd, iov, iovlen, 0);

View file

@ -61,7 +61,7 @@ sys_readwrite_nt(int fd, void *data, size_t size, ssize_t offset,
bool pwriting = offset != -1;
bool seekable =
(f->kind == kFdFile && GetFileType(handle) == kNtFileTypeDisk) ||
f->kind == kFdDevNull;
f->kind == kFdDevNull || f->kind == kFdDevRandom;
if (pwriting && !seekable) {
return espipe();
}

View file

@ -2,15 +2,16 @@
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_FD_INTERNAL_H_
COSMOPOLITAN_C_START_
#define kFdEmpty 0
#define kFdFile 1
#define kFdSocket 2
#define kFdConsole 4
#define kFdSerial 5
#define kFdZip 6
#define kFdEpoll 7
#define kFdReserved 8
#define kFdDevNull 9
#define kFdEmpty 0
#define kFdFile 1
#define kFdSocket 2
#define kFdConsole 4
#define kFdSerial 5
#define kFdZip 6
#define kFdEpoll 7
#define kFdReserved 8
#define kFdDevNull 9
#define kFdDevRandom 10
struct Fd {
char kind;

View file

@ -52,6 +52,11 @@ static textwindows ssize_t sys_write_nt_impl(int fd, void *data, size_t size,
struct Fd *f = g_fds.p + fd;
bool isconsole = f->kind == kFdConsole;
// not implemented, XNU returns eperm();
if (f->kind == kFdDevRandom) {
return eperm();
}
// determine win32 handle for writing
int64_t handle = f->handle;
if (isconsole && _weaken(GetConsoleOutputHandle)) {

View file

@ -28,6 +28,7 @@ textwindows ssize_t sys_writev_nt(int fd, const struct iovec *iov, int iovlen) {
case kFdFile:
case kFdConsole:
case kFdDevNull:
case kFdDevRandom:
return sys_write_nt(fd, iov, iovlen, -1);
case kFdSocket:
return _weaken(sys_send_nt)(fd, iov, iovlen, 0);

View file

@ -4,7 +4,7 @@
#define __COSMOPOLITAN_MAJOR__ 3
#define __COSMOPOLITAN_MINOR__ 3
#define __COSMOPOLITAN_PATCH__ 3
#define __COSMOPOLITAN_PATCH__ 4
#define __COSMOPOLITAN__ \
(100000000 * __COSMOPOLITAN_MAJOR__ + 1000000 * __COSMOPOLITAN_MINOR__ + \
__COSMOPOLITAN_PATCH__)

View file

@ -87,10 +87,6 @@ static int sys_accept_nt_start(int64_t handle, struct NtOverlapped *overlap,
if (g_acceptex.lpAcceptEx(args->listensock, handle, args->buffer, 0,
sizeof(args->buffer->local),
sizeof(args->buffer->remote), 0, overlap)) {
// inherit properties of listening socket
unassert(!__imp_setsockopt(args->listensock, SOL_SOCKET,
kNtSoUpdateAcceptContext, &handle,
sizeof(handle)));
return 0;
} else {
return -1;
@ -123,6 +119,12 @@ textwindows int sys_accept_nt(struct Fd *f, struct sockaddr_storage *addr,
goto Finish;
}
// inherit properties of listening socket
// errors ignored as if f->handle was created before forking
// this fails with WSAENOTSOCK
__imp_setsockopt(resources.handle, SOL_SOCKET, kNtSoUpdateAcceptContext,
&f->handle, sizeof(f->handle));
// create file descriptor for new socket
// don't inherit the file open mode bits
int oflags = 0;

View file

@ -70,6 +70,9 @@ ssize_t recvfrom(int fd, void *buf, size_t size, int flags,
if (__isfdkind(fd, kFdSocket)) {
rc = sys_recvfrom_nt(fd, (struct iovec[]){{buf, size}}, 1, flags, &addr,
&addrsize);
if (rc != -1 && addrsize == sizeof(addr)) {
addrsize = 0;
}
} else if (__isfdkind(fd, kFdFile) && !opt_out_srcaddr) { /* socketpair */
if (!flags) {
rc = sys_read_nt(fd, (struct iovec[]){{buf, size}}, 1, -1);
@ -84,10 +87,14 @@ ssize_t recvfrom(int fd, void *buf, size_t size, int flags,
}
if (rc != -1) {
if (IsBsd()) {
__convert_bsd_to_sockaddr(&addr);
if (addrsize) {
if (IsBsd()) {
__convert_bsd_to_sockaddr(&addr);
}
__write_sockaddr(&addr, opt_out_srcaddr, opt_inout_srcaddrsize);
} else {
*opt_inout_srcaddrsize = 0;
}
__write_sockaddr(&addr, opt_out_srcaddr, opt_inout_srcaddrsize);
}
END_CANCELATION_POINT;

View file

@ -57,11 +57,7 @@ char *fgets_unlocked(char *s, int size, FILE *f) {
break;
} else {
if ((c = fgetc_unlocked(f)) == -1) {
if (ferror_unlocked(f) == EINTR) {
continue;
} else {
break;
}
break;
}
*p++ = c & 255;
if (c == '\n')

View file

@ -20,15 +20,19 @@
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/nt/files.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/o.h"
#include "libc/testlib/testlib.h"
int pipefd[2];
int stdoutBack;
int allowMask;
void SetUpOnce(void) {
testlib_enable_tmp_setup_teardown();
// qemu-aarch64 defines o_largefile wrong
allowMask = ~(O_LARGEFILE | 0x00008000);
}
void CaptureStdout(void) {
@ -46,8 +50,7 @@ void RestoreStdout(void) {
TEST(specialfile, devNull) {
ASSERT_SYS(0, 3, creat("/dev/null", 0644));
// qemu-aarch64 defines o_largefile wrong
ASSERT_EQ(O_WRONLY, fcntl(3, F_GETFL) & ~(O_LARGEFILE | 0x00008000));
ASSERT_EQ(O_WRONLY, fcntl(3, F_GETFL) & allowMask);
ASSERT_SYS(0, 2, write(3, "hi", 2));
ASSERT_SYS(0, 2, pwrite(3, "hi", 2, 0));
ASSERT_SYS(0, 2, pwrite(3, "hi", 2, 2));
@ -64,12 +67,51 @@ TEST(specialfile, devNull) {
TEST(specialfile, devNullRead) {
char buf[8] = {0};
ASSERT_SYS(0, 3, open("/dev/null", O_RDONLY));
// qemu-aarch64 defines o_largefile wrong
ASSERT_EQ(O_RDONLY, fcntl(3, F_GETFL) & ~(O_LARGEFILE | 0x00008000));
ASSERT_EQ(O_RDONLY, fcntl(3, F_GETFL) & allowMask);
ASSERT_SYS(0, 0, read(3, buf, 8));
ASSERT_SYS(0, 0, close(3));
}
TEST(specialfile, devRandomRead) {
char buf[8] = {0};
ASSERT_SYS(0, 3, open("/dev/random", O_RDONLY));
ASSERT_EQ(O_RDONLY, fcntl(3, F_GETFL) & allowMask);
ASSERT_SYS(0, 8, read(3, buf, 8));
ASSERT_NE(0, memcmp(buf, " ", 8));
ASSERT_SYS(0, 0, close(3));
}
TEST(specialfile, devUrandomRead) {
char buf[8] = {0};
ASSERT_SYS(0, 3, open("/dev/urandom", O_RDONLY));
ASSERT_EQ(O_RDONLY, fcntl(3, F_GETFL) & allowMask);
ASSERT_SYS(0, 8, read(3, buf, 8));
ASSERT_NE(0, memcmp(buf, " ", 8));
ASSERT_SYS(0, 0, close(3));
}
TEST(specialfile, devRandomWrite_fails_on_nt) {
if (!IsWindows()) {
return;
}
char buf[8] = {0};
ASSERT_SYS(0, 3, creat("/dev/random", 0644));
ASSERT_EQ(O_WRONLY, fcntl(3, F_GETFL) & allowMask);
ASSERT_SYS(EPERM, -1, write(3, buf, 8));
ASSERT_SYS(0, 0, close(3));
}
TEST(specialfile, devUrandomWrite_fails_on_nt) {
if (!IsWindows()) {
return;
}
char buf[8] = {0};
ASSERT_SYS(0, 3, creat("/dev/urandom", 0644));
ASSERT_EQ(O_WRONLY, fcntl(3, F_GETFL) & allowMask);
ASSERT_SYS(EPERM, -1, write(3, buf, 8));
ASSERT_SYS(0, 0, close(3));
}
TEST(specialfile, devStdout) {
char buf[8] = {8};
CaptureStdout();

View file

@ -33,9 +33,9 @@
__static_yoink("zipos");
__static_yoink("libc/testlib/hyperion.txt");
__static_yoink("_Cz_inflate");
__static_yoink("_Cz_inflateInit2");
__static_yoink("_Cz_inflateEnd");
__static_yoink("inflate");
__static_yoink("inflateInit2");
__static_yoink("inflateEnd");
void *Worker(void *arg) {
int i, fd;

View file

@ -25,6 +25,7 @@
#include "libc/runtime/syslib.internal.h"
#include "libc/sock/sock.h"
#include "libc/sock/struct/sockaddr.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/af.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/ipproto.h"
@ -107,7 +108,7 @@ TEST(O_NONBLOCK, canBeTunedWithFcntl_toMakeReadNonBlocking) {
PARENT();
EXPECT_SYS(0, 0, close(3));
ASSERT_SYS(0, 3, socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
ASSERT_SYS(0, O_RDWR, fcntl(3, F_GETFL));
ASSERT_SYS(0, O_RDWR, fcntl(3, F_GETFL) & O_ACCMODE); // QEMU O_LARGEFILE :(
ASSERT_SYS(0, 0, connect(3, (struct sockaddr *)&addr, sizeof(addr)));
ASSERT_SYS(0, 0, fcntl(3, F_SETFL, O_RDWR | O_NONBLOCK));
ASSERT_SYS(EAGAIN, -1, read(3, buf, 16));

View file

@ -93,3 +93,44 @@ TEST(recvfrom, test) {
EXPECT_SYS(0, 0, close(client1));
WAIT(exit, 0);
}
// server listens for connections, accepts a connection, and sends data
// client connects to server recieves with recvfrom and verifies addrsize
// is 0 as the sender info isn't available on connection sockets.
TEST(recvfrom, tcp) {
uint32_t addrsize = sizeof(struct sockaddr_in);
struct sockaddr_in server = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(0x7f000001),
};
ASSERT_SYS(0, 3, socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
ASSERT_SYS(0, 0, bind(3, (struct sockaddr *)&server, sizeof(server)));
ASSERT_SYS(0, 0, getsockname(3, (struct sockaddr *)&server, &addrsize));
ASSERT_SYS(0, 0, listen(3, 5));
////////////////////////////////////////////////////////////////////////////////
SPAWN(fork);
struct sockaddr_in data, addr;
uint32_t addrsize = sizeof(struct sockaddr_in);
EXPECT_SYS(0, 0, close(3));
ASSERT_SYS(0, 3, socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
ASSERT_SYS(0, 0, connect(3, (struct sockaddr *)&server, sizeof(server)));
ASSERT_SYS(
0, sizeof(data),
recvfrom(3, &data, sizeof(data), 0, (struct sockaddr *)&addr, &addrsize));
ASSERT_EQ(0, addrsize);
EXPECT_SYS(0, 0, close(3));
////////////////////////////////////////////////////////////////////////////////
PARENT();
int client;
struct sockaddr client_sockaddr;
uint32_t sockaddr_size = sizeof(client_sockaddr);
ASSERT_NE(-1, (client = accept(3, &client_sockaddr, &sockaddr_size)));
ASSERT_SYS(0, sizeof(client_sockaddr),
sendto(client, &client_sockaddr, sizeof(client_sockaddr), 0,
(struct sockaddr *)&server, sizeof(server)));
EXPECT_SYS(0, 0, close(client));
WAIT(exit, 0);
EXPECT_SYS(0, 0, close(3));
}

View file

@ -152,8 +152,6 @@ __attribute__((__constructor__)) static void StdioPro(int argc, char *argv[]) {
}
TEST(socket, canBeUsedAsExecutedStdio) {
if (IsWindows())
return; // TODO(jart): What broke this?
char buf[16] = {0};
const char *prog;
uint32_t addrsize = sizeof(struct sockaddr_in);

View file

@ -1,476 +0,0 @@
/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
vi: set et ft=cpp ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include <algorithm>
#include <atomic>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <ctime>
#include "libc/stdio/rand.h"
#define PRECISION 2e-6
#define LV1DCACHE 49152
#define THRESHOLD 3000000
#if defined(__OPTIMIZE__) && !defined(__SANITIZE_ADDRESS__)
#define ITERATIONS 5
#else
#define ITERATIONS 1
#endif
#define OPTIMIZED __attribute__((__optimize__("-O3,-ffast-math")))
#define PORTABLE \
__target_clones("arch=znver4," \
"arch=znver3," \
"arch=sapphirerapids," \
"arch=alderlake," \
"arch=rocketlake," \
"arch=cooperlake," \
"arch=tigerlake," \
"arch=cascadelake," \
"arch=skylake-avx512," \
"arch=skylake," \
"arch=znver1," \
"arch=tremont," \
"fma," \
"avx")
static bool is_self_testing;
// m×n → n×m
template <typename TA, typename TB>
void transpose(long m, long n, const TA *A, long lda, TB *B, long ldb) {
#pragma omp parallel for collapse(2) if (m * n > THRESHOLD)
for (long i = 0; i < m; ++i)
for (long j = 0; j < n; ++j) {
B[ldb * j + i] = A[lda * i + j];
}
}
// m×k * k×n → m×n
// k×m * k×n → m×n if aᵀ
// m×k * n×k → m×n if bᵀ
// k×m * n×k → m×n if aᵀ and bᵀ
template <typename TC, typename TA, typename TB>
void dgemm(bool aᵀ, bool bᵀ, long m, long n, long k, float α, const TA *A,
long lda, const TB *B, long ldb, float β, TC *C, long ldc) {
#pragma omp parallel for collapse(2) if (m * n * k > THRESHOLD)
for (long i = 0; i < m; ++i)
for (long j = 0; j < n; ++j) {
double sum = 0;
for (long l = 0; l < k; ++l)
sum = std::fma((aᵀ ? A[lda * l + i] : A[lda * i + l]) * α,
(bᵀ ? B[ldb * j + l] : B[ldb * l + j]), sum);
C[ldc * i + j] = C[ldc * i + j] * β + sum;
}
}
template <typename T, typename TC, typename TA, typename TB>
struct Gemmlin {
public:
Gemmlin(bool aT, bool bT, float α, const TA *A, long lda, const TB *B,
long ldb, float β, TC *C, long ldc)
: aT(aT),
bT(bT),
α(α),
A(A),
lda(lda),
B(B),
ldb(ldb),
β(β),
C(C),
ldc(ldc) {
}
void gemm(long m, long n, long k) {
if (!m || !n)
return;
for (long i = 0; i < m; ++i)
for (long j = 0; j < n; ++j) {
C[ldc * i + j] *= β;
}
if (!k)
return;
cub = sqrt(LV1DCACHE) / sqrt(sizeof(T) * 3);
mnpack(0, m, 0, n, 0, k);
}
private:
void mnpack(long m0, long m, //
long n0, long n, //
long k0, long k) {
long mc = rounddown(std::min(m - m0, cub), 4);
long mp = m0 + (m - m0) / mc * mc;
long nc = rounddown(std::min(n - n0, cub), 4);
long np = n0 + (n - n0) / nc * nc;
long kc = rounddown(std::min(k - k0, cub), 4);
long kp = k0 + (k - k0) / kc * kc;
kpack(m0, mc, mp, n0, nc, np, k0, kc, k, kp);
if (m - mp)
mnpack(mp, m, n0, np, k0, k);
if (n - np)
mnpack(m0, mp, np, n, k0, k);
if (m - mp && n - np)
mnpack(mp, m, np, n, k0, k);
}
void kpack(long m0, long mc, long m, //
long n0, long nc, long n, //
long k0, long kc, long k, //
long kp) {
rpack(m0, mc, m, n0, nc, n, k0, kc, kp);
if (k - kp)
rpack(m0, mc, m, n0, nc, n, kp, k - kp, k);
}
void rpack(long m0, long mc, long m, //
long n0, long nc, long n, //
long k0, long kc, long k) {
if (!(mc % 4) && !(nc % 4))
bgemm<4, 4>(m0, mc, m, n0, nc, n, k0, kc, k);
else
bgemm<1, 1>(m0, mc, m, n0, nc, n, k0, kc, k);
}
template <int mr, int nr>
void bgemm(long m0, long mc, long m, //
long n0, long nc, long n, //
long k0, long kc, long k) {
ops = (m - m0) * (n - n0) * (k - k0);
ml = (m - m0) / mc;
nl = (n - n0) / nc;
locks = new lock[ml * nl];
there_will_be_blocks<mr, nr>(m0, mc, m, n0, nc, n, k0, kc, k);
delete[] locks;
}
template <int mr, int nr>
void there_will_be_blocks(long m0, volatile long mc, long m, long n0, long nc,
long n, long k0, long kc, long k) {
#pragma omp parallel for collapse(2) if (ops > THRESHOLD && mc * kc > 16)
for (long ic = m0; ic < m; ic += mc)
for (long pc = k0; pc < k; pc += kc)
gizmo<mr, nr>(m0, mc, ic, n0, nc, k0, kc, pc, n);
}
template <int mr, int nr>
PORTABLE OPTIMIZED void gizmo(long m0, long mc, long ic, long n0, long nc,
long k0, long kc, long pc, long n) {
T Ac[mc / mr][kc][mr];
for (long i = 0; i < mc; ++i)
for (long j = 0; j < kc; ++j)
Ac[i / mr][j][i % mr] = α * (aT ? A[lda * (pc + j) + (ic + i)]
: A[lda * (ic + i) + (pc + j)]);
for (long jc = n0; jc < n; jc += nc) {
T Bc[nc / nr][nr][kc];
for (long j = 0; j < nc; ++j)
for (long i = 0; i < kc; ++i)
Bc[j / nr][j % nr][i] =
bT ? B[ldb * (jc + j) + (pc + i)] : B[ldb * (pc + i) + (jc + j)];
T Cc[nc / nr][mc / mr][nr][mr];
memset(Cc, 0, nc * mc * sizeof(float));
for (long jr = 0; jr < nc / nr; ++jr)
for (long ir = 0; ir < mc / mr; ++ir)
for (long pr = 0; pr < kc; ++pr)
for (long j = 0; j < nr; ++j)
for (long i = 0; i < mr; ++i)
Cc[jr][ir][j][i] += Ac[ir][pr][i] * Bc[jr][j][pr];
const long lk = nl * ((ic - m0) / mc) + ((jc - n0) / nc);
locks[lk].acquire();
for (long ir = 0; ir < mc; ir += mr)
for (long jr = 0; jr < nc; jr += nr)
for (long i = 0; i < mr; ++i)
for (long j = 0; j < nr; ++j)
C[ldc * (ic + ir + i) + (jc + jr + j)] +=
Cc[jr / nr][ir / mr][j][i];
locks[lk].release();
}
}
inline long rounddown(long x, long r) {
if (x < r)
return x;
else
return x & -r;
}
class lock {
public:
lock() = default;
void acquire() {
while (lock_.exchange(true, std::memory_order_acquire)) {
}
}
void release() {
lock_.store(false, std::memory_order_release);
}
private:
std::atomic_bool lock_ = false;
};
bool aT;
bool bT;
float α;
const TA *A;
long lda;
const TB *B;
long ldb;
float β;
TC *C;
long ldc;
long ops;
long nl;
long ml;
lock *locks;
long cub;
};
template <typename TC, typename TA, typename TB>
void sgemm(bool aT, bool bT, long m, long n, long k, float α, const TA *A,
long lda, const TB *B, long ldb, float β, TC *C, long ldc) {
Gemmlin<float, TC, TA, TB> g{aT, bT, α, A, lda, B, ldb, β, C, ldc};
g.gemm(m, n, k);
}
template <typename TA, typename TB>
void show(FILE *f, long max, long m, long n, const TA *A, long lda, const TB *B,
long ldb) {
flockfile(f);
fprintf(f, " ");
for (long j = 0; j < n; ++j) {
fprintf(f, "%13ld", j);
}
fprintf(f, "\n");
for (long i = 0; i < m; ++i) {
if (i == max) {
fprintf(f, "...\n");
break;
}
fprintf(f, "%5ld ", i);
for (long j = 0; j < n; ++j) {
if (j == max) {
fprintf(f, " ...");
break;
}
char ba[16], bb[16];
sprintf(ba, "%13.7f", static_cast<double>(A[lda * i + j]));
sprintf(bb, "%13.7f", static_cast<double>(B[ldb * i + j]));
for (long k = 0; ba[k] && bb[k]; ++k) {
if (ba[k] != bb[k])
fputs_unlocked("\33[31m", f);
fputc_unlocked(ba[k], f);
if (ba[k] != bb[k])
fputs_unlocked("\33[0m", f);
}
}
fprintf(f, "\n");
}
funlockfile(f);
}
inline unsigned long GetDoubleBits(double f) {
union {
double f;
unsigned long i;
} u;
u.f = f;
return u.i;
}
inline bool IsNan(double x) {
return (GetDoubleBits(x) & (-1ull >> 1)) > (0x7ffull << 52);
}
template <typename TA, typename TB>
double diff(long m, long n, const TA *Want, long lda, const TB *Got, long ldb) {
double s = 0;
int got_nans = 0;
int want_nans = 0;
for (long i = 0; i < m; ++i)
for (long j = 0; j < n; ++j)
if (IsNan(Want[ldb * i + j]))
++want_nans;
else if (IsNan(Got[ldb * i + j]))
++got_nans;
else
s += std::fabs(Want[lda * i + j] - Got[ldb * i + j]);
if (got_nans)
printf("WARNING: got %d NaNs!\n", got_nans);
if (want_nans)
printf("WARNING: want array has %d NaNs!\n", want_nans);
return s / (m * n);
}
template <typename TA, typename TB>
void show_error(FILE *f, long max, long m, long n, const TA *A, long lda,
const TB *B, long ldb, const char *file, int line, double sad,
double tol) {
fprintf(f, "%s:%d: sad %.17g exceeds %g\nwant\n", file, line, sad, tol);
show(f, max, m, n, A, lda, B, ldb);
fprintf(f, "got\n");
show(f, max, m, n, B, ldb, A, lda);
fprintf(f, "\n");
}
template <typename TA, typename TB>
void check(double tol, long m, long n, const TA *A, long lda, const TB *B,
long ldb, const char *file, int line) {
double sad = diff(m, n, A, lda, B, ldb);
if (sad <= tol) {
if (!is_self_testing) {
printf(" %g error\n", sad);
}
} else {
show_error(stderr, 16, m, n, A, lda, B, ldb, file, line, sad, tol);
const char *path = "/tmp/openmp_test.log";
FILE *f = fopen(path, "w");
if (f) {
show_error(f, 10000, m, n, A, lda, B, ldb, file, line, sad, tol);
printf("see also %s\n", path);
}
exit(1);
}
}
#define check(tol, m, n, A, lda, B, ldb) \
check(tol, m, n, A, lda, B, ldb, __FILE__, __LINE__)
long micros(void) {
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return ts.tv_sec * 1000000 + (ts.tv_nsec + 999) / 1000;
}
#define bench(x) \
do { \
long t1 = micros(); \
for (long i = 0; i < ITERATIONS; ++i) { \
asm volatile("" ::: "memory"); \
x; \
asm volatile("" ::: "memory"); \
} \
long t2 = micros(); \
printf("%8" PRId64 " µs %s\n", (t2 - t1 + ITERATIONS - 1) / ITERATIONS, \
#x); \
} while (0)
double real01(unsigned long x) { // (0,1)
return 1. / 4503599627370496. * ((x >> 12) + .5);
}
double numba(void) { // (-1,1)
return real01(lemur64()) * 2 - 1;
}
template <typename T>
void fill(T *A, long n) {
for (long i = 0; i < n; ++i) {
A[i] = numba();
}
}
void test_gemm(long m, long n, long k) {
float *A = new float[m * k];
float *At = new float[k * m];
float *B = new float[k * n];
float *Bt = new float[n * k];
float *C = new float[m * n];
float *GOLD = new float[m * n];
float α = 1;
float β = 0;
fill(A, m * k);
fill(B, k * n);
dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n);
transpose(m, k, A, k, At, m);
transpose(k, n, B, n, Bt, k);
sgemm(0, 0, m, n, k, α, A, k, B, n, β, C, n);
check(PRECISION, m, n, GOLD, n, C, n);
sgemm(1, 0, m, n, k, α, At, m, B, n, β, C, n);
check(PRECISION, m, n, GOLD, n, C, n);
sgemm(0, 1, m, n, k, α, A, k, Bt, k, β, C, n);
check(PRECISION, m, n, GOLD, n, C, n);
sgemm(1, 1, m, n, k, α, At, m, Bt, k, β, C, n);
check(PRECISION, m, n, GOLD, n, C, n);
delete[] GOLD;
delete[] C;
delete[] Bt;
delete[] B;
delete[] At;
delete[] A;
}
void check_gemm_works(void) {
static long kSizes[] = {1, 2, 3, 4, 5, 6, 7, 17, 31, 33, 63, 128, 129};
is_self_testing = true;
long c = 0;
long N = sizeof(kSizes) / sizeof(kSizes[0]);
for (long i = 0; i < N; ++i) {
long m = kSizes[i];
for (long j = 0; j < N; ++j) {
long n = kSizes[N - 1 - i];
for (long k = 0; k < N; ++k) {
long K = kSizes[i];
if (c++ % 13 == 0) {
printf("testing %2ld %2ld %2ld\r", m, n, K);
}
test_gemm(m, n, K);
}
}
}
printf("\r");
is_self_testing = false;
}
long m = 2333 / 3;
long k = 577 / 3;
long n = 713 / 3;
void check_sgemm(void) {
float *A = new float[m * k];
float *At = new float[k * m];
float *B = new float[k * n];
float *Bt = new float[n * k];
float *C = new float[m * n];
double *GOLD = new double[m * n];
fill(A, m * k);
fill(B, k * n);
transpose(m, k, A, k, At, m);
transpose(k, n, B, n, Bt, k);
bench(dgemm(0, 0, m, n, k, 1, A, k, B, n, 0, GOLD, n));
bench(sgemm(0, 0, m, n, k, 1, A, k, B, n, 0, C, n));
check(PRECISION, m, n, GOLD, n, C, n);
bench(sgemm(1, 0, m, n, k, 1, At, m, B, n, 0, C, n));
check(PRECISION, m, n, GOLD, n, C, n);
bench(sgemm(0, 1, m, n, k, 1, A, k, Bt, k, 0, C, n));
check(PRECISION, m, n, GOLD, n, C, n);
bench(sgemm(1, 1, m, n, k, 1, At, m, Bt, k, 0, C, n));
check(PRECISION, m, n, GOLD, n, C, n);
delete[] GOLD;
delete[] C;
delete[] Bt;
delete[] B;
delete[] At;
delete[] A;
}
int main(int argc, char *argv[]) {
check_gemm_works();
check_sgemm();
}

View file

@ -20,7 +20,6 @@ o/$(MODE)/third_party: \
o/$(MODE)/third_party/libcxxabi \
o/$(MODE)/third_party/libunwind \
o/$(MODE)/third_party/linenoise \
o/$(MODE)/third_party/llm \
o/$(MODE)/third_party/lua \
o/$(MODE)/third_party/lz4cli \
o/$(MODE)/third_party/make \

View file

@ -45,9 +45,9 @@ __static_yoink("musl_libc_notice");
// something as substantive as this library, then we shall assume the
// application is meaty enough to benefit from the performance of the
// chromium zlib library (costs ~40kb binary) versus just having puff
__static_yoink("_Cz_inflateInit2");
__static_yoink("_Cz_inflate");
__static_yoink("_Cz_inflateEnd");
__static_yoink("inflateInit2");
__static_yoink("inflate");
__static_yoink("inflateEnd");
#endif
static char *

View file

@ -31,7 +31,7 @@
#ifndef USE_ZLIB
ZCONST ulg near *get_crc_table OF((void));
#endif
#if (defined(USE_ZLIB) || defined(CRC_TABLE_ONLY))
#if (1 || defined(USE_ZLIB) || defined(CRC_TABLE_ONLY))
# ifdef IZ_CRC_BE_OPTIMIZ
# undef IZ_CRC_BE_OPTIMIZ
# endif

View file

@ -72,7 +72,7 @@
#include "libc/nt/winsock.h"
#endif
unsigned _Cz_crc32(unsigned crc, const unsigned char *buf, unsigned len);
unsigned crc32(unsigned crc, const unsigned char *buf, unsigned len);
/*
* XXX start of zipfile.h
@ -867,7 +867,7 @@ local void read_Unicode_Path_entry(pZipListEntry)
}
strcpy(iname, pZipListEntry->iname);
chksum = _Cz_crc32(chksum, (uch *)(iname), strlen(iname));
chksum = crc32(chksum, (uch *)(iname), strlen(iname));
free(iname);
@ -972,7 +972,7 @@ local void read_Unicode_Path_local_entry(pZipListEntry)
}
strcpy(iname, pZipListEntry->iname);
chksum = _Cz_crc32(chksum, (uch *)(iname), strlen(iname));
chksum = crc32(chksum, (uch *)(iname), strlen(iname));
free(iname);
@ -1558,7 +1558,7 @@ local int add_Unicode_Path_local_extra_field(pZEntry)
# define inameLocal (pZEntry->iname)
#endif
chksum = _Cz_crc32(chksum, (uch *)(inameLocal), strlen(inameLocal));
chksum = crc32(chksum, (uch *)(inameLocal), strlen(inameLocal));
#ifdef WIN32_OEM
free(inameLocal);
@ -1685,7 +1685,7 @@ local int add_Unicode_Path_cen_extra_field(pZEntry)
# define inameLocal (pZEntry->iname)
#endif
chksum = _Cz_crc32(chksum, (uch *)(inameLocal), strlen(inameLocal));
chksum = crc32(chksum, (uch *)(inameLocal), strlen(inameLocal));
#ifdef WIN32_OEM
free(inameLocal);

View file

@ -13,6 +13,7 @@
#define z_const const
#if 0
#define Z_COSMO_PREFIX_SET
#define Bytef _Cz_Bytef
@ -162,6 +163,9 @@
#define zlibCompileFlags _Cz_zlibCompileFlags
#define zlibVersion _Cz_zlibVersion
#pragma message "zconf is included, so zlibVersion should be renamed"
#endif
typedef unsigned char Byte;
typedef unsigned int uInt; /* 16 bits or more */

View file

@ -21,10 +21,10 @@ AMD64=${2:-x86_64}
ARM64=${3:-aarch64}
GCCVER=12.3.0
make -j32 m= \
make -j64 m= \
$APELINK
make -j32 m=$AMD64 \
make -j64 m=$AMD64 \
o/cosmocc.h.txt \
o/$AMD64/ape/ape.lds \
o/$AMD64/libc/crt/crt.o \
@ -47,7 +47,7 @@ make -j32 m=$AMD64 \
o/$AMD64/third_party/make/make.dbg \
o/$AMD64/third_party/ctags/ctags.dbg
make -j32 m=$ARM64 \
make -j64 m=$ARM64 \
o/$ARM64/ape/ape.elf \
o/$ARM64/ape/aarch64.lds \
o/$ARM64/libc/crt/crt.o \
@ -90,10 +90,10 @@ fetch() {
OLD=$PWD
cd "$OUTDIR/"
if [ ! -x bin/x86_64-linux-cosmo-gcc ]; then
fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.35/aarch64-gcc.zip
fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.39/aarch64-gcc.zip
unzip aarch64-gcc.zip
rm -f aarch64-gcc.zip
fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.35/x86_64-gcc.zip
fetch https://github.com/ahgamut/superconfigure/releases/download/z0.0.39/x86_64-gcc.zip
unzip x86_64-gcc.zip
rm -f x86_64-gcc.zip
fi