aboutsummaryrefslogtreecommitdiff
path: root/libs/libc/mem.c
blob: 2e457efa2e1f9d854c12f0d4ee655d3a0854eaac (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// MIT License, Copyright (c) 2020 Marvin Borner

#include <assert.h>
#include <def.h>
#include <mem.h>
#include <sys.h>

// Copy n bytes from src to dest and return dest.
// No overlap handling is performed; the data is simply copied front to back.
//
// Userspace builds move 16-byte blocks through xmm0 and finish the remaining
// 0..15 bytes with string instructions. All other builds copy n / 4 dwords
// with "rep movsl" and the remaining 0..3 bytes with a byte loop.
void *memcpy(void *dest, const void *src, u32 n)
{
#ifdef userspace
	// Inspired by Jeko at osdev
	u8 *dest_byte = dest;
	const u8 *src_byte = src;

	// Unaligned moves on both sides: neither pointer is guaranteed to be
	// 16-byte aligned, and a non-temporal movntdq store would fault on an
	// unaligned destination. xmm0 is listed as clobbered since it's overwritten.
	for (u32 i = 0; i < n / 16; i++) {
		__asm__ volatile("movups (%0), %%xmm0\n"
				 "movups %%xmm0, (%1)\n" ::"r"(src_byte),
				 "r"(dest_byte)
				 : "memory", "xmm0");

		src_byte += 16;
		dest_byte += 16;
	}

	// The block loop consumed multiples of 16, so up to 15 bytes remain
	u32 rest = n & 15;
	if (rest) {
		int d0, d1, d2;
		// Copy rest / 4 dwords, then one word if bit 1 of rest is set and
		// one byte if bit 0 of rest is set
		__asm__ volatile("rep ; movsl\n\t"
				 "testb $2,%b4\n\t"
				 "je 1f\n\t"
				 "movsw\n"
				 "1:\ttestb $1,%b4\n\t"
				 "je 2f\n\t"
				 "movsb\n"
				 "2:"
				 : "=&c"(d0), "=&D"(d1), "=&S"(d2)
				 : "0"(rest / 4), "q"(rest), "1"((long)dest_byte),
				   "2"((long)src_byte)
				 : "memory");
	}

	// Callers expect the start of the destination, not the advanced cursor
	return dest;
#else
	// Inspired by jgraef at osdev
	u32 num_dwords = n / 4;
	u32 num_bytes = n % 4;
	u32 *dest32 = (u32 *)dest;
	const u32 *src32 = (const u32 *)src;

	// The byte tail starts right behind the dword-sized part
	u8 *dest8 = ((u8 *)dest) + num_dwords * 4;
	const u8 *src8 = ((const u8 *)src) + num_dwords * 4;

	// TODO: What's faster? (rep movsl vs. a plain C loop over the dwords)
	// The instruction advances ESI/EDI and counts ECX down, hence the
	// read-write operands
	__asm__ volatile("rep movsl\n"
			 : "+S"(src32), "+D"(dest32), "+c"(num_dwords)
			 :
			 : "memory");

	for (u32 i = 0; i < num_bytes; i++) {
		dest8[i] = src8[i];
	}
	return dest;
#endif
}

// Fill n bytes at dest with the low byte of val and return dest.
//
// The first n / 4 dwords are written with "rep stosl" using the fill byte
// replicated into all four byte lanes; the remaining 0..3 bytes are written
// one by one.
void *memset(void *dest, u32 val, u32 n)
{
	// Only the low byte counts. Without this truncation a val above 0xff
	// would leak its upper bits into the dword pattern and the dword part
	// would disagree with the byte tail.
	u8 val8 = (u8)val;
	u32 lane = val8;
	u32 val32 = lane | (lane << 8) | (lane << 16) | (lane << 24);

	u32 num_dwords = n / 4;
	u32 num_bytes = n % 4;
	u32 *dest32 = (u32 *)dest;

	// The byte tail starts right behind the dword-sized part
	u8 *dest8 = ((u8 *)dest) + num_dwords * 4;

	// TODO: What's faster? (rep stosl vs. a plain C loop over the dwords)
	// The instruction advances EDI and counts ECX down, hence the
	// read-write operands
	__asm__ volatile("rep stosl\n"
			 : "+D"(dest32), "+c"(num_dwords)
			 : "a"(val32)
			 : "memory");

	for (u32 i = 0; i < num_bytes; i++) {
		dest8[i] = val8;
	}
	return dest;
}

// Search the first n bytes at src for the byte value c.
// Returns a pointer to the first matching byte or NULL if there is none.
void *memchr(void *src, char c, u32 n)
{
	// Compare as unsigned bytes: with a signed char, a negative c would be
	// promoted to a negative int and never equal any u8 value >= 0x80
	const u8 needle = (u8)c;
	u8 *s = (u8 *)src;

	for (u32 i = 0; i < n; i++) {
		if (s[i] == needle)
			return &s[i];
	}
	return NULL;
}

// Compare the first n bytes of s1 and s2 as unsigned bytes.
// Returns -1 if the first differing byte is smaller in s1, 1 if it is larger
// in s1, and 0 if all n bytes are equal (including n == 0).
int memcmp(const void *s1, const void *s2, u32 n)
{
	const u8 *lhs = s1;
	const u8 *rhs = s2;

	while (n--) {
		// The first mismatch decides the result
		if (*lhs != *rhs)
			return *lhs < *rhs ? -1 : 1;
		lhs++;
		rhs++;
	}
	return 0;
}

// Check whether the n bytes at buf are all zero.
// Returns 1 if every byte is zero (also for n == 0) and 0 otherwise.
int mememp(const u8 *buf, u32 n)
{
	// Direct bounded scan: never touches buf when n == 0 and avoids the
	// wrap-around of n - 1 that a shifted self-comparison would hit
	for (u32 i = 0; i < n; i++) {
		if (buf[i] != 0)
			return 0;
	}
	return 1;
}