about | summary | refs | log | tree | commit | diff
path: root/libc
diff options
context:
space:
mode:
author: Marvin Borner, 2021-02-21 15:45:41 +0100
committer: Marvin Borner, 2021-02-21 15:45:41 +0100
commit: afeb7e4f00eff33e9e1cfcb766825e7c87f5f1d1 (patch)
tree: 87572c38c6cfd3bf49119ecb63de2e8c3ad4fb02 /libc
parent: 17b83714ecc713f3faebf668a3d286e531f41859 (diff)
Activated SSE/FPU features for better performance
Diffstat (limited to 'libc')
-rw-r--r-- libc/cpu.c 98
-rw-r--r-- libc/inc/cpu.h 84
-rw-r--r-- libc/inc/print.h 1
-rw-r--r-- libc/mem.c 65
-rw-r--r-- libc/print.c 10
5 files changed, 210 insertions, 48 deletions
diff --git a/libc/cpu.c b/libc/cpu.c
index 52c0280..2eb6c46 100644
--- a/libc/cpu.c
+++ b/libc/cpu.c
@@ -48,41 +48,91 @@ void outl(u16 port, u32 data)
__asm__ volatile("outl %0, %1" ::"a"(data), "Nd"(port));
}
-void cpuid(int code, u32 *a, u32 *b, u32 *c, u32 *d)
+#ifdef kernel
+
+static void cpuid(int code, u32 *a, u32 *b, u32 *c, u32 *d)
{
__asm__ volatile("cpuid" : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d) : "a"(code));
}
-char *cpu_string(char buf[13])
-{
- u32 a, b, c, d;
- cpuid(CPUID_VENDOR_STRING, &a, &b, &c, &d);
- char *ebx = (char *)&b;
- char *ecx = (char *)&c;
- char *edx = (char *)&d;
- buf[0] = ebx[0];
- buf[1] = ebx[1];
- buf[2] = ebx[2];
- buf[3] = ebx[3];
- buf[4] = edx[0];
- buf[5] = edx[1];
- buf[6] = edx[2];
- buf[7] = edx[3];
- buf[8] = ecx[0];
- buf[9] = ecx[1];
- buf[10] = ecx[2];
- buf[11] = ecx[3];
- buf[12] = 0;
+static char *cpu_string(char buf[16])
+{
+ // wtf
+ cpuid(CPUID_VENDOR_STRING, (u32 *)(buf + 12), (u32 *)(buf), (u32 *)(buf + 8),
+ (u32 *)(buf + 4));
+
return buf;
}
void cpu_print(void)
{
- char buf[13] = { 0 };
- printf("%s\n", cpu_string(buf));
+ char buf[16] = { 0 };
+ printf("CPU vendor: %s\n", cpu_string(buf));
+}
+
+static u32 cr0_get(void)
+{
+ u32 cr0;
+ __asm__ volatile("movl %%cr0, %%eax" : "=a"(cr0));
+ return cr0;
+}
+
+static void cr0_set(u32 cr0)
+{
+ __asm__ volatile("movl %%eax, %%cr0" ::"a"(cr0));
+}
+
+static u32 cr4_get(void)
+{
+ u32 cr4;
+ __asm__ volatile("movl %%cr4, %%eax" : "=a"(cr4));
+ return cr4;
+}
+
+static void cr4_set(u32 cr4)
+{
+ __asm__ volatile("movl %%eax, %%cr4" ::"a"(cr4));
+}
+
+static u32 cpu_features = 0;
+u8 cpu_has_feature(u32 feature)
+{
+ return (cpu_features & feature) != 0;
+}
+
+void fpu_handler()
+{
+ __asm__ volatile("clts");
+}
+
+static u8 fpu_state[512] __attribute__((aligned(16)));
+void fpu_restore(void)
+{
+ __asm__ volatile("fxrstor (%0)" ::"r"(fpu_state));
+}
+
+void cpu_enable_features(void)
+{
+ u32 a = 0, b = 0, c = 0, d = 0;
+ cpuid(CPUID_FEATURES, &a, &b, &c, &d);
+ cpu_features = d;
+ if (cpu_has_feature(CPUID_FEAT_EDX_SSE)) {
+ cr0_set(cr0_get() & ~(1 << 2));
+ cr0_set(cr0_get() | (1 << 1));
+ cr4_set(cr4_get() | (3 << 9));
+ } else {
+ panic("No SSE support!\n");
+ }
+
+ if (cpu_has_feature(CPUID_FEAT_EDX_FPU)) {
+ __asm__ volatile("fninit");
+ __asm__ volatile("fxsave %0" : "=m"(fpu_state));
+ irq_install_handler(7, fpu_handler);
+ } else {
+ panic("No FPU support!\n");
+ }
}
-#ifdef kernel
void cli(void)
{
__asm__ volatile("cli");
diff --git a/libc/inc/cpu.h b/libc/inc/cpu.h
index 033743d..fa82fbe 100644
--- a/libc/inc/cpu.h
+++ b/libc/inc/cpu.h
@@ -5,8 +5,6 @@
#include <def.h>
-enum cpuid_requests { CPUID_VENDOR_STRING, CPUID_FEATURES, CPUID_TLB, CPUID_SERIAL };
-
u8 inb(u16 port);
u16 inw(u16 port);
u32 inl(u16 port);
@@ -16,24 +14,84 @@ void outb(u16 port, u8 data);
void outw(u16 port, u16 data);
void outl(u16 port, u32 data);
-void cpuid(int code, u32 *a, u32 *b, u32 *c, u32 *d);
-char *cpu_string(char buf[12]);
-void cpu_print(void);
+static inline void spinlock(int *ptr)
+{
+ int prev;
+ do
+ __asm__ volatile("lock xchgl %0,%1" : "=a"(prev) : "m"(*ptr), "a"(1));
+ while (prev);
+}
#ifdef kernel
+void cpu_print(void);
+void cpu_enable_features(void);
+void fpu_restore(void);
+
void cli(void);
void sti(void);
void hlt(void);
void idle(void);
void loop(void);
-#endif
-static inline void spinlock(int *ptr)
-{
- int prev;
- do
- __asm__ volatile("lock xchgl %0,%1" : "=a"(prev) : "m"(*ptr), "a"(1));
- while (prev);
-}
+enum cpuid_requests { CPUID_VENDOR_STRING, CPUID_FEATURES, CPUID_TLB, CPUID_SERIAL };
+enum cpuid_features {
+ CPUID_FEAT_ECX_SSE3 = 1 << 0,
+ CPUID_FEAT_ECX_PCLMUL = 1 << 1,
+ CPUID_FEAT_ECX_DTES64 = 1 << 2,
+ CPUID_FEAT_ECX_MONITOR = 1 << 3,
+ CPUID_FEAT_ECX_DS_CPL = 1 << 4,
+ CPUID_FEAT_ECX_VMX = 1 << 5,
+ CPUID_FEAT_ECX_SMX = 1 << 6,
+ CPUID_FEAT_ECX_EST = 1 << 7,
+ CPUID_FEAT_ECX_TM2 = 1 << 8,
+ CPUID_FEAT_ECX_SSSE3 = 1 << 9,
+ CPUID_FEAT_ECX_CID = 1 << 10,
+ CPUID_FEAT_ECX_FMA = 1 << 12,
+ CPUID_FEAT_ECX_CX16 = 1 << 13,
+ CPUID_FEAT_ECX_ETPRD = 1 << 14,
+ CPUID_FEAT_ECX_PDCM = 1 << 15,
+ CPUID_FEAT_ECX_PCIDE = 1 << 17,
+ CPUID_FEAT_ECX_DCA = 1 << 18,
+ CPUID_FEAT_ECX_SSE4_1 = 1 << 19,
+ CPUID_FEAT_ECX_SSE4_2 = 1 << 20,
+ CPUID_FEAT_ECX_x2APIC = 1 << 21,
+ CPUID_FEAT_ECX_MOVBE = 1 << 22,
+ CPUID_FEAT_ECX_POPCNT = 1 << 23,
+ CPUID_FEAT_ECX_AES = 1 << 25,
+ CPUID_FEAT_ECX_XSAVE = 1 << 26,
+ CPUID_FEAT_ECX_OSXSAVE = 1 << 27,
+ CPUID_FEAT_ECX_AVX = 1 << 28,
+
+ CPUID_FEAT_EDX_FPU = 1 << 0,
+ CPUID_FEAT_EDX_VME = 1 << 1,
+ CPUID_FEAT_EDX_DE = 1 << 2,
+ CPUID_FEAT_EDX_PSE = 1 << 3,
+ CPUID_FEAT_EDX_TSC = 1 << 4,
+ CPUID_FEAT_EDX_MSR = 1 << 5,
+ CPUID_FEAT_EDX_PAE = 1 << 6,
+ CPUID_FEAT_EDX_MCE = 1 << 7,
+ CPUID_FEAT_EDX_CX8 = 1 << 8,
+ CPUID_FEAT_EDX_APIC = 1 << 9,
+ CPUID_FEAT_EDX_SEP = 1 << 11,
+ CPUID_FEAT_EDX_MTRR = 1 << 12,
+ CPUID_FEAT_EDX_PGE = 1 << 13,
+ CPUID_FEAT_EDX_MCA = 1 << 14,
+ CPUID_FEAT_EDX_CMOV = 1 << 15,
+ CPUID_FEAT_EDX_PAT = 1 << 16,
+ CPUID_FEAT_EDX_PSE36 = 1 << 17,
+ CPUID_FEAT_EDX_PSN = 1 << 18,
+ CPUID_FEAT_EDX_CLF = 1 << 19,
+ CPUID_FEAT_EDX_DTES = 1 << 21,
+ CPUID_FEAT_EDX_ACPI = 1 << 22,
+ CPUID_FEAT_EDX_MMX = 1 << 23,
+ CPUID_FEAT_EDX_FXSR = 1 << 24,
+ CPUID_FEAT_EDX_SSE = 1 << 25,
+ CPUID_FEAT_EDX_SSE2 = 1 << 26,
+ CPUID_FEAT_EDX_SS = 1 << 27,
+ CPUID_FEAT_EDX_HTT = 1 << 28,
+ CPUID_FEAT_EDX_TM1 = 1 << 29,
+ CPUID_FEAT_EDX_IA64 = 1 << 30,
+};
+#endif
#endif
diff --git a/libc/inc/print.h b/libc/inc/print.h
index 3e11db7..f32873a 100644
--- a/libc/inc/print.h
+++ b/libc/inc/print.h
@@ -20,6 +20,7 @@ int err(int code, const char *format, ...);
#else
#include <proc.h>
int print_app(enum stream_defaults id, const char *proc_name, const char *str);
+void panic(const char *format, ...);
#endif
#endif
diff --git a/libc/mem.c b/libc/mem.c
index d7ce3dc..953ef33 100644
--- a/libc/mem.c
+++ b/libc/mem.c
@@ -5,24 +5,61 @@
#include <mem.h>
#include <sys.h>
-// Taken from jgraef at osdev
void *memcpy(void *dest, const void *src, u32 n)
{
+#ifdef userspace
+ // Inspired by Jeko at osdev
+ for (u32 i = 0; i < n / 16; i++) {
+ __asm__ __volatile__("movups (%0), %%xmm0\n"
+ "movntdq %%xmm0, (%1)\n" ::"r"(src),
+ "r"(dest)
+ : "memory");
+
+ src = ((u8 *)src) + 16;
+ dest = ((u8 *)dest) + 16;
+ }
+
+ if (n & 7) {
+ n = n & 7;
+
+ int d0, d1, d2;
+ __asm__ __volatile__("rep ; movsl\n\t"
+ "testb $2,%b4\n\t"
+ "je 1f\n\t"
+ "movsw\n"
+ "1:\ttestb $1,%b4\n\t"
+ "je 2f\n\t"
+ "movsb\n"
+ "2:"
+ : "=&c"(d0), "=&D"(d1), "=&S"(d2)
+ : "0"(n / 4), "q"(n), "1"((long)dest), "2"((long)src)
+ : "memory");
+ }
+ return dest;
+#else
+ // Inspired by jgraef at osdev
u32 num_dwords = n / 4;
u32 num_bytes = n % 4;
u32 *dest32 = (u32 *)dest;
u32 *src32 = (u32 *)src;
u8 *dest8 = ((u8 *)dest) + num_dwords * 4;
u8 *src8 = ((u8 *)src) + num_dwords * 4;
- u32 i;
- for (i = 0; i < num_dwords; i++) {
- dest32[i] = src32[i];
- }
- for (i = 0; i < num_bytes; i++) {
+ // TODO: What's faster?
+ __asm__ volatile("rep movsl\n"
+ : "=S"(src32), "=D"(dest32), "=c"(num_dwords)
+ : "S"(src32), "D"(dest32), "c"(num_dwords)
+ : "memory");
+
+ /* for (u32 i = 0; i < num_dwords; i++) { */
+ /* dest32[i] = src32[i]; */
+ /* } */
+
+ for (u32 i = 0; i < num_bytes; i++) {
dest8[i] = src8[i];
}
return dest;
+#endif
}
void *memset(void *dest, int val, u32 n)
@@ -33,12 +70,18 @@ void *memset(void *dest, int val, u32 n)
u8 *dest8 = ((u8 *)dest) + num_dwords * 4;
u8 val8 = (u8)val;
u32 val32 = val | (val << 8) | (val << 16) | (val << 24);
- u32 i;
- for (i = 0; i < num_dwords; i++) {
- dest32[i] = val32;
- }
- for (i = 0; i < num_bytes; i++) {
+ // TODO: What's faster?
+ __asm__ volatile("rep stosl\n"
+ : "=D"(dest32), "=c"(num_dwords)
+ : "D"(dest32), "c"(num_dwords), "a"(val32)
+ : "memory");
+
+ /* for (u32 i = 0; i < num_dwords; i++) { */
+ /* dest32[i] = val32; */
+ /* } */
+
+ for (u32 i = 0; i < num_bytes; i++) {
dest8[i] = val8;
}
return dest;
diff --git a/libc/print.c b/libc/print.c
index ffb1e69..3d64504 100644
--- a/libc/print.c
+++ b/libc/print.c
@@ -213,4 +213,14 @@ int print(const char *str)
return strlen(str);
}
+void panic(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+
+ assert(0);
+}
+
#endif