@@ -25,6 +25,66 @@ function(detect_architecture symbol arch)
25
25
endif ()
26
26
endfunction ()
27
27
28
+ # direwolf versions through 1.5 were available pre-built for 32 bit Windows targets.
29
+ # Research and experimentation revealed that the SSE instructions made a big
30
+ # difference in runtime speed but SSE2 and later were not significantly better
31
+ # for this application. I decided to build with only the SSE instructions making
32
+ # the Pentium 3 the minimum requirement. SSE2 would require at least a Pentium 4
33
+ # and offered no significant performance advantage.
34
+ # These are ancient history - from the previous Century - but old computers, generally
35
+ # considered useless for anything else, often end up in the ham shack.
36
+ #
37
+ # When cmake was first used for direwolf, the default target became 64 bit and the
38
+ # SSE2, SSE3, SSE4.1, and SSE4.2 instructions were automatically enabled based on the
39
+ # build machine capabilities. This was fine until I tried running the application
40
+ # on a computer much older than where it was built. It did not have the SSE4 instructions
41
+ # and the application died without a clue for the reason.
42
+ # Just how much benefit do these new instructions provide for this application?
43
+ #
44
+ # These were all run on the same computer, but compiled in different ways.
45
+ # Times to run atest with Track 1 of the TNC test CD:
46
+ #
47
+ # direwolf 1.5 - 32 bit target - gcc 6.3.0
48
+ #
49
+ # 60.4 sec. Pentium 3 with SSE
50
+ #
51
+ # direwolf 1.6 - 32 bit target - gcc 7.4.0
52
+ #
53
+ # 81.0 sec. with no SIMD instructions enabled.
54
+ # 54.4 sec. with SSE
55
+ # 52.0 sec. with SSE2
56
+ # 52.4 sec. with SSE2, SSE3
57
+ # 52.3 sec. with SSE2, SSE3, SSE4.1, SSE4.2
58
+ # 49.9 sec. Fedora standard: -m32 -march=i686 -mtune=generic -msse2 -mfpmath=sse
59
+ # 50.4 sec. sse not sse2: -m32 -march=i686 -mtune=generic -msse -mfpmath=sse
60
+ #
61
+ # That's what I found several years ago with a much older compiler.
62
+ # The original SSE helped a lot but SSE2 and later made little difference.
63
+ #
64
+ # direwolf 1.6 - 64 bit target - gcc 7.4.0
65
+ #
66
+ # 34.8 sec. with no SIMD instructions enabled.
67
+ # 34.8 sec. with SSE
68
+ # 34.8 sec. with SSE2
69
+ # 34.2 sec. with SSE2, SSE3
70
+ # 33.5 sec. with SSE2, SSE3, SSE4.1, SSE4.2
71
+ # 33.4 sec. Fedora standard: -mtune=generic
72
+ #
73
+ # Why do we see so little variation? 64-bit target implies
74
+ # SSE, SSE2, SSE3 instructions are available.
75
+ #
76
+ # Building for a 64 bit target makes it run about 1.5x faster on the same hardware.
77
+ #
78
+ # The default will be set for maximum portability so packagers won't need to
79
+ # do anything special.
80
+ #
81
+ set (FORCE_SSE 1)
82
+ #
83
+ # While ENABLE_GENERIC also had the desired result (for x86_64), I don't think
84
+ # it is the right approach. It prevents the detection of the architecture,
85
+ # i.e. x86, x86_64, ARM, ARM64. That's why it did not go looking for the various
86
+ # SSE instructions. For x86, we would miss out on using SSE.
87
+
28
88
if (NOT ENABLE_GENERIC)
29
89
if (C_MSVC)
30
90
detect_architecture("_M_AMD64" x86_64)
@@ -49,12 +109,24 @@ set(TEST_DIR ${PROJECT_SOURCE_DIR}/cmake/cpu_tests)
49
109
50
110
# flags that set the minimum cpu flag requirements
51
111
# used to create re-distributable binary
112
+
52
113
if (${ARCHITECTURE} MATCHES "x86_64|x86" AND (FORCE_SSE OR FORCE_SSSE3 OR FORCE_SSE41))
53
114
if (FORCE_SSE)
54
115
set (HAS_SSE ON CACHE BOOL "SSE SIMD enabled" )
55
116
if (C_GCC OR C_CLANG)
56
- set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse" )
57
- set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse" )
117
+ if (${ARCHITECTURE} MATCHES "x86_64" )
118
+ # All 64-bit capable chips support MMX, SSE, SSE2, and SSE3
119
+ # so they are all enabled automatically. We don't want to use
120
+ # SSE4, based on build machine capabilities, because the application
121
+ # would not run properly on an older CPU.
122
+ set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mtune=generic" )
123
+ set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mtune=generic" )
124
+ else ()
125
+ # Fedora standard uses -msse2 here.
126
+ # I dropped it down to -msse for greater compatibility and little penalty.
127
+ set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 -march=i686 -mtune=generic -msse -mfpmath=sse" )
128
+ set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 -march=i686 -mtune=generic -msse -mfpmath=sse" )
129
+ endif ()
58
130
message (STATUS "Use SSE SIMD instructions" )
59
131
add_definitions (-DUSE_SSE)
60
132
elseif (C_MSVC)
0 commit comments