Skip to content
This repository was archived by the owner on Feb 13, 2025. It is now read-only.

Commit d71003f

Browse files
author
Anselm Kruis
committed
Stackless issue #181: Replace slp_dont_optimize... vars
by more appropriate compiler specific code. The new code should work with whole program optimisation.
1 parent a05f330 commit d71003f

File tree

5 files changed

+69
-25
lines changed

5 files changed

+69
-25
lines changed

Python/ceval.c

+5-7
Original file line numberDiff line numberDiff line change
@@ -3854,9 +3854,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
38543854

38553855

38563856
#ifdef STACKLESS
3857-
/* a global write only dummy variable */
3858-
char slp_dont_optimise_away_slp_eval_frame_functions;
3859-
38603857
PyObject * _Py_HOT_FUNCTION
38613858
slp_eval_frame_noval(PyFrameObject *f, int throwflag, PyObject *retval)
38623859
{
@@ -3866,7 +3863,8 @@ slp_eval_frame_noval(PyFrameObject *f, int throwflag, PyObject *retval)
38663863
* it serves as a marker whether we expect a value or
38673864
* not, and it makes debugging a little easier.
38683865
*/
3869-
slp_dont_optimise_away_slp_eval_frame_functions = 1;
3866+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)1);
3867+
38703868
r = slp_eval_frame_value(f, throwflag, retval);
38713869
return r;
38723870
}
@@ -3881,7 +3879,7 @@ slp_eval_frame_iter(PyFrameObject *f, int throwflag, PyObject *retval)
38813879
* for_iter operation. In this case we need to handle
38823880
* null without error as valid result.
38833881
*/
3884-
slp_dont_optimise_away_slp_eval_frame_functions = 2;
3882+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)2);
38853883
r = slp_eval_frame_value(f, throwflag, retval);
38863884
return r;
38873885
}
@@ -3896,7 +3894,7 @@ slp_eval_frame_setup_with(PyFrameObject *f, int throwflag, PyObject *retval)
38963894
* SETUP_WITH operation.
38973895
* NOTE / XXX: see above.
38983896
*/
3899-
slp_dont_optimise_away_slp_eval_frame_functions = 3;
3897+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)3);
39003898
r = slp_eval_frame_value(f, throwflag, retval);
39013899
return r;
39023900
}
@@ -3911,7 +3909,7 @@ slp_eval_frame_with_cleanup(PyFrameObject *f, int throwflag, PyObject *retval)
39113909
* WITH_CLEANUP operation.
39123910
* NOTE / XXX: see above.
39133911
*/
3914-
slp_dont_optimise_away_slp_eval_frame_functions = 4;
3912+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)4);
39153913
r = slp_eval_frame_value(f, throwflag, retval);
39163914
return r;
39173915
}

Stackless/core/slp_transfer.c

+7-8
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ or disable the STACKLESS flag.
5050
#endif
5151
#include "platf/slp_platformselect.h"
5252

53+
SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS
54+
5355
#ifdef EXTERNAL_ASM
5456
/* CCP addition: Make these functions, to be called from assembler.
5557
* The token include file for the given platform should enable the
@@ -85,8 +87,6 @@ extern int slp_switch(void);
8587

8688
#endif
8789

88-
/* a write only variable used to prevent overly optimisation */
89-
intptr_t *slp_dont_optimise_away_goobledigoobs;
9090
static int
9191
climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
9292
PyTaskletObject *prev)
@@ -101,15 +101,14 @@ climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
101101
intptr_t probe;
102102
register ptrdiff_t needed = &probe - ts->st.cstack_base;
103103
/* in rare cases, the need might have vanished due to the recursion */
104-
register intptr_t *goobledigoobs;
105104
if (needed > 0) {
106-
goobledigoobs = alloca(needed * sizeof(intptr_t));
107-
if (goobledigoobs == NULL)
105+
register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
106+
if (stack_ptr_tmp == NULL)
108107
return -1;
109-
/* hinder the compiler to optimise away
110-
goobledigoobs and the alloca call.
108+
/* prevent the compiler from optimising away
109+
stack_ptr_tmp and the alloca call.
111110
This happens with gcc 4.7.x and -O2 */
112-
slp_dont_optimise_away_goobledigoobs = goobledigoobs;
111+
SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
113112
}
114113
return slp_transfer(cstprev, cst, prev);
115114
}

Stackless/core/stacklesseval.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,14 @@ climb_stack_and_eval_frame(PyFrameObject *f)
256256
intptr_t probe;
257257
ptrdiff_t needed = &probe - ts->st.cstack_base;
258258
/* in rare cases, the need might have vanished due to the recursion */
259-
intptr_t *goobledigoobs;
260259
if (needed > 0) {
261-
goobledigoobs = alloca(needed * sizeof(intptr_t));
262-
if (goobledigoobs == NULL)
260+
register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
261+
if (stack_ptr_tmp == NULL)
263262
return NULL;
263+
/* prevent the compiler from optimising away
264+
stack_ptr_tmp and the alloca call.
265+
This happens with gcc 4.7.x and -O2 */
266+
SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
264267
}
265268
return slp_eval_frame(f);
266269
}

Stackless/module/scheduling.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,6 @@ typedef struct {
355355
/* not a valid ptr and not a common integer */
356356
#define SAVED_TSTATE_MAGIC1 (((intptr_t)transfer_with_tracing)+1)
357357
#define SAVED_TSTATE_MAGIC2 (-1*((intptr_t)transfer_with_tracing))
358-
saved_tstat_with_magic_t * slp_dont_optimise_away_saved_tstat_with_magic;
359358

360359
static int
361360
transfer_with_tracing(PyCStackObject **cstprev, PyCStackObject *cst, PyTaskletObject *prev)
@@ -370,7 +369,7 @@ transfer_with_tracing(PyCStackObject **cstprev, PyCStackObject *cst, PyTaskletOb
370369
/* prevent overly aggressive compiler optimisation.
371370
We pass the address of sm to SLP_DO_NOT_OPTIMIZE_AWAY.
372371
This way the optimizer can't change the layout of the structure. */
373-
slp_dont_optimise_away_saved_tstat_with_magic = &sm;
372+
SLP_DO_NOT_OPTIMIZE_AWAY(&sm);
374373

375374
sm.s.tracing = ts->tracing;
376375
sm.s.c_profilefunc = ts->c_profilefunc;

Stackless/platf/slp_platformselect.h

+50-5
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
#elif defined(__GNUC__) && defined(sparc) && defined(sun)
2121
#include "switch_sparc_sun_gcc.h" /* SunOS sparc with gcc */
2222
#elif defined(__GNUC__) && defined(__s390__) && defined(__linux__)
23-
#include "switch_s390_unix.h" /* Linux/S390 */
23+
#include "switch_s390_unix.h" /* Linux/S390 */
2424
#elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__)
25-
#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */
25+
#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */
2626
#elif defined(__GNUC__) && defined(__arm__) && defined(__thumb__)
2727
#include "switch_arm_thumb_gcc.h" /* gcc using arm thumb */
2828
#elif defined(__GNUC__) && defined(__arm32__)
@@ -35,6 +35,51 @@
3535

3636
/* default definitions if not defined in above files */
3737

38+
/*
39+
* Call SLP_DO_NOT_OPTIMIZE_AWAY(pointer) to ensure that pointer will be
40+
* computed even post-optimization. Use it for pointers that are computed but
41+
* otherwise are useless. The compiler tends to do a good job at eliminating
42+
* unused variables, and this macro fools it into thinking the pointer is in fact
43+
* needed.
44+
*/
45+
46+
#ifndef SLP_DO_NOT_OPTIMIZE_AWAY
47+
48+
/* Code is based on Facebook folly
49+
* https://github.com/facebook/folly/blob/master/folly/Benchmark.h,
50+
* which has an Apache 2 license.
51+
*/
52+
#ifdef _MSC_VER
53+
54+
#pragma optimize("", off)
55+
56+
static inline void doNotOptimizeDependencySink(const void* p) {}
57+
58+
#pragma optimize("", on)
59+
60+
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) doNotOptimizeDependencySink(pointer)
61+
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */
62+
63+
#elif (defined(__GNUC__) || defined(__clang__))
64+
/*
65+
* The "r" constraint forces the compiler to make the pointer available
66+
* in a register to the asm block, which means that it must have
67+
* computed/loaded it.
68+
*/
69+
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \
70+
do {__asm__ volatile("" ::"r"(pointer));} while(0)
71+
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */
72+
#else
73+
/*
74+
* Unknown compiler
75+
*/
76+
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \
77+
do { slp_do_not_opimize_away_sink = ((void*)(pointer)); } while(0)
78+
extern uint8_t* volatile slp_do_not_opimize_away_sink;
79+
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS uint8_t* volatile slp_do_not_opimize_away_sink;
80+
#endif
81+
#endif
82+
3883
/* adjust slots to typical size of a few recursions on your system */
3984

4085
#ifndef CSTACK_SLOTS
@@ -82,8 +127,8 @@
82127
#endif
83128

84129
#define CSTACK_SAVE_NOW(tstate, stackvar) \
85-
((tstate)->st.cstack_root != NULL ? \
86-
CSTACK_SUBTRACT((tstate)->st.cstack_root, \
87-
(intptr_t*)&(stackvar)) > CSTACK_WATERMARK : 1)
130+
((tstate)->st.cstack_root != NULL ? \
131+
CSTACK_SUBTRACT((tstate)->st.cstack_root, \
132+
(intptr_t*)&(stackvar)) > CSTACK_WATERMARK : 1)
88133

89134
#endif

0 commit comments

Comments
 (0)