Diffstat (limited to 'src/profiler.c')
-rw-r--r-- | src/profiler.c | 485 |
1 file changed, 281 insertions(+), 204 deletions(-)
diff --git a/src/profiler.c b/src/profiler.c index 243a34872c2..5a6a8b48f6b 100644 --- a/src/profiler.c +++ b/src/profiler.c @@ -34,23 +34,152 @@ saturated_add (EMACS_INT a, EMACS_INT b) /* Logs. */ -typedef struct Lisp_Hash_Table log_t; +/* A fully associative cache of size SIZE, mapping vectors of DEPTH + Lisp objects to counts. */ +typedef struct { + /* We use `int' throughout for table indices because anything bigger + is overkill. (Maybe we should make a typedef, but int is short.) */ + int size; /* number of entries */ + int depth; /* elements in each key vector */ + int index_size; /* size of index */ + Lisp_Object *trace; /* working trace, `depth' elements */ + int *index; /* `index_size' indices or -1 if nothing */ + int *next; /* `size' indices to next bucket or -1 */ + EMACS_UINT *hash; /* `size' hash values */ + Lisp_Object *keys; /* `size' keys of `depth' objects each */ + EMACS_INT *counts; /* `size' entries, 0 indicates unused entry */ + int next_free; /* next free entry, -1 if all taken */ +} log_t; -static Lisp_Object cmpfn_profiler (Lisp_Object, Lisp_Object, - struct Lisp_Hash_Table *); -static Lisp_Object hashfn_profiler (Lisp_Object, struct Lisp_Hash_Table *); +static void +mark_log (log_t *log) +{ + if (log == NULL) + return; + int size = log->size; + int depth = log->depth; + for (int i = 0; i < size; i++) + if (log->counts[i] > 0) /* Only mark valid keys. */ + mark_objects (log->keys + i * depth, depth); +} + +static log_t * +make_log (int size, int depth) +{ + log_t *log = xmalloc (sizeof *log); + log->size = size; + log->depth = depth; + + /* The index size is arbitrary but for there to be any point it should be + bigger than SIZE. FIXME: make it a power of 2 or a (pseudo)prime. */ + int index_size = size * 2 + 1; + log->index_size = index_size; + + log->trace = xmalloc (depth * sizeof *log->trace); + + log->index = xmalloc (index_size * sizeof *log->index); + for (int i = 0; i < index_size; i++) + log->index[i] = -1; + + log->next = xmalloc (size * sizeof *log->next); + for (int i = 0; i < size - 1; i++) + log->next[i] = i + 1; + log->next[size - 1] = -1; + log->next_free = 0; + + log->hash = xmalloc (size * sizeof *log->hash); + log->keys = xzalloc (size * depth * sizeof *log->keys); + log->counts = xzalloc (size * sizeof *log->counts); + + return log; +} + +static void +free_log (log_t *log) +{ + xfree (log->trace); + xfree (log->index); + xfree (log->next); + xfree (log->hash); + xfree (log->keys); + xfree (log->counts); + xfree (log); +} + +static inline EMACS_INT +get_log_count (log_t *log, int idx) +{ + eassume (idx >= 0 && idx < log->size); + return log->counts[idx]; +} + +static inline void +set_log_count (log_t *log, int idx, EMACS_INT val) +{ + eassume (idx >= 0 && idx < log->size && val >= 0); + log->counts[idx] = val; +} + +static inline Lisp_Object * +get_key_vector (log_t *log, int idx) +{ + eassume (idx >= 0 && idx < log->size); + return log->keys + idx * log->depth; +} + +static inline int +log_hash_index (log_t *log, EMACS_UINT hash) +{ + /* FIXME: avoid division. */ + return hash % log->index_size; +} + +static void +remove_log_entry (log_t *log, int idx) +{ + eassume (idx >= 0 && idx < log->size); + /* Remove from index. */ + int hidx = log_hash_index (log, log->hash[idx]); + int *p = &log->index[hidx]; + while (*p != idx) + { + eassert (*p >= 0 && *p < log->size); + p = &log->next[*p]; + } + *p = log->next[*p]; + /* Invalidate entry and put it on the free list. 
*/ + log->counts[idx] = 0; + log->next[idx] = log->next_free; + log->next_free = idx; +} -static const struct hash_table_test hashtest_profiler = - { - LISPSYM_INITIALLY (Qprofiler_backtrace_equal), - LISPSYM_INITIALLY (Qnil) /* user_hash_function */, - LISPSYM_INITIALLY (Qnil) /* user_cmp_function */, - cmpfn_profiler, - hashfn_profiler, - }; +static bool +trace_equal (Lisp_Object *bt1, Lisp_Object *bt2, int depth) +{ + for (int i = 0; i < depth; i++) + if (!BASE_EQ (bt1[i], bt2[i]) && NILP (Ffunction_equal (bt1[i], bt2[i]))) + return false; + return true; +} + +static EMACS_UINT +trace_hash (Lisp_Object *trace, int depth) +{ + EMACS_UINT hash = 0; + for (int i = 0; i < depth; i++) + { + Lisp_Object f = trace[i]; + EMACS_UINT hash1 + = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE)) + : (CONSP (f) && CONSP (XCDR (f)) && BASE_EQ (Qclosure, XCAR (f))) + ? XHASH (XCDR (XCDR (f))) : XHASH (f)); + hash = sxhash_combine (hash, hash1); + } + return hash; +} struct profiler_log { - Lisp_Object log; + log_t *log; EMACS_INT gc_count; /* Samples taken during GC. */ EMACS_INT discarded; /* Samples evicted during table overflow. */ }; @@ -58,32 +187,22 @@ struct profiler_log { static Lisp_Object export_log (struct profiler_log *); static struct profiler_log -make_log (void) -{ - /* We use a standard Elisp hash-table object, but we use it in - a special way. This is OK as long as the object is not exposed - to Elisp, i.e. until it is returned by *-profiler-log, after which - it can't be used any more. */ - EMACS_INT heap_size - = clip_to_bounds (0, profiler_log_size, MOST_POSITIVE_FIXNUM); - ptrdiff_t max_stack_depth - = clip_to_bounds (0, profiler_max_stack_depth, PTRDIFF_MAX);; - struct profiler_log log - = { make_hash_table (hashtest_profiler, heap_size, - DEFAULT_REHASH_SIZE, - DEFAULT_REHASH_THRESHOLD, - Qnil, false), - 0, 0 }; - struct Lisp_Hash_Table *h = XHASH_TABLE (log.log); - - /* What is special about our hash-tables is that the values are pre-filled - with the vectors we'll use as keys. */ - ptrdiff_t i = ASIZE (h->key_and_value) >> 1; - while (i > 0) - set_hash_value_slot (h, --i, make_nil_vector (max_stack_depth)); - return log; +make_profiler_log (void) +{ + int size = clip_to_bounds (0, profiler_log_size, + min (MOST_POSITIVE_FIXNUM, INT_MAX)); + int max_stack_depth = clip_to_bounds (0, profiler_max_stack_depth, INT_MAX); + return (struct profiler_log){make_log (size, max_stack_depth), 0, 0}; } +static void +free_profiler_log (struct profiler_log *plog) +{ + free_log (plog->log); + plog->log = NULL; +} + + /* Evict the least used half of the hash_table. When the table is full, we have to evict someone. @@ -100,22 +219,22 @@ make_log (void) cost of O(1) and we get O(N) time for a new entry to grow larger than the other least counts before a new round of eviction. */ -static EMACS_INT approximate_median (log_t *log, - ptrdiff_t start, ptrdiff_t size) +static EMACS_INT +approximate_median (log_t *log, int start, int size) { eassert (size > 0); if (size < 2) - return XFIXNUM (HASH_VALUE (log, start)); + return get_log_count (log, start); if (size < 3) /* Not an actual median, but better for our application than choosing either of the two numbers. 
*/ - return ((XFIXNUM (HASH_VALUE (log, start)) - + XFIXNUM (HASH_VALUE (log, start + 1))) + return ((get_log_count (log, start) + + get_log_count (log, start + 1)) / 2); else { - ptrdiff_t newsize = size / 3; - ptrdiff_t start2 = start + newsize; + int newsize = size / 3; + int start2 = start + newsize; EMACS_INT i1 = approximate_median (log, start, newsize); EMACS_INT i2 = approximate_median (log, start2, newsize); EMACS_INT i3 = approximate_median (log, start2 + newsize, @@ -126,34 +245,24 @@ static EMACS_INT approximate_median (log_t *log, } } -static void evict_lower_half (struct profiler_log *plog) +static void +evict_lower_half (struct profiler_log *plog) { - log_t *log = XHASH_TABLE (plog->log); - ptrdiff_t size = ASIZE (log->key_and_value) / 2; + log_t *log = plog->log; + int size = log->size; EMACS_INT median = approximate_median (log, 0, size); - for (ptrdiff_t i = 0; i < size; i++) - /* Evict not only values smaller but also values equal to the median, - so as to make sure we evict something no matter what. */ - if (XFIXNUM (HASH_VALUE (log, i)) <= median) - { - Lisp_Object key = HASH_KEY (log, i); - EMACS_INT count = XFIXNUM (HASH_VALUE (log, i)); - plog->discarded = saturated_add (plog->discarded, count); - { /* FIXME: we could make this more efficient. */ - Lisp_Object tmp; - XSET_HASH_TABLE (tmp, log); /* FIXME: Use make_lisp_ptr. */ - Fremhash (key, tmp); + for (int i = 0; i < size; i++) + { + EMACS_INT count = get_log_count (log, i); + /* Evict not only values smaller but also values equal to the median, + so as to make sure we evict something no matter what. */ + if (count <= median) + { + plog->discarded = saturated_add (plog->discarded, count); + remove_log_entry (log, i); } - eassert (BASE_EQ (Qunbound, HASH_KEY (log, i))); - eassert (log->next_free == i); - - eassert (VECTORP (key)); - for (ptrdiff_t j = 0; j < ASIZE (key); j++) - ASET (key, j, Qnil); - - set_hash_value_slot (log, i, key); - } + } } /* Record the current backtrace in LOG. COUNT is the weight of this @@ -163,54 +272,52 @@ static void evict_lower_half (struct profiler_log *plog) static void record_backtrace (struct profiler_log *plog, EMACS_INT count) { - eassert (HASH_TABLE_P (plog->log)); - log_t *log = XHASH_TABLE (plog->log); + log_t *log = plog->log; + get_backtrace (log->trace, log->depth); + EMACS_UINT hash = trace_hash (log->trace, log->depth); + int hidx = log_hash_index (log, hash); + int idx = log->index[hidx]; + while (idx >= 0) + { + if (log->hash[idx] == hash + && trace_equal (log->trace, get_key_vector (log, idx), log->depth)) + { + /* Found existing entry. */ + set_log_count (log, idx, + saturated_add (get_log_count (log, idx), count)); + return; + } + idx = log->next[idx]; + } + + /* Add new entry. */ if (log->next_free < 0) evict_lower_half (plog); - ptrdiff_t index = log->next_free; - - /* Get a "working memory" vector. */ - Lisp_Object backtrace = HASH_VALUE (log, index); - eassert (BASE_EQ (Qunbound, HASH_KEY (log, index))); - get_backtrace (backtrace); - - { /* We basically do a `gethash+puthash' here, except that we have to be - careful to avoid memory allocation since we're in a signal - handler, and we optimize the code to try and avoid computing the - hash+lookup twice. See fns.c:Fputhash for reference. 
*/ - Lisp_Object hash; - ptrdiff_t j = hash_lookup (log, backtrace, &hash); - if (j >= 0) - { - EMACS_INT old_val = XFIXNUM (HASH_VALUE (log, j)); - EMACS_INT new_val = saturated_add (old_val, count); - set_hash_value_slot (log, j, make_fixnum (new_val)); - } - else - { /* BEWARE! hash_put in general can allocate memory. - But currently it only does that if log->next_free is -1. */ - eassert (0 <= log->next_free); - ptrdiff_t j = hash_put (log, backtrace, make_fixnum (count), hash); - /* Let's make sure we've put `backtrace' right where it - already was to start with. */ - eassert (index == j); - - /* FIXME: If the hash-table is almost full, we should set - some global flag so that some Elisp code can offload its - data elsewhere, so as to avoid the eviction code. - There are 2 ways to do that, AFAICT: - - Set a flag checked in maybe_quit, such that maybe_quit can then - call Fprofiler_cpu_log and stash the full log for later use. - - Set a flag check in post-gc-hook, so that Elisp code can call - profiler-cpu-log. That gives us more flexibility since that - Elisp code can then do all kinds of fun stuff like write - the log to disk. Or turn it right away into a call tree. - Of course, using Elisp is generally preferable, but it may - take longer until we get a chance to run the Elisp code, so - there's more risk that the table will get full before we - get there. */ - } - } + idx = log->next_free; + eassert (idx >= 0); + log->next_free = log->next[idx]; + log->next[idx] = log->index[hidx]; + log->index[hidx] = idx; + eassert (log->counts[idx] == 0); + log->hash[idx] = hash; + memcpy (get_key_vector (log, idx), log->trace, + log->depth * sizeof *log->trace); + log->counts[idx] = count; + + /* FIXME: If the hash-table is almost full, we should set + some global flag so that some Elisp code can offload its + data elsewhere, so as to avoid the eviction code. + There are 2 ways to do that: + - Set a flag checked in maybe_quit, such that maybe_quit can then + call Fprofiler_cpu_log and stash the full log for later use. + - Set a flag check in post-gc-hook, so that Elisp code can call + profiler-cpu-log. That gives us more flexibility since that + Elisp code can then do all kinds of fun stuff like write + the log to disk. Or turn it right away into a call tree. + Of course, using Elisp is generally preferable, but it may + take longer until we get a chance to run the Elisp code, so + there's more risk that the table will get full before we + get there. */ } /* Sampling profiler. */ @@ -234,6 +341,9 @@ add_sample (struct profiler_log *plog, EMACS_INT count) #ifdef PROFILER_CPU_SUPPORT +/* The sampling interval specified. */ +static Lisp_Object profiler_cpu_interval = LISPSYM_INITIALLY (Qnil); + /* The profiler timer and whether it was properly initialized, if POSIX timers are available. */ #ifdef HAVE_ITIMERSPEC @@ -356,8 +466,8 @@ See also `profiler-log-size' and `profiler-max-stack-depth'. */) if (profiler_cpu_running) error ("CPU profiler is already running"); - if (NILP (cpu.log)) - cpu = make_log (); + if (cpu.log == NULL) + cpu = make_profiler_log (); int status = setup_cpu_timer (sampling_interval); if (status < 0) @@ -367,6 +477,7 @@ See also `profiler-log-size' and `profiler-max-stack-depth'. */) } else { + profiler_cpu_interval = sampling_interval; profiler_cpu_running = status; if (! profiler_cpu_running) error ("Unable to start profiler timer"); @@ -428,30 +539,49 @@ of functions, where the last few elements may be nil. Before returning, a new log is allocated for future samples. 
*/) (void) { - return (export_log (&cpu)); + /* Temporarily stop profiling to avoid it interfering with our data + access. */ + bool prof_cpu = profiler_cpu_running; + if (prof_cpu) + Fprofiler_cpu_stop (); + + Lisp_Object ret = export_log (&cpu); + + if (prof_cpu) + Fprofiler_cpu_start (profiler_cpu_interval); + + return ret; } #endif /* PROFILER_CPU_SUPPORT */ +/* Extract log data to a Lisp hash table. The log data is then erased. */ static Lisp_Object -export_log (struct profiler_log *log) +export_log (struct profiler_log *plog) { - Lisp_Object result = log->log; - if (log->gc_count) + log_t *log = plog->log; + /* The returned hash table uses `equal' as key equivalence predicate + which is more discriminating than the `function-equal' used by + the log but close enough, and will never confuse two distinct + keys in the log. */ + Lisp_Object h = make_hash_table (&hashtest_equal, DEFAULT_HASH_SIZE, + Weak_None, false); + for (int i = 0; i < log->size; i++) + { + int count = get_log_count (log, i); + if (count > 0) + Fputhash (Fvector (log->depth, get_key_vector (log, i)), + make_fixnum (count), h); + } + if (plog->gc_count) Fputhash (CALLN (Fvector, QAutomatic_GC, Qnil), - make_fixnum (log->gc_count), - result); - if (log->discarded) + make_fixnum (plog->gc_count), + h); + if (plog->discarded) Fputhash (CALLN (Fvector, QDiscarded_Samples, Qnil), - make_fixnum (log->discarded), - result); -#ifdef PROFILER_CPU_SUPPORT - /* Here we're making the log visible to Elisp, so it's not safe any - more for our use afterwards since we can't rely on its special - pre-allocated keys anymore. So we have to allocate a new one. */ - if (profiler_cpu_running) - *log = make_log (); -#endif /* PROFILER_CPU_SUPPORT */ - return result; + make_fixnum (plog->discarded), + h); + free_profiler_log (plog); + return h; } /* Memory profiler. */ @@ -474,8 +604,8 @@ See also `profiler-log-size' and `profiler-max-stack-depth'. */) if (profiler_memory_running) error ("Memory profiler is already running"); - if (NILP (memory.log)) - memory = make_log (); + if (memory.log == NULL) + memory = make_profiler_log (); profiler_memory_running = true; @@ -514,7 +644,16 @@ of functions, where the last few elements may be nil. Before returning, a new log is allocated for future samples. */) (void) { - return (export_log (&memory)); + bool prof_mem = profiler_memory_running; + if (prof_mem) + Fprofiler_memory_stop (); + + Lisp_Object ret = export_log (&memory); + + if (prof_mem) + Fprofiler_memory_start (); + + return ret; } @@ -547,50 +686,15 @@ the same lambda expression, or are really unrelated function. */) return res ? Qt : Qnil; } -static Lisp_Object -cmpfn_profiler (Lisp_Object bt1, Lisp_Object bt2, struct Lisp_Hash_Table *h) -{ - if (EQ (bt1, bt2)) - return Qt; - else if (VECTORP (bt1) && VECTORP (bt2)) - { - ptrdiff_t l = ASIZE (bt1); - if (l != ASIZE (bt2)) - return Qnil; - for (ptrdiff_t i = 0; i < l; i++) - if (NILP (Ffunction_equal (AREF (bt1, i), AREF (bt2, i)))) - return Qnil; - return Qt; - } - else - return Qnil; -} - -static Lisp_Object -hashfn_profiler (Lisp_Object bt, struct Lisp_Hash_Table *h) +void +mark_profiler (void) { - EMACS_UINT hash; - if (VECTORP (bt)) - { - hash = 0; - ptrdiff_t l = ASIZE (bt); - for (ptrdiff_t i = 0; i < l; i++) - { - Lisp_Object f = AREF (bt, i); - EMACS_UINT hash1 - = (COMPILEDP (f) ? XHASH (AREF (f, COMPILED_BYTECODE)) - : (CONSP (f) && CONSP (XCDR (f)) && EQ (Qclosure, XCAR (f))) - ? 
XHASH (XCDR (XCDR (f))) : XHASH (f)); - hash = sxhash_combine (hash, hash1); - } - } - else - hash = XHASH (bt); - return make_ufixnum (SXHASH_REDUCE (hash)); +#ifdef PROFILER_CPU_SUPPORT + mark_log (cpu.log); +#endif + mark_log (memory.log); } -static void syms_of_profiler_for_pdumper (void); - void syms_of_profiler (void) { @@ -603,47 +707,20 @@ If the log gets full, some of the least-seen call-stacks will be evicted to make room for new entries. */); profiler_log_size = 10000; - DEFSYM (Qprofiler_backtrace_equal, "profiler-backtrace-equal"); DEFSYM (QDiscarded_Samples, "Discarded Samples"); defsubr (&Sfunction_equal); #ifdef PROFILER_CPU_SUPPORT profiler_cpu_running = NOT_RUNNING; - cpu.log = Qnil; - staticpro (&cpu.log); defsubr (&Sprofiler_cpu_start); defsubr (&Sprofiler_cpu_stop); defsubr (&Sprofiler_cpu_running_p); defsubr (&Sprofiler_cpu_log); #endif profiler_memory_running = false; - memory.log = Qnil; - staticpro (&memory.log); defsubr (&Sprofiler_memory_start); defsubr (&Sprofiler_memory_stop); defsubr (&Sprofiler_memory_running_p); defsubr (&Sprofiler_memory_log); - - pdumper_do_now_and_after_load (syms_of_profiler_for_pdumper); -} - -static void -syms_of_profiler_for_pdumper (void) -{ - if (dumped_with_pdumper_p ()) - { -#ifdef PROFILER_CPU_SUPPORT - cpu.log = Qnil; -#endif - memory.log = Qnil; - } - else - { -#ifdef PROFILER_CPU_SUPPORT - eassert (NILP (cpu.log)); -#endif - eassert (NILP (memory.log)); - } - } |
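
Note: the core technique of the new log_t above is a fixed-capacity, preallocated hash cache: a chained hash index whose `next' array doubles as an intrusive free list, so that lookup and insertion never allocate (essential in a signal handler). The following standalone sketch illustrates that scheme under simplifying assumptions: the names (cache_t, cache_make, cache_incr) are hypothetical and not from the patch, keys are single uintptr_t values rather than vectors of `depth' Lisp objects, and allocation-failure checks are omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct {
  int size, index_size;
  uintptr_t *keys;
  long *counts;          /* 0 marks an unused entry */
  int *index;            /* bucket heads, -1 if empty */
  int *next;             /* chain links; double as the free list */
  int next_free;         /* head of free list, -1 if table is full */
} cache_t;

static cache_t *
cache_make (int size)
{
  cache_t *c = malloc (sizeof *c);
  c->size = size;
  c->index_size = size * 2 + 1;       /* bigger than SIZE, as in the patch */
  c->keys = calloc (size, sizeof *c->keys);
  c->counts = calloc (size, sizeof *c->counts);
  c->index = malloc (c->index_size * sizeof *c->index);
  for (int i = 0; i < c->index_size; i++)
    c->index[i] = -1;
  c->next = malloc (size * sizeof *c->next);
  for (int i = 0; i < size - 1; i++)  /* thread all entries onto the free list */
    c->next[i] = i + 1;
  c->next[size - 1] = -1;
  c->next_free = 0;
  return c;
}

/* Add COUNT to KEY's entry, creating the entry if needed.
   Return false when the table is full (the caller would then evict,
   as record_backtrace does via evict_lower_half).  */
static bool
cache_incr (cache_t *c, uintptr_t key, long count)
{
  int bucket = key % c->index_size;
  for (int i = c->index[bucket]; i >= 0; i = c->next[i])
    if (c->keys[i] == key)
      {
        c->counts[i] += count;        /* existing entry: bump the count */
        return true;
      }
  if (c->next_free < 0)
    return false;                     /* full; caller must evict first */
  int i = c->next_free;               /* pop an entry off the free list */
  c->next_free = c->next[i];
  c->next[i] = c->index[bucket];      /* push it onto the bucket chain */
  c->index[bucket] = i;
  c->keys[i] = key;
  c->counts[i] = count;
  return true;
}

Because every array is allocated up front, the hot path (cache_incr, mirroring record_backtrace) does only reads, writes, and integer arithmetic: expected O(1) per sample and async-signal-safe, which is exactly what the patch needs where the old Lisp hash table's hash_put could allocate.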
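A companion sketch for the eviction path, building on the cache_t above: unlinking an entry from its bucket chain and returning it to the free list mirrors remove_log_entry, and evicting everything at or below a median mirrors evict_lower_half. The median itself would come from the caller, e.g. computed by the recursive approximate_median shown in the patch; cache_remove and cache_evict_lower_half are illustrative names.

/* Unlink entry I from its bucket chain, mark it unused, and put it
   back on the free list.  */
static void
cache_remove (cache_t *c, int i)
{
  int bucket = c->keys[i] % c->index_size;
  int *p = &c->index[bucket];
  while (*p != i)                 /* walk the chain to the link for I */
    p = &c->next[*p];
  *p = c->next[i];                /* splice I out of the chain */
  c->counts[i] = 0;               /* 0 marks the entry unused */
  c->next[i] = c->next_free;      /* push it onto the free list */
  c->next_free = i;
}

/* Evict every used entry whose count is <= MEDIAN.  Evicting on
   equality, not just below, guarantees something is freed even when
   all counts are identical -- the same reasoning as in the patch.  */
static void
cache_evict_lower_half (cache_t *c, long median)
{
  for (int i = 0; i < c->size; i++)
    if (c->counts[i] > 0 && c->counts[i] <= median)
      cache_remove (c, i);
}

The counts[i] > 0 guard matters in this sketch because cache_remove on a free-list entry would corrupt the chains; in the patch it is implicit, since evict_lower_half only runs when next_free < 0, i.e. when every entry is in use.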