Register
4coder»Forums»Primitive code aware syntax highlighting
Alexey
2 posts
Primitive code aware syntax highlighting
2 weeks ago Edited by Alexey on Feb. 4, 2020, 9:46 p.m. Reason: Initial post
After a few not-quite-fast-enough attempts at brining code aware syntax highlighting to my 4coder custom layer I am happy to report that it's finally working! This includes highlighting of functions, macros, and types (everything that's currently being stored by the code index). Preview with placeholder colors:



As this seems to be quite a popular feature in other code editors, I though that I'd share my implementation. The idea is simple: tokens are hashed and placed into a hash table. When determining if a token should be highlighted its hash is recomputed at looked up in the table. With this setup I finally got it to not dip below 60fps (tested on the 4coder custom layer project).

I rolled the simplest possible separate chaining hashtable.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
struct Code_Index_Note_Mini {
    Code_Index_Note_Mini *next;
    Code_Index_Note_Kind note_kind;
    u64 hash;
};

struct Code_Index_Table {
    Code_Index_Note_Mini **table;
    i32 size;
};

struct Code_Index_File{
    Code_Index_Nest_List nest_list;
    Code_Index_Nest_Ptr_Array nest_array;
    Code_Index_Note_List note_list;
    Code_Index_Note_Ptr_Array note_array;
    Code_Index_Table note_table; /* <--- this */
    Buffer_ID buffer;
};


Table construction and lookup:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
function Code_Index_Table
code_index_note_table_from_array(Arena *arena, Code_Index_Note_Ptr_Array note_array)
{
    Code_Index_Table result = {};
    
    result.size = 1000;
    result.table = push_array_zero(arena, Code_Index_Note_Mini*, result.size);
    
    for (i32 i = 0; i < note_array.count; ++i) {
        struct Code_Index_Note_Mini *mini_note = push_array(arena, Code_Index_Note_Mini, 1);
        
        mini_note->hash = _djb2(note_array.ptrs[i]->text);
        mini_note->note_kind = note_array.ptrs[i]->note_kind;
        mini_note->next = 0;
        
        i32 bucket = mini_note->hash % result.size;
        if (result.table[bucket] != 0) {
            mini_note->next = result.table[bucket];
        }
        result.table[bucket] = mini_note;
    }
    
    return(result);
}

function Code_Index_Note_Kind
code_index_table_lookup(Code_Index_Table table, u64 hash)
{
    i32 bucket = hash % table.size;
    
    if (table.table[bucket] == 0) {
        return(CodeIndexNote_NONE);
    } 
    
    Code_Index_Note_Mini *note = table.table[bucket];
    
    while (note) {
        if (note->hash == hash) {
            return(note->note_kind);
        }
        note = note->next;
    }
    
    return(CodeIndexNote_NONE);
}


The construction routine is called at the end of generic_parse_full_input_breaks:
1
2
3
4
5
6
...
if (result){
        index->nest_array = code_index_nest_ptr_array_from_list(state->arena, &index->nest_list);
        index->note_array = code_index_note_ptr_array_from_list(state->arena, &index->note_list);
        index->note_table = code_index_note_table_from_array(state->arena, index->note_array);  /* <--- this */
}


Updated draw_cpp_token_colors with the lookup:
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
function void
draw_cpp_token_colors(Application_Links *app, Text_Layout_ID text_layout_id, Token_Array *array, Buffer_ID current_buffer){
    Range_i64 visible_range = text_layout_get_visible_range(app, text_layout_id);
    i64 first_index = token_index_from_pos(array, visible_range.first);
    Token_Iterator_Array it = token_iterator_index(0, array, first_index);
    for (;;){
        Token *token = token_it_read(&it);
        if (token->pos >= visible_range.one_past_last){
            break;
        }
        
        String_Const_u8 token_text = push_token_lexeme(app, &global_code_index.node_arena, current_buffer, token);
        u64 hash = _djb2(token_text);
        bool found = false;
        
        for (Buffer_ID buffer = get_buffer_next(app, 0, Access_Always);
             !found && buffer != 0;
             buffer = get_buffer_next(app, buffer, Access_Always))
        {
            Code_Index_File *file = code_index_get_file(buffer);
            if (file != 0) {            
                Code_Index_Note_Kind note_kind = code_index_table_lookup(file->note_table, hash);
                switch (note_kind) {
                    case CodeIndexNote_Function: {
                        token->kind = TokenBaseKind_UserDefinedFunction;
                        found = true;
                        break;
                    };
                    
                    case CodeIndexNote_Macro: {
                        token->kind = TokenBaseKind_UserDefinedMacro;
                        found = true;
                        break;
                    };
                    
                    case CodeIndexNote_Type: {
                        token->kind = TokenBaseKind_UserDefinedType;
                        found = true;
                        break;
                    }
                }
            }
        }
        
        FColor color = get_token_color_cpp(*token);
        ARGB_Color argb = fcolor_resolve(color);
        paint_text_color(app, text_layout_id, Ii64_size(token->pos, token->size), argb);
        if (!token_it_inc_all(&it)){
            break;
        }
    }
}


You also need to add a few items to the Token_Base_Kind enum, as well as register new colors in 4coder_default_colors.h, but that's not worth posting imo (as if everything above is..)
Jesse
64 posts

Programmer at NASA GSFC

Primitive code aware syntax highlighting
1 week, 6 days ago Edited by Jesse on Feb. 5, 2020, 7:06 p.m.
Nice!

If performance starts to become troublesome, I wonder if making
1
Code_Index_Note_Mini
more cache friendly might grant the largest gain for the time investment.

The meaning of life is to dance and sing while the music is being played.
Pascal Beyer
6 posts
Primitive code aware syntax highlighting
1 week, 4 days ago
Really cool thank you for this :)