cc65 / cc65

cc65 - a freeware C compiler for 6502 based systems

Home Page:https://cc65.github.io

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Compiler produces invalid code

mrdudz opened this issue · comments

I came back to a project of mine and noticed cc65 produces code that does not assemble (branch too far). Bisecting shows this happened after commit d8a3938

@colinleroy would you have a look please? It looks like the optimizer replaces some JNEs by BNEs when it shouldn't do that

Attached is a somewhat stripped down test case

cl65 --add-source -Osir --codesize 180 -Wc --local-strings -t none -o bug.prg bug.c
#include <string.h>

/* Integer aliases used by this reproducer. FRESULT and DRESULT are both
 * one-byte (unsigned char) result codes. */
typedef unsigned char FRESULT;
typedef unsigned char	BYTE;
typedef unsigned long	DWORD;
typedef unsigned short	WORD;

typedef BYTE	DRESULT;

/* File system state object used by the reproducer: five BYTE fields,
 * one WORD field and ten DWORD fields. Only fs_type, fsi_flag, winsect,
 * free_clust, last_clust and fsi_sector are accessed by sync(). */
typedef struct _FATFS_ {
	BYTE	fs_type;	/*!< FAT sub type */
	BYTE	csize;		/*!< Number of sectors per cluster */
	BYTE	n_fats;		/*!< Number of FAT copies */

	BYTE	wflag;		/*!< win[] dirty flag (1:must be written back) */
	BYTE	fsi_flag;	/*!< fsinfo dirty flag (1:must be written back) */
	WORD	n_rootdir;	/*!< Number of root directory entries (0 on FAT32) */
	DWORD	last_clust;	/*!< Last allocated cluster */
	DWORD	free_clust;	/*!< Number of free clusters */
	DWORD	fsi_sector;	/*!< fsinfo sector */
	DWORD	cdir;		/*!< Current directory (0:root)*/
	DWORD	sects_fat;	/*!< Sectors per fat */
	DWORD	max_clust;	/*!< Maximum cluster# + 1. Number of clusters is max_clust - 2 */
	DWORD	fatbase;	/*!< FAT start sector */
	DWORD	dirbase;	/*!< Root directory start sector (Cluster# on FAT32) */
	DWORD	database;	/*!< Data start sector */
	DWORD	winsect;	/*!< Current sector appearing in the win[] */
} FATFS;

FATFS FatFsObj; /* actually extern, but both variants produce the error */
#pragma zpsym ("FatFsObj")          /* this is the problem */

/* FR_OK is the success result code and FS_FAT32 the fs_type value that
 * sync() checks for. BS_55AA and the FSI_* values are byte offsets into
 * FatFsObjwin at which sync() stores data. */
#define FR_OK 0
#define FS_FAT32	3
#define BS_55AA			510

#define	FSI_LeadSig		0
#define	FSI_StrucSig		484
#define	FSI_Free_Count		488
#define	FSI_Nxt_Free		492

/* 512-byte buffer that sync() clears and fills. */
BYTE FatFsObjwin[512];

/* Store val as a WORD (ST_WORD) or DWORD (ST_DWORD) at address ptr by
 * casting ptr to the matching pointer type. The expansions are not wrapped
 * in parentheses, so they are only suitable as complete statements. */
#define	ST_WORD(ptr,val)	*(WORD*)(BYTE*)(ptr)=(WORD)(val)
#define	ST_DWORD(ptr,val)	*(DWORD*)(BYTE*)(ptr)=(DWORD)(val)

/* Stub for the reproducer: the sector argument is unused and the function
 * always returns 0. */
DRESULT __fastcall__ disk_write_win (DWORD sector)
{
    return 0;
}

/* Stub for the reproducer: the sector argument is unused and the function
 * always returns 0 (the value of FR_OK). */
FRESULT __fastcall__ move_window (DWORD sector)
{
    return 0;
}

/* Function whose generated code triggers the reported "branch too far"
 * error.
 *
 * Calls move_window(0). If that returns FR_OK, FatFsObj.fs_type equals
 * FS_FAT32 and FatFsObj.fsi_flag is non-zero, it:
 *   - sets FatFsObj.winsect to 0 and zero-fills FatFsObjwin,
 *   - stores 0xAA55 as a WORD at offset BS_55AA,
 *   - stores two DWORD constants at FSI_LeadSig and FSI_StrucSig,
 *   - copies free_clust and last_clust to FSI_Free_Count and FSI_Nxt_Free,
 *   - passes FatFsObj.fsi_sector to disk_write_win() (result ignored),
 *   - clears FatFsObj.fsi_flag.
 *
 * Returns the result of move_window().
 */
FRESULT __fastcall__ sync (void)
{
	FRESULT res;

	res = move_window(0);
	if (res == FR_OK) {
		if ((FatFsObj.fs_type == FS_FAT32) && FatFsObj.fsi_flag) {
			FatFsObj.winsect = 0;
			memset(FatFsObjwin, 0, 512);
			ST_WORD(FatFsObjwin+BS_55AA, 0xAA55);
			ST_DWORD(FatFsObjwin+FSI_LeadSig, 0x41615252);
			ST_DWORD(FatFsObjwin+FSI_StrucSig, 0x61417272);
			ST_DWORD(FatFsObjwin+FSI_Free_Count, FatFsObj.free_clust);
			ST_DWORD(FatFsObjwin+FSI_Nxt_Free, FatFsObj.last_clust);
			disk_write_win(FatFsObj.fsi_sector);
			FatFsObj.fsi_flag = 0;
		}
	}

	return res;
}

/* Entry point of the reproducer: calls sync() once, ignores its result
 * and returns 0. */
int main (void)
{
    sync();
    return 0;
}

😮 I admit I have no idea how coptlong.c has an influence over bnes/jnes... But I'll look into it!

I found it a bit odd that removing the pragma "fixes" it. That hints at "longs in zeropage" being the problem somehow

It was - the distance calculated by coptjmp.c was wrong because I didn't copy the relevant CodeEntry field - size.

It's not completely fixed. Here is the test (please include it with the PR)

bug2357-optimize-long.c:


/* bug #2357 - Compiler produces invalid code

   regression introduced in d8a3938, fixed in
*/

/* Structure used by the reduced test: two unsigned char members followed
 * by four unsigned long members. foo() reads somechar1, somechar2,
 * somelong1 and somelong2 only. */
typedef struct {
    unsigned char    somechar1;
    unsigned char    somechar2;
    unsigned long    somelong1;
    unsigned long    somelong2;
    unsigned long    somelong3;
    unsigned long    somelong4;
} ZPSTRUCT;

/* Object declared as a zero page symbol via #pragma zpsym. */
ZPSTRUCT zpstruct;
#pragma zpsym ("zpstruct")          /* this is the problem */

/* 512-byte destination buffer for the stores in foo(). */
unsigned char chararray[512];

/* Store val as an unsigned long at address ptr by casting ptr to
 * unsigned long*. The expansion is not wrapped in parentheses, so it is
 * only suitable as a complete statement. */
#define    ST_L(ptr,val)    *(unsigned long*)(unsigned char*)(ptr)=(unsigned long)(val)

/* Reduced test function; always returns 0.
 *
 * When res is 0, zpstruct.somechar1 equals 3 and zpstruct.somechar2 is
 * non-zero, it performs six unsigned long stores into chararray through
 * ST_L: three constants, then somelong2, then somelong1 twice (the last
 * two statements are identical).
 */
unsigned char __fastcall__ foo (unsigned char res)
{
    if (res == 0) {
        if ((zpstruct.somechar1 == 3) && zpstruct.somechar2)
        {
            ST_L(chararray+10, 0x41615252);
            ST_L(chararray+20, 0x41615252);
            ST_L(chararray+484, 0x61417272);
            ST_L(chararray+488, zpstruct.somelong2);
            ST_L(chararray+492, zpstruct.somelong1);
            ST_L(chararray+492, zpstruct.somelong1);
        }
    }
    return 0;
}

/* Entry point of the reduced test: calls foo(42), ignores its result and
 * returns 0. Because foo() only enters its body when res is 0, none of
 * the stores execute at run time. */
int main (void)
{
    foo(42);
    return 0;
}

Hi!

So in fact it's not a problem with the long optimisation, but with the wrong use of #pragma zpsym here.
This code demonstrates it without the use of longs, breaking in the same way before and after d8a3938 :

/* bug #2357 - Compiler produces invalid code when we lie to it ;)
*/

/* Variable declared as a zero page symbol via #pragma zpsym. */
int test;
#pragma zpsym ("test")          /* this is the problem */

/* 512-byte buffer; foo() writes only element 1. */
unsigned char chararray[512];

/* Empty function called between the stores in foo().
 * NOTE(review): judging by the name, the call is presumably there to make
 * the compiler reload the value before each store - confirm. */
void forget_a(void) {
  
}
/* Test function without longs; always returns 0.
 *
 * When res is 0, it repeatedly stores test into chararray[1], calling
 * forget_a() after each store.
 *
 * NOTE(review): the comment inside the body speaks of 15 calls, but only
 * 14 store/call pairs are present below - confirm which count is intended.
 */
unsigned char __fastcall__ foo (unsigned char res)
{
    if (res == 0) {
      /* 8 bytes if test in ZP, 9 otherwise 
       * => the distance checker in coptjmp.c 
       * will count as if test was in ZP because that's
       * what we told it. The real distance at link
       * time will be higher because test is NOT in
       * ZP.
       * 15 calls * 8 = 120 bytes => bne
       * 15 calls * 9 = 135 bytes => jne
       */
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
      chararray[1]=test; forget_a();
    }
    return 0;
}

/* Entry point of the test: calls foo(42), ignores its result and returns
 * 0. Because foo() only enters its body when res is 0, the stores do not
 * execute at run time. */
int main (void)
{
    foo(42);
    return 0;
}

What you want is, instead of

int test;
#pragma zpsym ("test")          /* this is the problem */

Something like

#pragma data-name(push, "ZEROPAGE", "zp")
#pragma bss-name(push, "ZEROPAGE", "zp")
int test;
#pragma data-name(pop)
#pragma bss-name(pop)
#pragma zpsym ("test")  /* this is now true */

I've pushed a new PR because the not-updating of the CodeEntry size still was a problem, but without a test, because there won't be a way to make your code work as-is when the pragma-zpsym'd variable is not really in ZP ;)

because there won't be a way to make your code work as-is when the pragma-zpsym'd variable is not really in ZP ;)

Oh it actually IS in zeropage (in my project, not in that test)

Will have a look now and produce a new test eventually.....

BUT I think the compiler should still not (and never) produce invalid code - I don't know how to achieve that right now though :)

For this particular thing, where the compiler generates code as it has been asked for, but the linker does what it can with the reality, the only thing I can think of is to change that zp/absolute warning to an error

I am really baffled - I cannot reproduce the problem (except when compiling my project). I moved the variable to a separate file, with the zp pragmas and all, and it just works. WTH

One thing i noticed: I am getting the error(s) when compiling - not when linking (cl65 -Osir --codesize 180 -Wc --local-strings -t none -c ...). And i am NOT getting any warnings about zpsym (so the header/pragmas should be correct)

Do you mean with or without my PR #2361 applied?

cl65 does three things (strace -f cl65 --add-source -Osir --codesize 180 -Wc --local-strings -t none -o bug.prg test.c 2>&1|grep exec):

"cc65", "-T", "-Osir", "--codesize", "180", "--local-strings", "-t", "none", "test.c"
"ca65", "-t", "none", "test.s"
"ld65", "-o", "bug.prg", "-t", "none", "test.o", "none.lib"

cc65 only outputs a .s file (that's the step I was referring to when I mentioned "compiling")
ca65 assembles all the files and outputs .o files IIRC
ld65 links them and outputs the binary

Is your project public? I could give it a try.

Do you mean with or without my PR #2361 applied?

without, of course

cl65 does three things

note the -c above - so it compiles and assembles (which produces the error) but does not link (that happens much later)

Is your project public?

No :) This is the FAT library used in the menu system of the Turbo-Chameleon from ICOMP.

Mmmmh and apparently it still breaks with your patch too. Args

So it breaks with master and my last patch.
Does it not break with 47e7ed2 right before I introduced long assignment/copy optimisation ?

Mmmmh and apparently it still breaks with your patch too. Args

That was my fault, I screwed it up and had a compiled .s from a previous quick test in the wrong directory (and my makefile picked it up and didn't even recompile). It DOES work with your patch (phew).

The problem is still creating a test that breaks without and works with your patch :)

I think it's hard to reproduce because the long assign optimisation replaces:

0.      lda     #IMM  
1.      sta     sreg+1 
2.      lda     #IMM  
3.      sta     sreg     
4.      lda     #IMM  
5.      ldx     #IMM  
6.      sta     YYY      
7.      stx     YYY+1  
8.      ldy     sreg     
9.      sty     YYY+2    
10.      ldy     sreg+1
11.     sty     YYY+3   
** and simplify, if not used right after and no branching occurs, to
**      lda    XXX+3
**      sta    YYY+3
**      lda    XXX+2
**      sta    YYY+2
**      ldx    XXX
**      lda    XXX+1
**      sta    YYY
**      stx    YYY+1

And it did so by copying the arg from line 11 to line 1, and the one from line 9 to line 3, then removing the four last lines.
It did not copy the instruction size, so lines 1 and 3 still had a size of 2 (STA $ZP) instead of their new real size of 3 (STA $xxxx - absolute), which threw off the branch distance calculation by 2 bytes.
But it removed the four last lines, removing 10 bytes. So it should not make branches longer, and so that shouldn't be a problem. maybe a later step inlines some subroutine?

Could you try without -i ?
Or maybe diff -u bug.s working.s ?

No, forget it, that's not it. The removed ten bytes are accounted for, and that's what makes coptjmp think Distance <= 125. In reality it's probably 128 or 129. Let me check again if I can trigger it reliably.

The only difference between working and non working is/was, that after the second IF there are two BNEs (non working) instead of JNEs (working)

(Unfortunately, even with your patch, the code still blows up - as in, it compiles but does not work. I will have to bisect that one now... let's see what commit breaks THAT)

Leaving this open until we have a test.... this is really annoying ;/

Leaving this open until we have a test.... this is really annoying ;/

Got it! :)

I think the zeropage thing was just a red herring; in fact, the only problem was counting branch distance with instructions having the wrong size. It went away with a variable correctly placed in ZP because then the calculation was no longer off.

nice one, thanks!