Source file: /~heha/hs/dos/doslfn/doslfn.zip/MK_TABLE.C

#include <stdio.h>
#include <string.h>
	// Passing any second command-line argument (its value is ignored)
	// creates a GB table instead of a GBK table from cp936.txt:
	// all lead and trail bytes below 0xA1 are ignored.
	// ONLY GIVE IT WHEN COMPILING CP936.TXT!
	// Maybe the same "shortcut" works for the Korean and Taiwanese files too.
/* This program converts ASCII code tables provided at www.unicode.org
 * to binary Unicode tables usable for (Volkov Commander and) DOSLFN.
 * These ASCII code tables are at
 * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/
 *
 * To use this program, give the text file name, e.g.
 * mk_table cp737.txt
 * That will produce a file named "cp737uni.tbl".
 *
 * Compiled and linked with Borland C++ 3.1, but should work
 * with any compiler (however, target must have Intel byte order).
 *
 * New version with DBCS support. But without "disk full" check!
 * Format of DBCS .TBL file: see TBL.TXT
 * h#s 01/03
 */
/* 16-bit Unicode value.  Redefined here because Borland C declares
 * wchar_t as a char. */
typedef unsigned short wchar_t;

/* Dimensions of the conversion tables below. */
enum {
 HIGH_BYTE_COUNT  = 0x80,	/* entries in sb_uni, rows in db_uni */
 TRAIL_BYTE_COUNT = 0xC0	/* columns in db_uni */
};

/* Double-byte Unicode values, one row per lead index (48K in total). */
wchar_t db_uni[HIGH_BYTE_COUNT][TRAIL_BYTE_COUNT];

/* Single-byte Unicode values, or lead indices for DBCS lead bytes. */
wchar_t sb_uni[HIGH_BYTE_COUNT];

/* [0] = minimum trail byte seen, [1] = maximum trail byte seen. */
unsigned char minmaxtrail[2];

/* Non-zero once the input has been recognised as a DBCS table. */
char dbcs = 0;

/* Write COUNT 16-bit values from WORDS to F, low byte first, so that the
 * output does not depend on the byte order of the machine running this tool.
 * Returns 0 on success, -1 as soon as a byte cannot be written.
 */
static int write_words_le(FILE *f,const wchar_t *words,unsigned int count) {
 while (count--) {
  unsigned int w=*words++;
  if (putc((int)(w&0xFF),f)==EOF) return -1;
  if (putc((int)((w>>8)&0xFF),f)==EOF) return -1;
 }
 return 0;
}

/* Convert a code page text table (argv[1]) into a binary Unicode table.
 *
 * argv[1]  input file; the first number in its base name is the code page
 *          and determines the output name (e.g. cp737.txt -> cp737uni.tbl)
 * argv[2]  optional, value ignored: skip all double-byte codes whose lead
 *          or trail byte is below 0xA1
 *
 * Returns 0 on success, 1 if no table name could be obtained,
 * 3 on usage errors and file errors.
 */
int main(int argc,char**argv) {
 FILE *f;
 unsigned int i,db_index=0;
 unsigned int cp=0;
 const char *p,*q;
 char line[256],cpname[64];
 int failed;

 if (argc<2) {
  fputs("You must provide an input file name!\n",stderr);
  fputs("(Optionally any switch to ignore all DBCS lead and trail bytes below 0xA1)\n",
    stderr);
  return 3;
 }
 f=fopen(argv[1],"r");
 if (!f) {
  fprintf(stderr,"Cannot open input file %s!\n",argv[1]);
  return 3;
 }
 /* Take the code page number from the file name proper, so that digits
  * in a directory or drive part are not mistaken for it.
  */
 p=argv[1];
 for (q=argv[1];*q;q++) {
  if (*q=='/' || *q=='\\' || *q==':') p=q+1;
 }
 while (*p && (*p<'0' || *p>'9')) p++;
 if (sscanf(p,"%u",&cp)!=1) {
  fprintf(stderr,"Cannot find a code page number in file name %s!\n",argv[1]);
  fclose(f);
  return 3;
 }

 cpname[0]=0;

 /* 0xFFFF marks "no entry" in both tables */
 memset(sb_uni,0xFF,sizeof(sb_uni));
 minmaxtrail[0]=0xFF; minmaxtrail[1]=0x00;
 memset(db_uni,0xFF,sizeof(db_uni));

 while (fgets(line,sizeof(line),f)) {
  /* unsigned int, because that is what "%X" stores through its pointer */
  unsigned int j,c;
  unsigned char lead,trail;
  unsigned int index;

  /* Drop the rest of an over-long line, otherwise its tail would be
   * parsed as if it were a line of its own.
   */
  if (!strchr(line,'\n')) {
   int ch;
   do ch=getc(f); while (ch!=EOF && ch!='\n');
  }
  if (line[0]=='#') {
   /* Header comment: the table name follows "_DOS".
    * The field width keeps the name inside cpname[].
    */
   if (!cpname[0]) sscanf(line,"%*[^_]_DOS%63s",cpname);
   continue;
  }
  c=0xFFFF;		/* if there is an unused entry (e.g. Turkish) */
  if (sscanf(line,"%X %X",&j,&c)<1) continue;
  if (j>0xFFFF || c>0xFFFF) {
   printf("ERROR: Code 0x%X or Unicode 0x%X does not fit into 16 bits, cannot handle!\n",j,c);
   continue;
  }
  if (j<0x80) {
   if (c!=j) printf("Code 0x%02X is not ASCII, it's 0x%02X\n",j,c);
  }else if (j<0x100){
   /* A lead byte declaration is stored as 0 until an index is assigned */
   if (strstr(line,"#DBCS LEAD BYTE")) {
    c=0;
    if (!dbcs) {
     puts("MESSAGE: Input file is DBCS");
     dbcs=1;
    }
   }
   sb_uni[j-0x80]=(wchar_t)c;
  }else{
   if (!dbcs) {
    puts("MESSAGE: Input file is DBCS");
    dbcs=1;
   }
   lead=(unsigned char)(j>>8); trail=(unsigned char)(j&0xFF);
   if (lead<0x80) {
    printf("ERROR: Lead byte is 0x%02X, below 80h, cannot handle!\n",(unsigned)lead);
    continue;
   }
   if (trail<0x40) {
    printf("ERROR: trail byte is 0x%02X, below 40h, cannot handle!\n",(unsigned)trail);
    continue;
   }
   if (argc>=3) {
    if (lead <0xA1) continue;
    if (trail<0xA1) continue;
   }
   index=sb_uni[lead-0x80];	// should be 0 at first
   if (index==0xFFFF) {
    printf("MESSAGE: lead byte 0x%02X not declared, will use it automatically\n",(unsigned)lead);
    index=0;
   }else if (index>=0x80) {
    printf("ERROR: lead byte 0x%02X is already assigned to U+0x%04X!\n",(unsigned)lead,index);
    continue;
   }
   if (!index) {
    /* Lead indices must stay below 0x80: the check above treats any
     * larger sb_uni value as a Unicode value.
     */
    if (db_index>=0x7F) {
     printf("ERROR: Too many lead bytes, cannot handle lead byte 0x%02X!\n",(unsigned)lead);
     continue;
    }
    index=++db_index;
    sb_uni[lead-0x80]=(wchar_t)index;
   }
   /* Only entries that are really stored may widen the trail byte range */
   if (minmaxtrail[0]>trail) minmaxtrail[0]=trail;
   if (minmaxtrail[1]<trail) minmaxtrail[1]=trail;
   db_uni[index-1][trail-0x40]=(wchar_t)c;
  }
 }
 if (ferror(f)) {
  fclose(f);
  printf("ERROR: Cannot read input file %s!\n",argv[1]);
  return 3;
 }
 fclose(f);

 if (dbcs) {
  printf("DBCS Summary: lead bytes used=%u, trail byte range=0x%02X..0x%02X\n",
    db_index,(unsigned)minmaxtrail[0],(unsigned)minmaxtrail[1]);
  /* From here on minmaxtrail[1] holds the length of the trail byte range.
   * Without any stored double-byte code the range is empty.
   */
  if (db_index) minmaxtrail[1]-=minmaxtrail[0]-1;
  else minmaxtrail[1]=0;
 }
 if (!cpname[0]) {
  puts("Missing header (should contain _DOSlanguage)!");
  printf("Give a short name: ");
  if (scanf("%15s",cpname)!=1 || !cpname[0]) return 1;
 }
 /* Output name: the zero-padded number with leading zeros replaced by "cp" */
 sprintf(line,"%05uuni.tbl",cp);
 if (cp<10000) line[0]='c';
 if (cp<1000)  line[1]='p';
 f=fopen(line,"wb");
 if (!f) {
  printf("ERROR: Cannot create output file %s!\n",line);
  return 3;
 }
 /* Text header, then a type byte: 1 = single-byte table, 2 = DBCS table */
 failed=fprintf(f,"Unicode (%s)\r\n%c",cpname,dbcs+1)<0;
 if (dbcs) {
  if (fwrite(minmaxtrail,1,2,f)!=2) failed=1;
 }
 if (write_words_le(f,sb_uni,sizeof(sb_uni)/sizeof(sb_uni[0]))) failed=1;
 if (dbcs) {
  /* One row per lead index, cut down to the trail byte range in use */
  for(i=0;i<db_index;i++) {
   if (write_words_le(f,db_uni[i]+(minmaxtrail[0]-0x40),minmaxtrail[1])) failed=1;
  }
 }
 if (failed) {
  fclose(f);
  printf("ERROR: Cannot write output file %s!\n",line);
  return 3;
 }
 if (fclose(f)) {
  puts("ERROR: Cannot close file!");
  return 3;
 }
 return 0;
}
Detected encoding: ASCII (7 bit)2