/*
    FRESHNEWS

    Freshnews is a wrapper for fetchnews.
    Freshnews decides *when* fetchnews should run

    Purpose:
        to reduce the load of running a private news server such as
        leafnode on both the local and the remote systems, while at
        the same time, *improving* the quality of the newsfeed.

    Method:
        Retain knowledge of the age of the newsfeed.
        Examine the local system to determine the need to run fetchnews
        based on both outgoing posts and the use or potential use of
        the news spool.

    Why not put this in Leafnode?
        It is not leafnode's job to make complex decisions about
        system load.  Some of the things freshnews does are very
        Linux-specific.  Leafnode achieves a higher degree of portability
        by specifically not using such OS specific techniques.
*/

/*
    COPYRIGHT AND LICENSE

    This program is:
    (C) COPYRIGHT 2000 OISIN CURTIN, MONTREAL, ALL RIGHTS RESERVED

    License to use this program is hereby granted to the public.

    This program is provided As-Is, without warranty of any kind,
    including but not limited to warraty of fitness for any task.
    The author accepts no responsibility for any problem even if
    caused by this program.  Use it freely, but use it at your own risk.

    This is not a GNU program.
*/

#include "limits.h"
#include "stdbool.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "time.h"

#include "sys/types.h"
#include "sys/stat.h"
#include "dirent.h"
#include "fcntl.h"
#include "syslog.h"
#include "unistd.h"

#include "linux/unistd.h"

#define SECOND 1                    // using time() based time.
#define MINUTE (60 * SECOND)
#define HOUR   (MINUTE * 60)

/* ******************************************************************* */
/* **********************   CONFIGURE ME HERE   ********************** */
/* *************                                         ************* */

bool debug     = false; // messages to stdout about how each decision is made
bool log_fetch = true;  // one message to syslog when running news
bool dmode     = true;  // daemon:
                        //   false -> check once and exit, (useless, really)
                        //   true  -> run continuously

static const char *dir = "/var/spool/news";   // base directory of leafnode

#define CYCLE_MIN    (MINUTE)       // how often to test in daemon mode
#define CYCLE_READ   (MINUTE * 10)  // time while reading news locally
#define CYCLE_NET    (MINUTE * 15)  // time if reading news remotely
#define CYCLE_NETMAX (MINUTE * 30)  // when to ignore old net reads
#define CYCLE_LOGIN  (MINUTE * 40)  // preparation before reading happens
#define CYCLE_MAX    (HOUR * 30)    // longest delay ever: vacation?

#define FETCHNEWS "/usr/local/sbin/fetchnews"   // actual command

/* *************                                         ************* */
/* **********************   end configuration   ********************** */
/* ******************************************************************* */

// Debug aid: run `cmd` only when the global `debug` flag is set.
// Wrapped in do/while(0) so that `D(...);` is always exactly one statement.
#define D(cmd) do { if (debug) { cmd; } } while (0)

// NOTE: both macros evaluate their arguments more than once; only pass
// side-effect-free expressions.
#define max(a,b) (((a)>(b)) ? (a) : (b))
#define min(a,b) (((a)<(b)) ? (a) : (b))

#ifndef NAME_MAX
# define NAME_MAX 1024  // overkill, but the OS should have defined it.
#endif

// Shell commands handed to system().  system() runs /bin/sh, which need not
// be bash, so only POSIX redirections are used: the bash-only `&>` form is
// read by a POSIX shell as "run in background, then redirect" and would make
// system() report success unconditionally.

// Exit status 0 when `who` lists at least one logged-in user.
#define CMD_USER_ONLINE "who 2>/dev/null | grep . >/dev/null 2>&1"

// Exit status 0 when some process is named exactly slrn, rn or tin.
// The lines scanned look like "Name:<TAB>slrn"; the alternatives are grouped
// so that the tab and the end-of-line anchor apply to every one of them.
#define CMD_NEWSREADER_RUNNING \
    "grep -h ^Name /proc/[1-9]*/status 2>/dev/null | " \
    "grep -E '\t(slrn|rn|tin)$' >/dev/null 2>&1"

// bailout -- report a fatal error on stderr and terminate with status 1
//
//   line -- source line of the failure (callers pass __LINE__)
//
void bailout( int line )
{
    fprintf(stderr, "It was a fatal error on line %d, I'm quitting!\n", line);
    exit(1);
}

// pickTime -- select one timestamp from an already-filled stat record
//
//   mac -- 0 = st_mtime, 1 = st_atime, 2 = st_ctime,
//          anything else = the most recent of the three
//
static time_t pickTime( const struct stat *st, int mac )
{
    switch( mac ){
    case 0:  return st->st_mtime;
    case 1:  return st->st_atime;
    case 2:  return st->st_ctime;
    default: return max( st->st_mtime, max( st->st_atime, st->st_ctime ) );
    }
}

// chkTime -- return the last time a file or directory was touched
//
//   name -- name of the file
//   mac  -- which of modified/accessed/changed time to examine
//           0 = modified, 1 = accessed, 2 = st_ctime, 3 = check all
//
// Different systems deal with a/m/ctime in different ways, so mode 3 takes
// the most recent of the three to minimize portability issues.  Of course,
// this will have to be tuned if any OS still records the time of a
// stat-probe.
//
// Does not return when `name` cannot be stat()ed: the error is reported on
// stderr and the program exits through bailout().
//
time_t chkTime( const char *name, int mac )
{
    struct stat st;
    if( stat( name, &st ) ){
        perror( name );     // first, while errno still belongs to stat()
        fprintf(stderr, "the name is <%s>, the first char %d.\n",
                name, *name );
        bailout(__LINE__);
    }

    time_t ntime = pickTime( &st, mac );
    D( printf("%s chkTime(%d) is %s", name, mac, ctime(&ntime)) );
    return ntime;
}

// dirTime -- return the time of the most recent file in a directory
//
//   name -- pathname of the directory
//   mac  -- which of modified/accessed/changed time to examine
//           0 = modified, 1 = accessed, 2 = st_ctime, 3 = check all
//
// Returns 0 when the directory cannot be opened or holds no entries other
// than dot-files.  An entry that cannot be stat()ed (for instance because it
// was removed between readdir() and stat(), or is a dangling symlink) is
// skipped rather than treated as fatal, since the spool is modified by other
// processes while this one runs.
//
time_t dirTime( const char *name, int mac )
{
    DIR *h = opendir(name);
    if( ! h ){
        perror(name);
        return 0;   // just assume nothing is found in the directory.
    }

    // room for "<name>/<entry>" plus the terminating NUL
    size_t pathsize = strlen(name) + 1 + NAME_MAX + 1;
    char *pathname = (char*)malloc( pathsize );  // cast keeps C++ builds happy
    if( ! pathname ){
        fputs("ERROR:freshnews: Out of memory.\n", stderr);
        closedir(h);
        bailout(__LINE__);
    }

    time_t retval = 0;
    struct dirent *de;
    while( (de = readdir(h)) != NULL ){
        if( '.' == de->d_name[0] ) continue;

        int n = snprintf(pathname, pathsize, "%s/%s", name, de->d_name);
        if( n < 0 || (size_t)n >= pathsize ) continue;  // would not fit

        struct stat st;
        if( stat(pathname, &st) ) continue;             // vanished: ignore

        time_t pathtime = pickTime(&st, mac);
        if( pathtime > retval ) retval = pathtime;
    }
    closedir(h);
    free(pathname);

    D( printf("%s dirTime(%d) is %s", name, mac,
              retval ? ctime(&retval) : "--none--\n") );
    return retval;
}

// shouldCheck -- is it worth running the (costly) test that could shorten
// the fetch cycle to `candidate` seconds?  Only when a fetch is not already
// due and more than `candidate` seconds have passed since the last one.
static bool shouldCheck( time_t fetchcycle, time_t sincefetch, time_t candidate )
{
    return (fetchcycle > sincefetch) && (sincefetch > candidate);
}

// main -- decide, once or once per CYCLE_MIN in daemon mode, whether enough
// time has passed to run FETCHNEWS, and run it when it has.
//
// The allowed delay ("fetchcycle") starts at CYCLE_MAX and is shortened by,
// in order: time since a user was last seen, a post waiting in out.going,
// a logged-in user, recent access to interesting.groups, and a running
// newsreader.  Returns 0 when not in daemon mode; exits with status 1 on a
// fatal error.
//
int main( int argc, char** argv )
{
    // TODO: evaluate command-line arguments.  I'm hard-coding the first cut.
    (void)argc;
    (void)argv;

    nice( 19 );   // seconds don't matter, so make this process very nice.

    // Security?  I don't think so.  I check if running as root or news
    // just to save the user wondering why news is never fetched.
    //
    // TODO: check how portable "LOGNAME" is.  I think some systems/shells
    //       may prefer to use "USER" or even something else.
    //
    // LOGNAME may be unset altogether; treat that as "not news".
    const char *logname = getenv("LOGNAME");
    bool is_news = (logname != NULL) && (0 == strcmp("news", logname));
    if( getuid() && ! is_news ){
        fputs("ERROR:freshnews: only `root' or `news' may run this.\n", stderr);
        exit(1);
    }

    // go there and use relative pathnames
    if( chdir( dir ) ){
        perror( dir );
        bailout(__LINE__);
    }

    // TODO: establish a process lock, 'cause you can never be two fresh.

    if( dmode ) srand( (unsigned)time(NULL) ); // we'll use this to jiggle the time

    time_t timeUserFound = time(NULL);  // last time a user was seen
    time_t lasttime = 0;    // time of last fetch (hangs around in daemon mode)
    time_t fetchcycle;      // time between fetches.

    do{ // at least once, until not a daemon
        const char *cycle = "max";  // name of the rule that set fetchcycle
        fetchcycle = CYCLE_MAX;     // max: the vacation rate

        // The longer no user has been seen, the longer the idle cycle.
        time_t degrade = time(NULL) - timeUserFound;
        degrade = (CYCLE_LOGIN + degrade) / 2;
        if( (degrade >= CYCLE_LOGIN) && (degrade < fetchcycle) ){
            fetchcycle = degrade;
            cycle = "idle";
        }

        // determine number of seconds since the last fetch.
        // As a daemon, we can remember the time, else check the
        // timestamp on the leaf.node directory.
        //
        if( ! lasttime ) lasttime = chkTime( "leaf.node", 3 );
        // TODO: what about inn ?

        time_t sincefetch = 31999;  // assume many seconds if we don't know
        if( lasttime ) sincefetch = time(NULL) - lasttime;
                                    // but we should always know!
        if( debug ){
            time_t now = time(NULL);
            printf("---------- %ssince %ld\n", ctime(&now), (long)sincefetch);
        }

        // Send outgoing messages ASAP.  But don't get hung up on bugs, like
        // the user could try to post to a group that has been closed.
        // This happened to me the very first time I tried to analyze the
        // outgoing queue :-/
        // There's a silver lining, now I'm aware of the problem and
        // have designed a way around it.
        //
        if( fetchcycle > sincefetch ){
            // evaluate outgoing queue.  Create a time-limit of 1/2 the age
            // of the newest outgoing post.  Reduce fetchcycle to this value.
            time_t sendme = dirTime( "out.going", 0 ); // TODO: make portable
            if( sendme ){   // got zero if no files waiting to go
                time_t waiting = (time(NULL) - sendme) / 2;
                if( waiting < fetchcycle ){
                    fetchcycle = waiting;
                    cycle = "post";
                    D( printf("post to go: cycle set to %ld.\n", (long)waiting) );
                }
            }
        }

        if( shouldCheck( fetchcycle, sincefetch, min(CYCLE_READ, CYCLE_LOGIN) ) ){
            // TODO: figure out the "right" way to test for a user being here
            if( 0 == system( CMD_USER_ONLINE ) ){
                if( fetchcycle > (CYCLE_LOGIN) ){
                    fetchcycle = CYCLE_LOGIN;
                    cycle = "login";
                    D( printf("User online: cycle set to %d.\n", CYCLE_LOGIN) );
                }
                timeUserFound = time(NULL);
            }
        }

        if( shouldCheck( fetchcycle, sincefetch, CYCLE_NET ) ){
            //
            // figure out if the queue is being accessed remotely.
            // Ignore it if the access was more than NETMAX seconds ago.
            // (note that local users trigger this if using a program which
            // /incidentally/ reads news, but is likely doing something
            // else.  I'm speaking of netscape, of course.)
            // Users of local pure-newsreaders (ex: slrn, tin) can be given
            // better service because we can know it is a newsreader.
            //
            time_t reader = dirTime( "interesting.groups", 3 );
            if( reader ){   // zero returned if no groups are active
                time_t sinceread = time(NULL) - reader;
                if( sinceread <= CYCLE_NETMAX ){
                    timeUserFound = time(NULL); // Activate idle time
                    fetchcycle = CYCLE_NET;
                    cycle = "net";
                    D( printf("Remote reader: cycle set to %d.\n", CYCLE_NET) );
                }
            }
        }

        // if any user is using a known newsreader (slrn, etc) on
        // this system, reduce fetchcycle to CYCLE_READ.  Consider
        // netscape to be remote use of a newsreader because it
        // remotely resembles one.  (Ok, 'cause it might be browsing)
        // Only bother when a user was seen within the last 30 seconds.
        //
        if( shouldCheck( fetchcycle, sincefetch, CYCLE_READ )
         && (timeUserFound > (time(NULL) - 30)) ){
            // LINUX SPECIFIC
            // This test for newsreaders takes advantage of
            // Linux' "/proc" filesystem.  Rather than suffer all the overhead
            // of running (or duplicating) ps, I just check the names of the
            // processes registered in /proc.  It's not infallible, but it
            // should be reasonably accurate.
            //
            // TODO: Instead of loading a shell and grep, run the test internally
            //
            if( 0 == system( CMD_NEWSREADER_RUNNING ) ){
                fetchcycle = CYCLE_READ;
                cycle = "read";
                D( printf("Someone reading, cycle set to %d.\n", CYCLE_READ) );
            }
        }

        sincefetch = time(NULL) - lasttime; // allow for time in who or grep
        if( fetchcycle <= sincefetch ){
            // TODO: recover a "fetchnews" command line from "freshnews -- cmd"
            //       if none provided, just use "fetchnews"

            // minutes since the last fetch, rounded; as long so that the
            // format specifiers below match on every platform
            long minutes = (long)((sincefetch + 30) / 60);

            if( log_fetch ){
                openlog( "freshnews", LOG_PID, LOG_NEWS );
                syslog( LOG_INFO, "%s time, %ld since last %s",
                        cycle, minutes, FETCHNEWS );
                closelog();

                // Workaround for logging: nohup.out.  Yeah, grungy, but
                // this is pre-alpha and it works for me.
                //
                time_t now = time(NULL);
                printf( "%3ld since last, %5s time. %s",
                        minutes, cycle, ctime(&now) );
            }
            fflush(stdout);     // force nohup.out to be updated

            int status = system( FETCHNEWS );
            if( status != 0 )
                D( printf("%s: wait status %d.\n", FETCHNEWS, status) );
            lasttime = time(NULL);  // take the time after fetch completed
        }

        // As a daemon, make sure that access to the news server is not
        // clumped.  For example, with everyone running out of cron with
        // clocks sync'd with an atomic clock, cablemodem news servers
        // will have a peak load in the first 10 seconds of the minute.
        // In theory, including the execution time of a fetch will
        // randomize us, but as machines and connection speeds increase,
        // this may be a faint hope.  Let's do it explicitly using the
        // pseudo-random delay generator.
        //
        if( dmode ) sleep( CYCLE_MIN + rand() % 5 );
    }while( dmode );

    return 0;   // exit if running once from cron
}