add st-1.9 to research

winlin
Commit 7f4c113e57594f4e8c0cea60b6a8ab82f50f62aa 7f4c113e 1 parent ffabcec1
trunk/research/st-1.9/Makefile
trunk/research/st-1.9/README
trunk/research/st-1.9/common.h
trunk/research/st-1.9/docs/fig.gif
trunk/research/st-1.9/docs/notes.html
trunk/research/st-1.9/docs/reference.html
trunk/research/st-1.9/docs/st.html
trunk/research/st-1.9/docs/timeout_heap.txt
trunk/research/st-1.9/event.c
trunk/research/st-1.9/examples/Makefile
trunk/research/st-1.9/examples/README
trunk/research/st-1.9/examples/error.c
trunk/research/st-1.9/examples/lookupdns.c
trunk/research/st-1.9/examples/proxy.c
--- a/trunk/research/st-1.9/Makefile 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/Makefile 0 → 100644
查看文件 @7f4c113
+# The contents of this file are subject to the Mozilla Public
+# License Version 1.1 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of
+# the License at http://www.mozilla.org/MPL/
+# 
+# Software distributed under the License is distributed on an "AS
+# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# rights and limitations under the License.
+# 
+# The Original Code is the Netscape Portable Runtime library.
+# 
+# The Initial Developer of the Original Code is Netscape
+# Communications Corporation.  Portions created by Netscape are 
+# Copyright (C) 1994-2000 Netscape Communications Corporation.  All
+# Rights Reserved.
+# 
+# Contributor(s):  Silicon Graphics, Inc.
+# 
+# Portions created by SGI are Copyright (C) 2000-2001 Silicon
+# Graphics, Inc.  All Rights Reserved.
+# 
+# Alternatively, the contents of this file may be used under the
+# terms of the GNU General Public License Version 2 or later (the
+# "GPL"), in which case the provisions of the GPL are applicable 
+# instead of those above.  If you wish to allow use of your 
+# version of this file only under the terms of the GPL and not to
+# allow others to use your version of this file under the MPL,
+# indicate your decision by deleting the provisions above and
+# replace them with the notice and other provisions required by
+# the GPL.  If you do not delete the provisions above, a recipient
+# may use your version of this file under either the MPL or the
+# GPL.
+
+# This is the full version of the libst library - modify carefully
+VERSION     = 1.9
+
+##########################
+# Supported OSes:
+#
+#OS         = AIX
+#OS         = CYGWIN
+#OS         = DARWIN
+#OS         = FREEBSD
+#OS         = HPUX
+#OS         = HPUX_64
+#OS         = IRIX
+#OS         = IRIX_64
+#OS         = LINUX
+#OS         = NETBSD
+#OS         = OPENBSD
+#OS         = OSF1
+#OS         = SOLARIS
+#OS         = SOLARIS_64
+
+# Please see the "Other possible defines" section below for
+# possible compilation options.
+##########################
+
+CC          = cc
+AR          = ar
+LD          = ld
+RANLIB      = ranlib
+LN          = ln
+
+SHELL       = /bin/sh
+ECHO        = /bin/echo
+
+BUILD       = DBG
+TARGETDIR   = $(OS)_$(shell uname -r)_$(BUILD)
+
+DEFINES     = -D$(OS)
+CFLAGS      =
+SFLAGS      =
+ARFLAGS     = -rv
+LNFLAGS     = -s
+DSO_SUFFIX  = so
+
+MAJOR       = $(shell echo $(VERSION) | sed 's/^\([^\.]*\).*/\1/')
+DESC        = st.pc
+
+##########################
+# Platform section.
+# Possible targets:
+
+TARGETS     = aix-debug aix-optimized               \
+              cygwin-debug cygwin-optimized         \
+              darwin-debug darwin-optimized         \
+              freebsd-debug freebsd-optimized       \
+              hpux-debug hpux-optimized             \
+              hpux-64-debug hpux-64-optimized       \
+              irix-n32-debug irix-n32-optimized     \
+              irix-64-debug irix-64-optimized       \
+              linux-debug linux-optimized           \
+              netbsd-debug netbsd-optimized         \
+              openbsd-debug openbsd-optimized       \
+              osf1-debug osf1-optimized             \
+              solaris-debug solaris-optimized       \
+              solaris-64-debug solaris-64-optimized
+
+#
+# Platform specifics
+#
+
+ifeq ($(OS), AIX)
+AIX_VERSION = $(shell uname -v).$(shell uname -r)
+TARGETDIR   = $(OS)_$(AIX_VERSION)_$(BUILD)
+CC          = xlC
+STATIC_ONLY = yes
+ifeq ($(BUILD), OPT)
+OTHER_FLAGS = -w
+endif
+ifneq ($(filter-out 4.1 4.2, $(AIX_VERSION)),)
+DEFINES     += -DMD_HAVE_SOCKLEN_T
+endif
+endif
+
+ifeq ($(OS), CYGWIN)
+TARGETDIR   = $(OS)_$(BUILD)
+CC          = gcc
+LD          = gcc
+DSO_SUFFIX  = dll
+SLIBRARY    = $(TARGETDIR)/libst.dll.a
+DLIBRARY    = $(TARGETDIR)/libst.dll
+DEF_FILE    = $(TARGETDIR)/libst.def
+LDFLAGS     = libst.def -shared --enable-auto-image-base -Wl,--output-def,$(DEF_FILE),--out-implib,$(SLIBRARY)
+OTHER_FLAGS = -Wall
+endif
+
+ifeq ($(OS), DARWIN)
+LD          = cc
+SFLAGS      = -fPIC -fno-common
+DSO_SUFFIX  = dylib
+RELEASE     = $(shell uname -r | cut -d. -f1)
+PPC         = $(shell test $(RELEASE) -le 9 && echo yes)
+INTEL       = $(shell test $(RELEASE) -ge 9 && echo yes)
+ifeq ($(PPC), yes)
+CFLAGS      += -arch ppc
+LDFLAGS     += -arch ppc
+endif
+ifeq ($(INTEL), yes)
+CFLAGS      += -arch i386 -arch x86_64
+LDFLAGS     += -arch i386 -arch x86_64
+endif
+LDFLAGS     += -dynamiclib -install_name /sw/lib/libst.$(MAJOR).$(DSO_SUFFIX) -compatibility_version $(MAJOR) -current_version $(VERSION)
+OTHER_FLAGS = -Wall
+endif
+
+ifeq ($(OS), FREEBSD)
+SFLAGS      = -fPIC
+LDFLAGS     = -shared -soname=$(SONAME) -lc
+OTHER_FLAGS = -Wall
+ifeq ($(shell test -f /usr/include/sys/event.h && echo yes), yes)
+DEFINES     += -DMD_HAVE_KQUEUE
+endif
+endif
+
+ifeq (HPUX, $(findstring HPUX, $(OS)))
+ifeq ($(OS), HPUX_64)
+DEFINES     = -DHPUX
+CFLAGS      = -Ae +DD64 +Z
+else
+CFLAGS      = -Ae +DAportable +Z
+endif
+RANLIB      = true
+LDFLAGS     = -b
+DSO_SUFFIX  = sl
+endif
+
+ifeq (IRIX, $(findstring IRIX, $(OS)))
+ifeq ($(OS), IRIX_64)
+DEFINES     = -DIRIX
+ABIFLAG     = -64
+else
+ABIFLAG     = -n32
+endif
+RANLIB      = true
+CFLAGS      = $(ABIFLAG) -mips3
+LDFLAGS     = $(ABIFLAG) -shared
+OTHER_FLAGS = -fullwarn
+endif
+
+ifeq ($(OS), LINUX)
+EXTRA_OBJS  = $(TARGETDIR)/md.o
+SFLAGS      = -fPIC
+LDFLAGS     = -shared -soname=$(SONAME) -lc
+OTHER_FLAGS = -Wall
+ifeq ($(shell test -f /usr/include/sys/epoll.h && echo yes), yes)
+DEFINES     += -DMD_HAVE_EPOLL
+endif
+endif
+
+ifeq ($(OS), NETBSD)
+SFLAGS      = -fPIC
+LDFLAGS     = -shared -soname=$(SONAME) -lc
+OTHER_FLAGS = -Wall
+endif
+
+ifeq ($(OS), OPENBSD)
+SFLAGS      = -fPIC
+LDFLAGS     = -shared -soname=$(SONAME) -lc
+OTHER_FLAGS = -Wall
+ifeq ($(shell test -f /usr/include/sys/event.h && echo yes), yes)
+DEFINES     += -DMD_HAVE_KQUEUE
+endif
+endif
+
+ifeq ($(OS), OSF1)
+RANLIB      = true
+LDFLAGS     = -shared -all -expect_unresolved "*"
+endif
+
+ifeq (SOLARIS, $(findstring SOLARIS, $(OS)))
+TARGETDIR   = $(OS)_$(shell uname -r | sed 's/^5/2/')_$(BUILD)
+CC          = gcc
+LD          = gcc
+RANLIB      = true
+LDFLAGS     = -G
+OTHER_FLAGS = -Wall
+ifeq ($(OS), SOLARIS_64)
+DEFINES     = -DSOLARIS
+CFLAGS     += -m64
+LDFLAGS    += -m64
+endif
+endif
+
+#
+# End of platform section.
+##########################
+
+
+ifeq ($(BUILD), OPT)
+OTHER_FLAGS += -O
+else
+OTHER_FLAGS += -g
+DEFINES     += -DDEBUG
+endif
+
+##########################
+# Other possible defines:
+# To use poll(2) instead of select(2) for event checking:
+# DEFINES += -DUSE_POLL
+# You may prefer to use select for applications that have many threads
+# using one file descriptor, and poll for applications that have many
+# different file descriptors.  With USE_POLL poll() is called with at
+# least one pollfd per I/O-blocked thread, so 1000 threads sharing one
+# descriptor will poll 1000 identical pollfds and select would be more
+# efficient.  But if the threads all use different descriptors poll()
+# may be better depending on your operating system's implementation of
+# poll and select.  Really, it's up to you.  Oh, and on some platforms
+# poll() fails with more than a few dozen descriptors.
+#
+# Some platforms allow FD_SETSIZE to be defined (if select() is used), e.g.:
+# DEFINES += -DFD_SETSIZE=4096
+#
+# To use malloc(3) instead of mmap(2) for stack allocation:
+# DEFINES += -DMALLOC_STACK
+#
+# To provision more than the default 16 thread-specific-data keys
+# (but not too many!):
+# DEFINES += -DST_KEYS_MAX=<n>
+#
+# To start with more than the default 64 initial pollfd slots
+# (but the table grows dynamically anyway):
+# DEFINES += -DST_MIN_POLLFDS_SIZE=<n>
+#
+# Note that you can also add these defines by specifying them as
+# make/gmake arguments (without editing this Makefile). For example:
+#
+# make EXTRA_CFLAGS=-DUSE_POLL <target>
+#
+# (replace make with gmake if needed).
+#
+# You can also modify the default selection of an alternative event
+# notification mechanism. E.g., to enable kqueue(2) support (if it's not
+# enabled by default):
+#
+# gmake EXTRA_CFLAGS=-DMD_HAVE_KQUEUE <target>
+#
+# or to disable default epoll(4) support:
+#
+# make EXTRA_CFLAGS=-UMD_HAVE_EPOLL <target>
+#
+##########################
+
+CFLAGS      += $(DEFINES) $(OTHER_FLAGS) $(EXTRA_CFLAGS)
+
+OBJS        = $(TARGETDIR)/sched.o \
+              $(TARGETDIR)/stk.o   \
+              $(TARGETDIR)/sync.o  \
+              $(TARGETDIR)/key.o   \
+              $(TARGETDIR)/io.o    \
+              $(TARGETDIR)/event.o
+OBJS        += $(EXTRA_OBJS)
+HEADER      = $(TARGETDIR)/st.h
+SLIBRARY    = $(TARGETDIR)/libst.a
+DLIBRARY    = $(TARGETDIR)/libst.$(DSO_SUFFIX).$(VERSION)
+EXAMPLES    = examples
+
+LINKNAME    = libst.$(DSO_SUFFIX)
+SONAME      = libst.$(DSO_SUFFIX).$(MAJOR)
+FULLNAME    = libst.$(DSO_SUFFIX).$(VERSION)
+
+ifeq ($(OS), CYGWIN)
+SONAME      = cygst.$(DSO_SUFFIX)
+SLIBRARY    = $(TARGETDIR)/libst.dll.a
+DLIBRARY    = $(TARGETDIR)/$(SONAME)
+LINKNAME    =
+# examples directory does not compile under cygwin
+EXAMPLES    =
+endif
+
+ifeq ($(OS), DARWIN)
+LINKNAME    = libst.$(DSO_SUFFIX)
+SONAME      = libst.$(MAJOR).$(DSO_SUFFIX)
+FULLNAME    = libst.$(VERSION).$(DSO_SUFFIX)
+endif
+
+ifeq ($(STATIC_ONLY), yes)
+LIBRARIES   = $(SLIBRARY)
+else
+LIBRARIES   = $(SLIBRARY) $(DLIBRARY)
+endif
+
+ifeq ($(OS),)
+ST_ALL      = unknown
+else
+ST_ALL      = $(TARGETDIR) $(LIBRARIES) $(HEADER) $(EXAMPLES) $(DESC)
+endif
+
+all: $(ST_ALL)
+
+unknown:
+	@echo
+	@echo "Please specify one of the following targets:"
+	@echo
+	@for target in $(TARGETS); do echo $$target; done
+	@echo
+
+st.pc:	st.pc.in
+	sed "s/@VERSION@/${VERSION}/g" < $< > $@
+
+$(TARGETDIR):
+	if [ ! -d $(TARGETDIR) ]; then mkdir $(TARGETDIR); fi
+
+$(SLIBRARY): $(OBJS)
+	$(AR) $(ARFLAGS) $@ $(OBJS)
+	$(RANLIB) $@
+	rm -f obj; $(LN) $(LNFLAGS) $(TARGETDIR) obj
+
+$(DLIBRARY): $(OBJS:%.o=%-pic.o)
+	$(LD) $(LDFLAGS) $^ -o $@
+	if test "$(LINKNAME)"; then                             \
+		cd $(TARGETDIR);				\
+		rm -f $(SONAME) $(LINKNAME);			\
+		$(LN) $(LNFLAGS) $(FULLNAME) $(SONAME);		\
+		$(LN) $(LNFLAGS) $(FULLNAME) $(LINKNAME);	\
+	fi
+
+$(HEADER): public.h
+	rm -f $@
+	cp public.h $@
+
+$(TARGETDIR)/md.o: md.S
+	$(CC) $(CFLAGS) -c $< -o $@
+
+$(TARGETDIR)/%.o: %.c common.h md.h
+	$(CC) $(CFLAGS) -c $< -o $@
+
+examples::
+	@echo Making $@
+	@cd $@; $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" OS="$(OS)" TARGETDIR="$(TARGETDIR)"
+
+clean:
+	rm -rf *_OPT *_DBG obj st.pc
+
+##########################
+# Pattern rules:
+
+ifneq ($(SFLAGS),)
+# Compile with shared library options if it's a C file
+$(TARGETDIR)/%-pic.o: %.c common.h md.h
+	$(CC) $(CFLAGS) $(SFLAGS) -c $< -o $@
+endif
+
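+# Fallback when no dedicated PIC compile rule applies (objects built from
+# assembly, or from C when SFLAGS is empty): foo-pic.o is simply a
+# symbolic link to the normally compiled foo.o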
+%-pic.o: %.o
+	rm -f $@; $(LN) $(LNFLAGS) $(<F) $@
+
+##########################
+# Target rules:
+
+default-debug:
+	. ./osguess.sh; $(MAKE) OS="$$OS" BUILD="DBG"
+default default-optimized:
+	. ./osguess.sh; $(MAKE) OS="$$OS" BUILD="OPT"
+
+aix-debug:
+	$(MAKE) OS="AIX" BUILD="DBG"
+aix-optimized:
+	$(MAKE) OS="AIX" BUILD="OPT"
+
+cygwin-debug:
+	$(MAKE) OS="CYGWIN" BUILD="DBG"
+cygwin-optimized:
+	$(MAKE) OS="CYGWIN" BUILD="OPT"
+
+darwin-debug:
+	$(MAKE) OS="DARWIN" BUILD="DBG"
+darwin-optimized:
+	$(MAKE) OS="DARWIN" BUILD="OPT"
+
+freebsd-debug:
+	$(MAKE) OS="FREEBSD" BUILD="DBG"
+freebsd-optimized:
+	$(MAKE) OS="FREEBSD" BUILD="OPT"
+
+hpux-debug:
+	$(MAKE) OS="HPUX" BUILD="DBG"
+hpux-optimized:
+	$(MAKE) OS="HPUX" BUILD="OPT"
+hpux-64-debug:
+	$(MAKE) OS="HPUX_64" BUILD="DBG"
+hpux-64-optimized:
+	$(MAKE) OS="HPUX_64" BUILD="OPT"
+
+irix-n32-debug:
+	$(MAKE) OS="IRIX" BUILD="DBG"
+irix-n32-optimized:
+	$(MAKE) OS="IRIX" BUILD="OPT"
+irix-64-debug:
+	$(MAKE) OS="IRIX_64" BUILD="DBG"
+irix-64-optimized:
+	$(MAKE) OS="IRIX_64" BUILD="OPT"
+
+linux-debug:
+	$(MAKE) OS="LINUX" BUILD="DBG"
+linux-optimized:
+	$(MAKE) OS="LINUX" BUILD="OPT"
+# compatibility
+linux-ia64-debug: linux-debug
+linux-ia64-optimized: linux-optimized
+
+netbsd-debug:
+	$(MAKE) OS="NETBSD" BUILD="DBG"
+netbsd-optimized:
+	$(MAKE) OS="NETBSD" BUILD="OPT"
+
+openbsd-debug:
+	$(MAKE) OS="OPENBSD" BUILD="DBG"
+openbsd-optimized:
+	$(MAKE) OS="OPENBSD" BUILD="OPT"
+
+osf1-debug:
+	$(MAKE) OS="OSF1" BUILD="DBG"
+osf1-optimized:
+	$(MAKE) OS="OSF1" BUILD="OPT"
+
+solaris-debug:
+	$(MAKE) OS="SOLARIS" BUILD="DBG"
+solaris-optimized:
+	$(MAKE) OS="SOLARIS" BUILD="OPT"
+solaris-64-debug:
+	$(MAKE) OS="SOLARIS_64" BUILD="DBG"
+solaris-64-optimized:
+	$(MAKE) OS="SOLARIS_64" BUILD="OPT"
+
+##########################
+
--- a/trunk/research/st-1.9/README 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/README 0 → 100644
查看文件 @7f4c113
+WELCOME!
+
+The State Threads Library is a small application library which provides
+a foundation for writing fast and highly scalable Internet applications
+(such as web servers, proxy servers, mail transfer agents, and so on,
+really any network-data-driven application) on UNIX-like platforms.  It
+combines the simplicity of the multithreaded programming paradigm, in
+which one thread supports each simultaneous connection, with the
+performance and scalability of an event-driven state machine
+architecture.  In other words, this library offers a threading API for
+structuring an Internet application as a state machine.  For more
+details, please see the library documentation in the "docs" directory or
+on-line at
+
+    http://state-threads.sourceforge.net/docs/
+
+The State Threads Project is an open source project for maintaining and
+enhancing the State Threads Library.  For more information about this
+project, please see
+
+    http://state-threads.sourceforge.net/
+
+
+BUILDING
+
+To build the library by hand, use the GNU make utility.  Run the make
+command (e.g., `gmake') with no arguments to display all supported
+targets.
+
+To build more or less automatically, first set the CONFIG_GUESS_PATH
+variable in either osguess.sh or your environment then run "make
+default" which guesses your OS and builds.  Requires the "config.guess"
+utility from GNU autoconf (not included with ST).  You can use one from
+a larger "main" software project or just use any config.guess available
+on your system.  You can also get it directly from GNU:
+ftp://ftp.gnu.org/gnu/autoconf/
+
+To build rpms (RedHat Linux 6.2 or later, Linux/Mandrake, Solaris with
+gnome, etc.):
+    download the latest st-x.y.tar.gz
+    # rpm -ta st-x.y.tar.gz
+The .rpms will land in /usr/src/RPMS/<arch>.  Install them with:
+    # rpm -i libst*.rpm
+Requires GNU automake and rpm 3.0.3 or later.
+
+Debian users:
+  If you run potato, please upgrade to woody.
+  If you run woody, "apt-get install libst-dev" will get you v1.3.
+  If you run testing/unstable, you will get the newest available version.
+  If you *must* have the newest libst in woody, you may follow these
+  not-recommended instructions:
+    1. Add "deb-src <your-favourite-debian-mirror> unstable main" to your
+       /etc/apt/sources.list
+    2. apt-get update
+    3. apt-get source st
+    4. cd st-1.4 (or whatever version you got)
+    5. debuild
+    6. dpkg -i ../*.deb
+
+If your application uses autoconf to search for dependencies and you
+want to search for a given version of libst, you can simply add
+        PKG_CHECK_MODULES(MYAPP, st >= 1.3 mumble >= 0.2.23)
+to your configure.ac/in.  This will define @MYAPP_LIBS@ and
+@MYAPP_CFLAGS@ which you may then use in your Makefile.am/in files to
+link against mumble and st.
+
+
+LICENSE
+
+The State Threads library is a derivative of the Netscape Portable
+Runtime library (NSPR).  All source code in this directory is
+distributed under the terms of the Mozilla Public License (MPL) version
+1.1 or the GNU General Public License (GPL) version 2 or later.  For
+more information about these licenses please see
+http://www.mozilla.org/MPL/ and http://www.gnu.org/copyleft/.
+
+All source code in the "examples" directory is distributed under a
+BSD-style license.
+
+
+PLATFORMS
+
+Please see the "docs/notes.html" file for the list of currently
+supported platforms.
+
+
+DEBUGGER SUPPORT
+
+It's almost impossible to print SP and PC in a portable way.  The only
+way to see a thread's stack platform-independently is to actually jump to
+the saved context.  That's what the _st_iterate_threads() function does.
+Do the following to iterate over all threads:
+
+- set the _st_iterate_threads_flag to 1 in debugger
+- set breakpoint at the _st_show_thread_stack() function
+  (which does nothing)
+- call the _st_iterate_threads() function which jumps to the
+  next thread
+- at each break you can explore the thread's stack
+- continue
+- when iteration is complete, you return to the original
+  point (you can see the thread id and a message as arguments of
+  the _st_show_thread_stack() function).
+
+You can call _st_iterate_threads() in three ways:
+
+- Insert it into your source code at the point you want to
+  go over threads.
+- Just run the application and this function will be called at
+  the first context switch.
+- Call it directly from the debugger at any point.
+
+This works with gdb and dbx.
+
+Example using gdb:
+
+(gdb) set _st_iterate_threads_flag = 1
+(gdb) b _st_show_thread_stack
+...
+(gdb) call _st_iterate_threads()
+...
+(gdb) bt
+...
+(gdb) c
+...
+(gdb) bt
+...
+(gdb) c
+...
+and so on...
+
+_st_iterate_threads_flag will be set to 0 automatically
+after iteration is over or you can set it to 0 at any time
+to stop iteration.
+
+Sometimes gdb reports a SIGSEGV when you call a function directly from
+the gdb command line.  This can be ignored -- just call the same
+function again right away and it works fine.  For example:
+
+(gdb) set _st_iterate_threads_flag = 1
+(gdb) b _st_show_thread_stack
+Breakpoint 1 at 0x809bbbb: file sched.c, line 856.
+(gdb) call _st_iterate_threads()
+Program received signal SIGSEGV, Segmentation fault.
+....
+(gdb) # just call the function again:
+(gdb) call _st_iterate_threads()
+Breakpoint 1, _st_show_thread_stack (thread=0x4017aee4, messg=0x80ae7a2
+"Iteration started")    at sched.c:856
+856     }
+....
+
+You can use simple gdb command-line scripting to display
+all threads and their stack traces at once:
+
+(gdb) while _st_iterate_threads_flag
+ >bt
+ >c
+ >end
+....
+
+Another script to stop at the thread with the specific thread id
+(e.g., 0x40252ee4):
+
+(gdb) # set the flag again:
+(gdb) set _st_iterate_threads_flag = 1
+(gdb) call _st_iterate_threads()
+Breakpoint 1, _st_show_thread_stack (thread=0x4017aee4, messg=0x80ae7a2
+"Iteration started")    at sched.c:856
+856     }
+....
+(gdb) while thread != 0x40252ee4
+ >c
+ >end
+....
+....
+Breakpoint 1, _st_show_thread_stack (thread=0x40252ee4, messg=0x0) at
+sched.c:856
+856     }
+(gdb) bt
+....
+(gdb) # don't want to continue iteration, unset the flag:
+(gdb) set _st_iterate_threads_flag = 0
+(gdb) c
+Continuing.
+Breakpoint 1, _st_show_thread_stack (thread=0x0, messg=0x80ae78e "Iteration
+completed")
+    at sched.c:856
+856     }
+(gdb) c
+Continuing.
+(gdb) return
+Make selected stack frame return now? (y or n) y
+#0  0x4011254e in __select ()
+   from /lib/libc.so.6
+(gdb) detach
+
+
+CHANGE LOG
+
+Changes from 1.8 to 1.9.
+------------------------
+o  Support 32-bit and 64-bit Intel Macs.
+
+o  Added ST_VERSION string, and ST_VERSION_MAJOR and ST_VERSION_MINOR
+   [bug 1796801].
+
+o  Fixed some compiler warnings, based on a patch from Brian Wellington
+   [bug 1932741].
+
+
+Changes from 1.7 to 1.8.
+--------------------------
+o  Added support for kqueue and epoll on platforms that support them.
+   Added ability to choose the event notification system at program
+   startup.
+
+o  Long-overdue public definitions of ST_UTIME_NO_TIMEOUT (-1ULL) and
+   ST_UTIME_NO_WAIT (0) [bug 1514436].
+
+o  Documentation patch for st_utime() [bug 1514484].
+
+o  Documentation patch for st_timecache_set() [bug 1514486].
+
+o  Documentation patch for st_netfd_serialize_accept() [bug 1514494].
+
+o  Added st_writev_resid() [rfe 1538344].
+
+o  Added st_readv_resid() [rfe 1538768] and, for symmetry, st_readv().
+
+
+Changes from 1.6 to 1.7.
+------------------------
+o  Support glibc 2.4, which breaks programs that manipulate jump buffers.
+   Replaced Linux IA64 special cases with new md.S that covers all
+   Linux.
+
+
+Changes from 1.5.2 to 1.6.
+--------------------------
+none
+
+
+Changes from 1.5.1 to 1.5.2.
+----------------------------
+o  Alfred Perlstein's context switch callback feature.
+
+o  Claus Assmann's st_recvmsg/st_sendmsg wrappers.
+
+o  Extra stack padding for platforms that need it.
+
+o  Ron Arts's timeout clarifications in the reference manual.
+
+o  Raymond Bero and Anton Berezin's AMD64 FreeBSD port.
+
+o  Claus Assmann's AMD64 SunOS 5.10 port.
+
+o  Claus Assmann's AMD64 OpenBSD port.
+
+o  Michael Abd-El-Malek's Mac OS X port.
+
+o  Michael Abd-El-Malek's stack printing patch.
+
+
+Changes from 1.5.0 to 1.5.1.
+----------------------------
+o  Andreas Gustafsson's USE_POLL fix.
+
+o  Gene's st_set_utime_function() enhancement.
+
+
+Changes from 1.4 to 1.5.0.
+--------------------------
+o  Andreas Gustafsson's performance patch.
+
+o  New extensions:  Improved DNS resolver, generic LRU cache, in-process
+   DNS cache, and a program to test the resolver and cache.
+
+o  Support for AMD Opteron 64-bit CPUs under Linux.
+
+o  Support for SPARC-64 under Solaris.
+
+o  Andreas Gustafsson's support for VAX under NetBSD.
+
+o  Changed unportable #warning directives in md.h to #error.
+
+
+Changes from 1.3 to 1.4.
+------------------------
+o  Andreas Gustafsson's NetBSD port.
+
+o  Wesley W. Terpstra's Darwin (MacOS X) port.
+
+o  Support for many CPU architectures under Linux and *BSD.
+
+o  Renamed private typedefs so they don't conflict with public ones any
+   more.
+
+o  common.h now includes public.h for strict prototyping.
+
+o  Joshua Levy's recommendation to make st_connect() and st_sendto()
+   accept const struct sockaddr pointers, as the originals do.
+
+o  Clarified the documentation regarding blocking vs. non-blocking I/O.
+
+o  Cygwin support.
+
+o  Created the extensions directory.
+
+o  Fixed warnings from ia64asm.S.
+
+
+Changes from 1.2 to 1.3.
+------------------------
+o  Added st_read_resid() and st_write_resid() to allow the caller to know
+   how much data was transferred before an error occurred.  Updated
+   documentation.
+
+o  Updated project link, copyrights, and documentation regarding
+   timeouts.  Added comment to st_connect().
+
+o  Optimized the _st_add_sleep_q() function in sched.c.  Now we walk the
+   sleep queue *backward* when inserting a thread into it.  When you
+   have lots (hundreds) of threads and several timeout values, it takes
+   a while to insert a thread at the appropriate point in the sleep
+   queue.  The idea is that often this appropriate point is closer to
+   the end of the queue rather than the beginning.  Measurements show
+   performance improves with this change.  In any case this change
+   should do no harm.
+
+o  Added a hint of when to define USE_POLL and when not to, to the
+   Makefile.
+
+o  Added debugging support (files common.h and sched.c).   See above.
+
+o  Decreased the number of reallocations of _ST_POLLFDS in sched.c.
+   Inspired by Lev Walkin.
+
+o  Fixed st_usleep(-1) and st_sleep(-1), and added a warning to the
+   documentation about too-large timeouts.
+
+o  Linux/*BSD Alpha port.
+
+o  Wesley W. Terpstra modernized the build process:
+   - properly build relocatable libraries under bsd and linux
+   - use library versioning
+   - added rpm spec file
+   - added debian/ files
+   See above for build instructions.
+
+
+Changes from 1.1 to 1.2.
+------------------------
+o  Added st_randomize_stacks().
+
+o  Added a patch contributed by Sascha Schumann.
+
+
+Changes from 1.0 to 1.1.
+------------------------
+o  Relicensed under dual MPL-GPL.
+
+o  OpenBSD port.
+
+o  Compile-time option to use poll() instead of select() for
+   event polling (see Makefile).
+   This is useful if you want to support a large number of open
+   file descriptors (larger than FD_SETSIZE) within a single
+   process.
+
+o  Linux IA-64 port.
+   Two issues make IA-64 different from other platforms:
+
+   - Besides the traditional call stack in memory, IA-64 uses the
+     general register stack.  Thus each thread needs a backing store
+     for the register stack in addition to the memory stack.
+
+   - The current implementation of setjmp()/longjmp() cannot be used
+     for thread context-switching since it assumes that only one
+     register stack exists.  Using special assembly functions for
+     context-switching is unavoidable.
+    
+o  Thread stack capping on IRIX.
+   This allows some profiling tools (such as SpeedShop) to know when
+   to stop unwinding the stack.  Without this, libexc (used by SpeedShop)
+   traces right off the end of the stack and crashes.
+
+o  Miscellaneous documentation additions.
+
+
+COPYRIGHTS
+
+Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+All Rights Reserved.
--- a/trunk/research/st-1.9/common.h 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/common.h 0 → 100644
查看文件 @7f4c113
+/* 
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ * 
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ * 
+ * The Original Code is the Netscape Portable Runtime library.
+ * 
+ * The Initial Developer of the Original Code is Netscape
+ * Communications Corporation.  Portions created by Netscape are 
+ * Copyright (C) 1994-2000 Netscape Communications Corporation.  All
+ * Rights Reserved.
+ * 
+ * Contributor(s):  Silicon Graphics, Inc.
+ * 
+ * Portions created by SGI are Copyright (C) 2000-2001 Silicon
+ * Graphics, Inc.  All Rights Reserved.
+ * 
+ * Alternatively, the contents of this file may be used under the
+ * terms of the GNU General Public License Version 2 or later (the
+ * "GPL"), in which case the provisions of the GPL are applicable 
+ * instead of those above.  If you wish to allow use of your 
+ * version of this file only under the terms of the GPL and not to
+ * allow others to use your version of this file under the MPL,
+ * indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by
+ * the GPL.  If you do not delete the provisions above, a recipient
+ * may use your version of this file under either the MPL or the
+ * GPL.
+ */
+
+/*
+ * This file is derived directly from Netscape Communications Corporation,
+ * and consists of extensive modifications made during the year(s) 1999-2000.
+ */
+
+#ifndef __ST_COMMON_H__
+#define __ST_COMMON_H__
+
+#include <stddef.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <setjmp.h>
+
+/*
+ * Assertions are active only when DEBUG is defined.  Otherwise NDEBUG is
+ * defined before <assert.h> is included, which compiles out assert() and
+ * therefore ST_ASSERT(); the asserted expression is then never evaluated,
+ * so it must not have side effects the program relies on.
+ */
+#ifndef DEBUG
+#define NDEBUG
+#endif
+#include <assert.h>
+#define ST_ASSERT(expr) assert(expr)
+
+#define ST_BEGIN_MACRO  {  /* opens the body of a multi-statement macro */
+#define ST_END_MACRO    }  /* closes the body of a multi-statement macro */
+/*
+ * NOTE(review): the wrapper above is a bare { } block, not do { } while (0),
+ * so "if (c) SOME_MACRO(x); else ..." does not compile (the ';' after '}'
+ * terminates the if).  Brace such branches; confirm every caller ends the
+ * macro invocation with ';' before changing the wrapper.
+ */
+#ifdef DEBUG
+#define ST_HIDDEN   /* nothing: ST_HIDDEN adds no storage class when DEBUG is defined */
+#else
+#define	ST_HIDDEN   static  /* without DEBUG, ST_HIDDEN expands to "static" */
+#endif
+
+#include "public.h"
+#include "md.h"
+
+
+/*****************************************
+ * Circular linked list definitions
+ */
+
+typedef struct _st_clist {    /* Node (and list head) of a circular doubly linked list */
+  struct _st_clist *next;     /* Following node; points back to the node itself in an empty list */
+  struct _st_clist *prev;     /* Preceding node; points back to the node itself in an empty list */
+} _st_clist_t;
+
+/*
+ * Insert element "_e" into the list, immediately before "_l".
+ * "_e" ends up between the old (_l)->prev and "_l".  Both arguments are
+ * expanded several times, so they must be free of side effects.
+ */
+#define ST_INSERT_BEFORE(_e,_l)	 \
+    ST_BEGIN_MACRO		 \
+	(_e)->next = (_l);	 \
+	(_e)->prev = (_l)->prev; \
+	(_l)->prev->next = (_e); \
+	(_l)->prev = (_e);	 \
+    ST_END_MACRO
+
+/*
+ * Insert element "_e" into the list, immediately after "_l".
+ * "_e" ends up between "_l" and the old (_l)->next.  Both arguments are
+ * expanded several times, so they must be free of side effects.
+ */
+#define ST_INSERT_AFTER(_e,_l)	 \
+    ST_BEGIN_MACRO		 \
+	(_e)->next = (_l)->next; \
+	(_e)->prev = (_l);	 \
+	(_l)->next->prev = (_e); \
+	(_l)->next = (_e);	 \
+    ST_END_MACRO
+
+/* Return the element following element "_e" (its "next" pointer) */
+#define ST_NEXT_LINK(_e)  ((_e)->next)
+
+/* Append element "_e" to the end of list "_l", i.e. insert it just before the head "_l" */
+#define ST_APPEND_LINK(_e,_l) ST_INSERT_BEFORE(_e,_l)
+
+/* Insert element "_e" at the front of list "_l", i.e. insert it just after the head "_l" */
+#define ST_INSERT_LINK(_e,_l) ST_INSERT_AFTER(_e,_l)
+
+/* Return the first/last element of list "_l"; both yield "_l" itself when the list is empty */
+#define ST_LIST_HEAD(_l) (_l)->next
+#define ST_LIST_TAIL(_l) (_l)->prev
+
+/*
+ * Remove the element "_e" from its circular list by linking its two
+ * neighbours to each other.  The next/prev pointers stored in "_e"
+ * itself are left unchanged.
+ */
+#define ST_REMOVE_LINK(_e)	       \
+    ST_BEGIN_MACRO		       \
+	(_e)->prev->next = (_e)->next; \
+	(_e)->next->prev = (_e)->prev; \
+    ST_END_MACRO
+
+/* Return non-zero if the circular list "_l" is empty (its head points */
+/* to itself), zero if the list contains at least one element */
+#define ST_CLIST_IS_EMPTY(_l) \
+    ((_l)->next == (_l))
+
+/* Initialize "_l" at run time as an empty circular list (next and prev point to "_l") */
+#define ST_INIT_CLIST(_l)  \
+    ST_BEGIN_MACRO	   \
+	(_l)->next = (_l); \
+	(_l)->prev = (_l); \
+    ST_END_MACRO
+/* Brace initializer form: sets both next and prev to "_l"; pass the address of the list being defined so that it starts out empty */
+#define ST_INIT_STATIC_CLIST(_l) \
+    {(_l), (_l)}
+
+
+/*****************************************
+ * Basic types definitions
+ */
+
+typedef void  (*_st_destructor_t)(void *);  /* Callback taking one void * and returning nothing; NOTE(review): presumably the destructor for per-thread private data -- confirm at its users */
+
+
+typedef struct _st_stack {    /* Bookkeeping for one thread stack */
+  _st_clist_t links;          /* List linkage; NOTE(review): which list is not visible here -- confirm in stk.c */
+  char *vaddr;                /* Base of stack's allocated memory */
+  int  vaddr_size;            /* Size of stack's allocated memory */
+  int  stk_size;              /* Size of usable portion of the stack */
+  char *stk_bottom;           /* Lowest address of stack's usable portion */
+  char *stk_top;              /* Highest address of stack's usable portion */
+  void *sp;                   /* Stack pointer from C's point of view */
+#ifdef __ia64__
+  void *bsp;                  /* Register stack backing store pointer (present only when __ia64__ is defined) */
+#endif
+} _st_stack_t;
+
+
+typedef struct _st_cond {     /* Condition variable */
+  _st_clist_t wait_q;	      /* Wait queue of the condition variable (a circular list head) */
+} _st_cond_t;
+
+
+typedef struct _st_thread _st_thread_t;
+
+struct _st_thread {
+  int state;                  /* Thread's state (the state values are defined outside this excerpt) */
+  int flags;                  /* Thread's flags (the flag values are defined outside this excerpt) */
+
+  void *(*start)(void *arg);  /* The start function of the thread */
+  void *arg;                  /* Argument of the start function */
+  void *retval;               /* Return value of the start function */
+
+  _st_stack_t *stack;	      /* Info about thread's stack */
+
+  _st_clist_t links;          /* For putting on run/sleep/zombie queue */
+  _st_clist_t wait_links;     /* For putting on mutex/condvar wait queue */
+#ifdef DEBUG
+  _st_clist_t tlink;          /* For putting on thread queue (field exists only in DEBUG builds) */
+#endif
+
+  st_utime_t due;             /* Wakeup time when thread is sleeping */
+  _st_thread_t *left;         /* For putting in timeout heap */
+  _st_thread_t *right;	      /* -- see docs/timeout_heap.txt for details */
+  int heap_index;             /* NOTE(review): presumably this thread's position in the timeout heap -- confirm against docs/timeout_heap.txt */
+
+  void **private_data;        /* Per thread private data */
+
+  _st_cond_t *term;           /* Termination condition variable for join */
+
+  jmp_buf context;            /* Thread's context, stored in a <setjmp.h> jmp_buf */
+};
+
+
+/* Mutex: the thread currently holding it plus a queue of waiters */
+typedef struct _st_mutex {
+  _st_thread_t *owner;        /* Current mutex owner */
+  _st_clist_t  wait_q;        /* Mutex wait queue */
+} _st_mutex_t;
+
+
+/*
+ * One pending poll request: the polling thread and the array of poll
+ * descriptors it waits on.  Linked onto the vp's io queue through
+ * 'links' (see _ST_ADD_IOQ / _ST_DEL_IOQ and _ST_POLLQUEUE_PTR).
+ */
+typedef struct _st_pollq {
+  _st_clist_t links;          /* For putting on io queue */
+  _st_thread_t  *thread;      /* Polling thread */
+  struct pollfd *pds;         /* Array of poll descriptors */
+  int npds;                   /* Length of the array */
+  int on_ioq;                 /* Is it on ioq? */
+} _st_pollq_t;
+
+
+/*
+ * Table of operations implemented by an event system back end.
+ * The active table is reached through the global _st_eventsys pointer;
+ * its 'dispatch' member is what _ST_VP_IDLE() invokes.
+ */
+typedef struct _st_eventsys_ops {
+  const char *name;                          /* Name of this event system */
+  int  val;                                  /* Type of this event system */
+  int  (*init)(void);                        /* Initialization */
+  void (*dispatch)(void);                    /* Dispatch function */
+  int  (*pollset_add)(struct pollfd *, int); /* Add descriptor set */
+  void (*pollset_del)(struct pollfd *, int); /* Delete descriptor set */
+  int  (*fd_new)(int);                       /* New descriptor allocated */
+  int  (*fd_close)(int);                     /* Descriptor closed */
+  int  (*fd_getlimit)(void);                 /* Descriptor hard limit */
+} _st_eventsys_t;
+
+
+/*
+ * Scheduler state of the virtual processor ("vp"): the idle thread, the
+ * last clock reading, the run/io/zombie queues, the sleep queue and its
+ * size, the page size, and (with ST_SWITCH_CB) the context-switch
+ * callbacks.  A single instance is declared as _st_this_vp and is
+ * accessed through the _ST_RUNQ, _ST_IOQ, ... macros.
+ */
+typedef struct _st_vp {
+  _st_thread_t *idle_thread;  /* Idle thread for this vp */
+  st_utime_t last_clock;      /* The last time we went into vp_check_clock() */
+
+  _st_clist_t run_q;          /* run queue for this vp */
+  _st_clist_t io_q;           /* io queue for this vp */
+  _st_clist_t zombie_q;       /* zombie queue for this vp */
+#ifdef DEBUG
+  _st_clist_t thread_q;       /* all threads of this vp */
+#endif
+  int pagesize;               /* read through _ST_PAGE_SIZE */
+
+  _st_thread_t *sleep_q;      /* sleep queue for this vp */
+  int sleepq_size;	      /* number of threads on sleep queue */
+
+#ifdef ST_SWITCH_CB
+  st_switch_cb_t switch_out_cb;	/* called when a thread is switched out */
+  st_switch_cb_t switch_in_cb;	/* called when a thread is switched in */
+#endif
+} _st_vp_t;
+
+
+/*
+ * Library-level file descriptor object wrapping an OS descriptor.
+ * Carries optional user private data with its destructor, an auxiliary
+ * pointer for internal use, and a 'next' pointer for free-list chaining.
+ */
+typedef struct _st_netfd {
+  int osfd;                   /* Underlying OS file descriptor */
+  int inuse;                  /* In-use flag */
+  void *private_data;         /* Per descriptor private data */
+  _st_destructor_t destructor; /* Private data destructor function */
+  void *aux_data;             /* Auxiliary data for internal use */
+  struct _st_netfd *next;     /* For putting on the free list */
+} _st_netfd_t;
+
+
+/*****************************************
+ * Current vp, thread, and event system
+ */
+
+/* Globals defined elsewhere in the library (declared here, not defined) */
+extern _st_vp_t	    _st_this_vp;
+extern _st_thread_t *_st_this_thread;
+extern _st_eventsys_t *_st_eventsys;
+
+/* Read / assign the global current-thread pointer */
+#define _ST_CURRENT_THREAD()            (_st_this_thread)
+#define _ST_SET_CURRENT_THREAD(_thread) (_st_this_thread = (_thread))
+
+/* Shorthand lvalues for the members of _st_this_vp */
+#define _ST_LAST_CLOCK                  (_st_this_vp.last_clock)
+
+#define _ST_RUNQ                        (_st_this_vp.run_q)
+#define _ST_IOQ                         (_st_this_vp.io_q)
+#define _ST_ZOMBIEQ                     (_st_this_vp.zombie_q)
+#ifdef DEBUG
+#define _ST_THREADQ                     (_st_this_vp.thread_q)
+#endif
+
+#define _ST_PAGE_SIZE                   (_st_this_vp.pagesize)
+
+#define _ST_SLEEPQ                      (_st_this_vp.sleep_q)
+#define _ST_SLEEPQ_SIZE                 (_st_this_vp.sleepq_size)
+
+/* Calls the dispatch function of the active event system */
+#define _ST_VP_IDLE()                   (*_st_eventsys->dispatch)()
+
+
+/*****************************************
+ * vp queues operations
+ */
+
+/*
+ * I/O queue.  _pq is a _st_pollq_t object (not a pointer); the address of
+ * its 'links' member is handed to ST_APPEND_LINK / ST_REMOVE_LINK.  The
+ * argument is parenthesized so that any lvalue expression (for example
+ * *p or arr[i]) expands with the intended precedence.
+ */
+#define _ST_ADD_IOQ(_pq)    ST_APPEND_LINK(&(_pq).links, &_ST_IOQ)
+#define _ST_DEL_IOQ(_pq)    ST_REMOVE_LINK(&(_pq).links)
+
+/* Run queue.  _thr is a _st_thread_t pointer, linked through 'links'. */
+#define _ST_ADD_RUNQ(_thr)  ST_APPEND_LINK(&(_thr)->links, &_ST_RUNQ)
+#define _ST_DEL_RUNQ(_thr)  ST_REMOVE_LINK(&(_thr)->links)
+
+/* Sleep queue.  Not a linked list: forwarded to helper functions. */
+#define _ST_ADD_SLEEPQ(_thr, _timeout)  _st_add_sleep_q(_thr, _timeout)
+#define _ST_DEL_SLEEPQ(_thr)		_st_del_sleep_q(_thr)
+
+/* Zombie queue.  Uses the same 'links' member as the run queue. */
+#define _ST_ADD_ZOMBIEQ(_thr)  ST_APPEND_LINK(&(_thr)->links, &_ST_ZOMBIEQ)
+#define _ST_DEL_ZOMBIEQ(_thr)  ST_REMOVE_LINK(&(_thr)->links)
+
+#ifdef DEBUG
+/* Debug-only queue of all threads, linked through 'tlink'. */
+#define _ST_ADD_THREADQ(_thr)  ST_APPEND_LINK(&(_thr)->tlink, &_ST_THREADQ)
+#define _ST_DEL_THREADQ(_thr)  ST_REMOVE_LINK(&(_thr)->tlink)
+#endif
+
+
+/*****************************************
+ * Thread states and flags
+ */
+
+/*
+ * Thread states: mutually exclusive small integers, compared against
+ * the 'state' member of _st_thread_t (e.g. in ST_SWITCH_OUT_CB).
+ */
+#define _ST_ST_RUNNING      0 
+#define _ST_ST_RUNNABLE     1
+#define _ST_ST_IO_WAIT      2
+#define _ST_ST_LOCK_WAIT    3
+#define _ST_ST_COND_WAIT    4
+#define _ST_ST_SLEEPING     5
+#define _ST_ST_ZOMBIE       6
+#define _ST_ST_SUSPENDED    7
+
+/*
+ * Thread flags: each value is a distinct single bit, so they are
+ * presumably OR-ed together in the 'flags' member of _st_thread_t --
+ * TODO confirm in the implementation.
+ */
+#define _ST_FL_PRIMORDIAL   0x01
+#define _ST_FL_IDLE_THREAD  0x02
+#define _ST_FL_ON_SLEEPQ    0x04
+#define _ST_FL_INTERRUPT    0x08
+#define _ST_FL_TIMEDOUT     0x10
+
+
+/*****************************************
+ * Pointer conversion
+ */
+
+/* Fallback definition used only when no offsetof is already defined */
+#ifndef offsetof
+#define offsetof(type, identifier) ((size_t)&(((type *)0)->identifier))
+#endif
+
+/*
+ * Each macro below takes _qp, a pointer to a list-link member embedded in
+ * a larger structure, and yields a pointer to that enclosing structure by
+ * subtracting the member's byte offset.  The caller must pass a pointer
+ * to the matching member; nothing checks this.
+ */
+
+/* _qp points at the 'links' member of a _st_thread_t */
+#define _ST_THREAD_PTR(_qp)         \
+    ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, links)))
+
+/* _qp points at the 'wait_links' member of a _st_thread_t */
+#define _ST_THREAD_WAITQ_PTR(_qp)   \
+    ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, wait_links)))
+
+/* _qp points at the 'links' member of a _st_stack_t */
+#define _ST_THREAD_STACK_PTR(_qp)   \
+    ((_st_stack_t *)((char*)(_qp) - offsetof(_st_stack_t, links)))
+
+/* _qp points at the 'links' member of a _st_pollq_t */
+#define _ST_POLLQUEUE_PTR(_qp)      \
+    ((_st_pollq_t *)((char *)(_qp) - offsetof(_st_pollq_t, links)))
+
+#ifdef DEBUG
+/* _qp points at the 'tlink' member of a _st_thread_t (DEBUG builds only) */
+#define _ST_THREAD_THREADQ_PTR(_qp) \
+    ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, tlink)))
+#endif
+
+
+/*****************************************
+ * Constants
+ */
+
+/* -1 converted to st_utime_t; defined here only if not already defined */
+#ifndef ST_UTIME_NO_TIMEOUT
+#define ST_UTIME_NO_TIMEOUT ((st_utime_t) -1LL)
+#endif
+
+/* Default stack size in bytes: 64 KiB, or 128 KiB on ia64 */
+#ifndef __ia64__
+#define ST_DEFAULT_STACK_SIZE (64*1024)
+#else
+#define ST_DEFAULT_STACK_SIZE (128*1024)  /* Includes register stack size */
+#endif
+
+/* Overridable at build time; presumably the per-thread key limit -- TODO confirm */
+#ifndef ST_KEYS_MAX
+#define ST_KEYS_MAX 16
+#endif
+
+/* Overridable at build time; presumably a minimum pollfd array size -- TODO confirm */
+#ifndef ST_MIN_POLLFDS_SIZE
+#define ST_MIN_POLLFDS_SIZE 64
+#endif
+
+
+/*****************************************
+ * Threads context switching
+ */
+
+/*
+ * Debug hook used by _ST_SWITCH_CONTEXT: calls _st_iterate_threads() in
+ * DEBUG builds and expands to nothing otherwise.
+ */
+#ifdef DEBUG
+void _st_iterate_threads(void);
+#define ST_DEBUG_ITERATE_THREADS() _st_iterate_threads()
+#else
+#define ST_DEBUG_ITERATE_THREADS()
+#endif
+
+/*
+ * Optional context-switch callbacks, compiled in only with ST_SWITCH_CB.
+ *
+ * Each macro invokes the corresponding callback stored in _st_this_vp when
+ * one is installed, unless _thread is the vp's idle thread or is in the
+ * _ST_ST_ZOMBIE state.  _thread is a _st_thread_t pointer; it is
+ * parenthesized at every use so that arbitrary pointer expressions expand
+ * with the intended precedence.
+ *
+ * NOTE: each macro expands to a bare `if' statement, so an invocation must
+ * not be directly followed by an `else' belonging to an outer `if'.
+ * Without ST_SWITCH_CB both macros expand to nothing.
+ */
+#ifdef ST_SWITCH_CB
+#define ST_SWITCH_OUT_CB(_thread)		\
+    if (_st_this_vp.switch_out_cb != NULL &&	\
+        (_thread) != _st_this_vp.idle_thread &&	\
+        (_thread)->state != _ST_ST_ZOMBIE) {	\
+      _st_this_vp.switch_out_cb();		\
+    }
+#define ST_SWITCH_IN_CB(_thread)		\
+    if (_st_this_vp.switch_in_cb != NULL &&	\
+        (_thread) != _st_this_vp.idle_thread &&	\
+        (_thread)->state != _ST_ST_ZOMBIE) {	\
+      _st_this_vp.switch_in_cb();		\
+    }
+#else
+#define ST_SWITCH_OUT_CB(_thread)
+#define ST_SWITCH_IN_CB(_thread)
+#endif
+
+/*
+ * Switch away from the current thread context by saving its state and
+ * calling the thread scheduler.
+ *
+ * Sequence: run the switch-out callback, save the context with MD_SETJMP,
+ * and call _st_vp_schedule() only when MD_SETJMP yields zero.  When the
+ * saved context is later resumed by _ST_RESTORE_CONTEXT (which passes 1 to
+ * MD_LONGJMP), MD_SETJMP presumably yields non-zero, as setjmp() would, so
+ * the scheduler call is skipped and execution continues with the debug
+ * hook and the switch-in callback.
+ */
+#define _ST_SWITCH_CONTEXT(_thread)       \
+    ST_BEGIN_MACRO                        \
+    ST_SWITCH_OUT_CB(_thread);            \
+    if (!MD_SETJMP((_thread)->context)) { \
+      _st_vp_schedule();                  \
+    }                                     \
+    ST_DEBUG_ITERATE_THREADS();           \
+    ST_SWITCH_IN_CB(_thread);             \
+    ST_END_MACRO
+
+/*
+ * Restore a thread context that was saved by _ST_SWITCH_CONTEXT or
+ * initialized by _ST_INIT_CONTEXT.
+ *
+ * Records _thread as the current thread first, then jumps into its saved
+ * context with value 1.  Assuming MD_LONGJMP behaves like longjmp(),
+ * control does not come back to the statement after this macro.
+ */
+#define _ST_RESTORE_CONTEXT(_thread)   \
+    ST_BEGIN_MACRO                     \
+    _ST_SET_CURRENT_THREAD(_thread);   \
+    MD_LONGJMP((_thread)->context, 1); \
+    ST_END_MACRO
+
+/*
+ * Initialize the thread context preparing it to execute _main.
+ * This is an alias for the machine-dependent MD_INIT_CONTEXT; if the
+ * platform layer did not define it, compilation stops with an error.
+ */
+#ifdef MD_INIT_CONTEXT
+#define _ST_INIT_CONTEXT MD_INIT_CONTEXT
+#else
+#error Unknown OS
+#endif
+
+/*
+ * Number of bytes reserved under the stack "bottom".
+ * The value comes from the machine-dependent MD_STACK_PAD_SIZE.
+ */
+#define _ST_STACK_PAD_SIZE MD_STACK_PAD_SIZE
+
+
+/*****************************************
+ * Forward declarations
+ */
+
+/* Internal (underscore-prefixed) routines defined elsewhere in the library */
+void _st_vp_schedule(void);
+void _st_vp_check_clock(void);
+void *_st_idle_thread_start(void *arg);
+void _st_thread_main(void);
+void _st_thread_cleanup(_st_thread_t *thread);
+void _st_add_sleep_q(_st_thread_t *thread, st_utime_t timeout);
+void _st_del_sleep_q(_st_thread_t *thread);
+_st_stack_t *_st_stack_new(int stack_size);
+void _st_stack_free(_st_stack_t *ts);
+int _st_io_init(void);
+
+/*
+ * st_* entry points, declared here in terms of the internal structure
+ * types (_st_cond_t, _st_netfd_t, _st_thread_t) defined in this header.
+ */
+st_utime_t st_utime(void);
+_st_cond_t *st_cond_new(void);
+int st_cond_destroy(_st_cond_t *cvar);
+int st_cond_timedwait(_st_cond_t *cvar, st_utime_t timeout);
+int st_cond_signal(_st_cond_t *cvar);
+ssize_t st_read(_st_netfd_t *fd, void *buf, size_t nbyte, st_utime_t timeout);
+ssize_t st_write(_st_netfd_t *fd, const void *buf, size_t nbyte,
+		 st_utime_t timeout);
+int st_poll(struct pollfd *pds, int npds, st_utime_t timeout);
+_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg,
+			      int joinable, int stk_size);
+
+#endif /* !__ST_COMMON_H__ */
+
--- a/trunk/research/st-1.9/docs/fig.gif 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/docs/fig.gif 0 → 100644
查看文件 @7f4c113
--- a/trunk/research/st-1.9/docs/notes.html 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/docs/notes.html 0 → 100644
查看文件 @7f4c113
+<HTML>
+<HEAD>
+<TITLE>State Threads Library Programming Notes</TITLE>
+</HEAD>
+<BODY BGCOLOR=#FFFFFF>
+<H2>Programming Notes</H2>
+<P>
+<B>
+<UL>
+<LI><A HREF=#porting>Porting</A></LI>
+<LI><A HREF=#signals>Signals</A></LI>
+<LI><A HREF=#intra>Intra-Process Synchronization</A></LI>
+<LI><A HREF=#inter>Inter-Process Synchronization</A></LI>
+<LI><A HREF=#nonnet>Non-Network I/O</A></LI>
+<LI><A HREF=#timeouts>Timeouts</A></LI>
+</UL>
+</B>
+<P>
+<HR>
+<P>
+<A NAME="porting">
+<H3>Porting</H3>
+The State Threads library uses OS concepts that are available in some
+form on most UNIX platforms, making the library very portable across
+many flavors of UNIX.  However, there are several parts of the library
+that rely on platform-specific features.  Here is the list of such parts:
+<P>
+<UL>
+<LI><I>Thread context initialization</I>: Two ingredients of the
+<TT>jmp_buf</TT>
+data structure (the program counter and the stack pointer) have to be
+manually set in the thread creation routine. The <TT>jmp_buf</TT> data
+structure is defined in the <TT>setjmp.h</TT> header file and differs from
+platform to platform.  Usually the program counter is a structure member
+with <TT>PC</TT> in the name and the stack pointer is a structure member
+with <TT>SP</TT> in the name.  One can also look in the
+<A HREF="http://www.mozilla.org/source.html">Netscape's NSPR library source</A>
+which already has this code for many UNIX-like platforms
+(<TT>mozilla/nsprpub/pr/include/md/*.h</TT> files).
+<P>
+Note that on some BSD-derived platforms <TT>_setjmp(3)/_longjmp(3)</TT>
+calls should be used instead of <TT>setjmp(3)/longjmp(3)</TT> (that is
+the calls that manipulate only the stack and registers and do <I>not</I>
+save and restore the process's signal mask).</LI>
+<P>
+Starting with glibc 2.4 on Linux the opacity of the <TT>jmp_buf</TT> data
+structure is enforced by <TT>setjmp(3)/longjmp(3)</TT> so the
+<TT>jmp_buf</TT> ingredients cannot be accessed directly anymore (unless
+the special environment variable <TT>LD_POINTER_GUARD</TT> is set before
+application execution). To avoid a dependency on a custom environment, the State Threads
+library provides <TT>setjmp/longjmp</TT> replacement functions for
+all Intel CPU architectures. Other CPU architectures can also be easily
+supported (the <TT>setjmp/longjmp</TT> source code is widely available for
+many CPU architectures).
+<P>
+<LI><I>High resolution time function</I>: Some platforms (IRIX, Solaris)
+provide a high resolution time function based on the free running hardware
+counter.  This function returns the time counted since some arbitrary
+moment in the past (usually machine power up time).  It is not correlated in
+any way to the time of day, and thus is not subject to resetting,
+drifting, etc.  This type of time is ideal for tasks where cheap, accurate
+interval timing is required.  If such a function is not available on a
+particular platform, the <TT>gettimeofday(3)</TT> function can be used
+(though on some platforms it involves a system call).
+<P>
+<LI><I>The stack growth direction</I>: The library needs to know whether the
+stack grows toward lower (down) or higher (up) memory addresses.
+One can write a simple test program that detects the stack growth direction
+on a particular platform.</LI>
+<P>
+<LI><I>Non-blocking attribute inheritance</I>: On some platforms (e.g. IRIX)
+the socket created as a result of the <TT>accept(2)</TT> call inherits the
+non-blocking attribute of the listening socket. One needs to consult the manual
+pages or write a simple test program to see if this applies to a specific
+platform.</LI>
+<P>
+<LI><I>Anonymous memory mapping</I>: The library allocates memory segments
+for thread stacks by doing anonymous memory mapping (<TT>mmap(2)</TT>). This
+mapping is somewhat different on SVR4 and BSD4.3 derived platforms.
+<P>
+The memory mapping can be avoided altogether by using <TT>malloc(3)</TT> for
+stack allocation.  In this case the <TT>MALLOC_STACK</TT> macro should be
+defined.</LI>
+</UL>
+<P>
+All machine-dependent feature test macros should be defined in the
+<TT>md.h</TT> header file. The assembly code for <TT>setjmp/longjmp</TT>
+replacement functions for all CPU architectures should be placed in
+the <TT>md.S</TT> file.
+<P>
+The current version of the library is ported to:
+<UL>
+  <LI>IRIX 6.x (both 32 and 64 bit)</LI>
+  <LI>Linux (kernel 2.x and glibc 2.x) on x86, Alpha, MIPS and MIPSEL,
+  SPARC, ARM, PowerPC, 68k, HPPA, S390, IA-64, and Opteron (AMD-64)</LI>
+  <LI>Solaris 2.x (SunOS 5.x) on x86, AMD64, SPARC, and SPARC-64</LI>
+  <LI>AIX 4.x</LI>
+  <LI>HP-UX 11 (both 32 and 64 bit)</LI>
+  <LI>Tru64/OSF1</LI>
+  <LI>FreeBSD on x86, AMD64, and Alpha</LI>
+  <LI>OpenBSD on x86, AMD64, Alpha, and SPARC</LI>
+  <LI>NetBSD on x86, Alpha, SPARC, and VAX</LI>
+  <LI>MacOS X (Darwin) on PowerPC (32 bit) and Intel (both 32 and 64 bit) [universal]</LI>
+  <LI>Cygwin</LI>
+</UL>
+<P>
+
+<A NAME="signals">
+<H3>Signals</H3>
+Signal handling in an application using State Threads should be treated the
+same way as in a classical UNIX process application. There is no such
+thing as per-thread signal mask, all threads share the same signal handlers,
+and only asynchronous-safe functions can be used in signal handlers.
+However, there is a way to process signals synchronously by converting a
+signal event to an I/O event: a signal catching function does a write to
+a pipe which will be processed synchronously by a dedicated signal handling
+thread.  The following code demonstrates this technique (error handling is
+omitted for clarity):
+<PRE>
+
+/* Per-process pipe which is used as a signal queue. */
+/* Up to PIPE_BUF/sizeof(int) signals can be queued up. */
+int sig_pipe[2];
+
+/* Signal catching function. */
+/* Converts signal event to I/O event. */
+void sig_catcher(int signo)
+{
+  int err;
+
+  /* Save errno to restore it after the write() */
+  err = errno;
+  /* write() is reentrant/async-safe */
+  write(sig_pipe[1], &signo, sizeof(int));
+  errno = err;
+}
+
+/* Signal processing function. */
+/* This is the "main" function of the signal processing thread. */
+void *sig_process(void *arg)
+{
+  st_netfd_t nfd;
+  int signo;
+
+  nfd = st_netfd_open(sig_pipe[0]);
+
+  for ( ; ; ) {
+    /* Read the next signal from the pipe */
+    st_read(nfd, &signo, sizeof(int), ST_UTIME_NO_TIMEOUT);
+
+    /* Process signal synchronously */
+    switch (signo) {
+    case SIGHUP:
+      /* do something here - reread config files, etc. */
+      break;
+    case SIGTERM:
+      /* do something here - cleanup, etc. */
+      break;
+      /*      .
+              .
+         Other signals
+              .
+              .
+      */
+    }
+  }
+
+  return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+  struct sigaction sa;
+        .
+        .
+        .
+
+  /* Create signal pipe */
+  pipe(sig_pipe);
+
+  /* Create signal processing thread */
+  st_thread_create(sig_process, NULL, 0, 0);
+
+  /* Install sig_catcher() as a signal handler */
+  sa.sa_handler = sig_catcher;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  sigaction(SIGHUP, &sa, NULL);
+
+  sa.sa_handler = sig_catcher;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  sigaction(SIGTERM, &sa, NULL);
+
+        .
+        .
+        .
+      
+}
+
+</PRE>
+<P>
+Note that if multiple processes are used (see below), the signal pipe should
+be initialized after the <TT>fork(2)</TT> call so that each process has its
+own private pipe.
+<P>
+
+<A NAME="intra">
+<H3>Intra-Process Synchronization</H3>
+Due to the event-driven nature of the library scheduler, the thread context
+switch (process state change) can only happen in a well-known set of
+library functions.  This set includes functions in which a thread may
+"block":<TT>  </TT>I/O functions (<TT>st_read(), st_write(), </TT>etc.),
+sleep functions (<TT>st_sleep(), </TT>etc.), and thread synchronization
+functions (<TT>st_thread_join(), st_cond_wait(), </TT>etc.).  As a result,
+process-specific global data need not be protected by locks since a thread
+cannot be rescheduled while in a critical section (and only one thread at a
+time can access the same memory location).  By the same token,
+functions that are not thread-safe (in the traditional sense) can be safely used with
+State Threads.  The library's mutex facilities are practically useless
+for a correctly written application (no blocking functions in critical
+section) and are provided mostly for completeness.  This absence of locking
+greatly simplifies an application design and provides a foundation for
+scalability.
+<P>
+
+<A NAME="inter">
+<H3>Inter-Process Synchronization</H3>
+The State Threads library makes it possible to multiplex a large number
+of simultaneous connections onto a much smaller number of separate 
+processes, where each process uses a many-to-one user-level threading
+implementation (<B>N</B> of <B>M:1</B> mappings rather than one <B>M:N</B>
+mapping used in native threading libraries on some platforms). This design
+is key to the application's scalability.  One can think about it as if a
+set of all threads is partitioned into separate groups (processes) where
+each group has a separate pool of resources (virtual address space, file
+descriptors, etc.).  An application designer has full control of how many
+groups (processes) an application creates and what resources, if any,
+are shared among different groups via standard UNIX inter-process
+communication (IPC) facilities.<P>
+There are several reasons for creating multiple processes:
+<P>
+<UL>
+<LI>To take advantage of multiple hardware entities (CPUs, disks, etc.)
+available in the system (hardware parallelism).</LI>
+<P>
+<LI>To reduce risk of losing a large number of user connections when one of
+the processes crashes. For example, if <B>C</B> user connections (threads)
+are multiplexed onto <B>P</B> processes and one of the processes crashes,
+only a fraction (<B>C/P</B>) of all connections will be lost.</LI>
+<P>
+<LI>To overcome per-process resource limitations imposed by the OS.  For
+example, if <TT>select(2)</TT> is used for event polling, the number of
+simultaneous connections (threads) per process is
+limited by the <TT>FD_SETSIZE</TT> parameter (see <TT>select(2)</TT>).
+If <TT>FD_SETSIZE</TT> is equal to 1024 and each connection needs one file
+descriptor, then an application should create 10 processes to support 10,000
+simultaneous connections.</LI>
+</UL>
+<P>
+Ideally all user sessions are completely independent, so there is no need for
+inter-process communication.  It is always better to have several separate
+smaller process-specific resources (e.g., data caches) than to have one large
+resource shared (and modified) by all processes.  Sometimes, however, there
+is a need to share a common resource among different processes.  In that case,
+standard UNIX IPC facilities can be used.  In addition to that, there is a way
+to synchronize different processes so that only the thread accessing the
+shared resource will be suspended (but not the entire process) if that resource
+is unavailable.  In the following code fragment a pipe is used as a counting
+semaphore for inter-process synchronization:
+<PRE>
+#ifndef PIPE_BUF
+#define PIPE_BUF 512  /* POSIX */
+#endif
+
+/* Semaphore data structure */
+typedef struct ipc_sem {
+  st_netfd_t rdfd;  /* read descriptor */
+  st_netfd_t wrfd;  /* write descriptor */
+} ipc_sem_t;
+
+/* Create and initialize the semaphore. Should be called before fork(2). */
+/* 'value' must be less than PIPE_BUF. */
+/* If 'value' is 1, the semaphore works as mutex. */
+ipc_sem_t *ipc_sem_create(int value)
+{
+  ipc_sem_t *sem;
+  int p[2];
+  char b[PIPE_BUF];
+
+  /* Error checking is omitted for clarity */
+  sem = malloc(sizeof(ipc_sem_t));
+
+  /* Create the pipe */
+  pipe(p);
+  sem->rdfd = st_netfd_open(p[0]);
+  sem->wrfd = st_netfd_open(p[1]);
+
+  /* Initialize the semaphore: put 'value' bytes into the pipe */
+  write(p[1], b, value);
+
+  return sem;
+}
+
+/* Try to decrement the "value" of the semaphore. */
+/* If "value" is 0, the calling thread blocks on the semaphore. */
+int ipc_sem_wait(ipc_sem_t *sem)
+{
+  char c;
+
+  /* Read one byte from the pipe */
+  if (st_read(sem->rdfd, &c, 1, ST_UTIME_NO_TIMEOUT) != 1)
+    return -1;
+
+  return 0;
+}
+
+/* Increment the "value" of the semaphore. */
+int ipc_sem_post(ipc_sem_t *sem)
+{
+  char c;
+
+  if (st_write(sem->wrfd, &c, 1, ST_UTIME_NO_TIMEOUT) != 1)
+    return -1;
+
+  return 0;
+}
+
+</PRE>
+<P>
+
+Generally, the following steps should be followed when writing an application
+using the State Threads library:
+<P>
+<OL>
+<LI>Initialize the library (<TT>st_init()</TT>).</LI>
+<P>
+<LI>Create resources that will be shared among different processes:
+    create and bind listening sockets, create shared memory segments, IPC
+    channels, synchronization primitives, etc.</LI>
+<P>
+<LI>Create several processes (<TT>fork(2)</TT>). The parent process should
+    either exit or become a "watchdog" (e.g., it starts a new process when
+    an existing one crashes, does a cleanup upon application termination,
+    etc.).</LI>
+<P>
+<LI>In each child process create a pool of threads
+    (<TT>st_thread_create()</TT>) to handle user connections.</LI>
+</OL>
+<P>
+
+<A NAME="nonnet">
+<H3>Non-Network I/O</H3>
+
+The State Threads architecture uses non-blocking I/O on
+<TT>st_netfd_t</TT> objects for concurrent processing of multiple user
+connections.  This architecture has a drawback:  the entire process and
+all its threads may block for the duration of a <I>disk</I> or other
+non-network I/O operation, whether through State Threads I/O functions,
+direct system calls, or standard I/O functions.  (This is applicable
+mostly to disk <I>reads</I>; disk <I>writes</I> are usually performed
+asynchronously -- data goes to the buffer cache to be written to disk
+later.)  Fortunately, disk I/O (unlike network I/O) usually takes a
+finite and predictable amount of time, but this may not be true for
+special devices or user input devices (including stdin).  Nevertheless,
+such I/O reduces throughput of the system and increases response times.
+There are several ways to design an application to overcome this
+drawback:
+
+<P>
+<UL>
+<LI>Create several identical main processes as described above (symmetric
+    architecture).  This will improve CPU utilization and thus improve the
+    overall throughput of the system.</LI>
+<P>
+<LI>Create multiple "helper" processes in addition to the main process that
+    will handle blocking I/O operations (asymmetric architecture).
+    This approach was suggested for Web servers in a
+    <A HREF="http://www.cs.rice.edu/~vivek/flash99/">paper</A> by Peter
+    Druschel et al. In this architecture the main process communicates with
+    a helper process via an IPC channel (<TT>pipe(2), socketpair(2)</TT>).
+    The main process instructs a helper to perform the potentially blocking
+    operation.  Once the operation completes, the helper returns a
+    notification via IPC.
+</UL>
+<P>
+
+<A NAME="timeouts">
+<H3>Timeouts</H3>
+
+The <TT>timeout</TT> parameter to <TT>st_cond_timedwait()</TT> and the
+I/O functions, and the arguments to <TT>st_sleep()</TT> and
+<TT>st_usleep()</TT> specify a maximum time to wait <I>since the last
+context switch</I>, not since the beginning of the function call.
+
+<P>The State Threads' time resolution is actually the time interval
+between context switches.  That time interval may be large in some
+situations, for example, when a single thread does a lot of work
+continuously.  Note that a steady, uninterrupted stream of network I/O
+qualifies for this description; a context switch occurs only when a
+thread blocks.
+
+<P>If a specified I/O timeout is less than the time interval between
+context switches the function may return with a timeout error before
+that amount of time has elapsed since the beginning of the function
+call.  For example, if eight milliseconds have passed since the last
+context switch and an I/O function with a timeout of 10 milliseconds
+blocks, causing a switch, the call may return with a timeout error as
+little as two milliseconds after it was called.  (On Linux,
+<TT>select()</TT>'s timeout is an <I>upper</I> bound on the amount of
+time elapsed before select returns.)  Similarly, if 12 ms have passed
+already, the function may return immediately.
+
+<P>In almost all cases I/O timeouts should be used only for detecting a
+broken network connection or for preventing a peer from holding an idle
+connection for too long.  Therefore for most applications realistic I/O
+timeouts should be on the order of seconds.  Furthermore, there's
+probably no point in retrying operations that time out.  Rather than
+retrying, simply use a larger timeout in the first place.
+
+<P>The largest valid timeout value is platform-dependent and may be
+significantly less than <TT>INT_MAX</TT> seconds for <TT>select()</TT>
+or <TT>INT_MAX</TT> milliseconds for <TT>poll()</TT>.  Generally, you
+should not use timeouts exceeding several hours.  Use
+<tt>ST_UTIME_NO_TIMEOUT</tt> (<tt>-1</tt>) as a special value to
+indicate infinite timeout or indefinite sleep.  Use
+<tt>ST_UTIME_NO_WAIT</tt> (<tt>0</tt>) to indicate no waiting at all.
+
+<P>
+<HR>
+<P>
+</BODY>
+</HTML>
+
--- a/trunk/research/st-1.9/docs/reference.html 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/docs/reference.html 0 → 100644
查看文件 @7f4c113
--- a/trunk/research/st-1.9/docs/st.html 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/docs/st.html 0 → 100644
查看文件 @7f4c113
+<HTML>
+<HEAD>
+<TITLE>State Threads for Internet Applications</TITLE>
+</HEAD>
+<BODY BGCOLOR=#FFFFFF>
+<H2>State Threads for Internet Applications</H2>
+<H3>Introduction</H3>
+<P>
+State Threads is an application library which provides a
+foundation for writing fast and highly scalable Internet Applications
+on UNIX-like platforms.  It combines the simplicity of the multithreaded 
+programming paradigm, in which one thread supports each simultaneous 
+connection, with the performance and scalability of an event-driven 
+state machine architecture.</P>
+
+<H3>1. Definitions</H3>
+<P>
+<A NAME="IA">
+<H4>1.1 Internet Applications</H4>
+</A>
+<P>
+An <I>Internet Application</I> (IA) is either a server or client network
+application that accepts connections from clients and may or may not 
+connect to servers.  In an IA the arrival or departure of network data
+often controls processing (that is, IA is a <I>data-driven</I> application).
+For each connection, an IA does some finite amount of work 
+involving data exchange with its peer, where its peer may be either 
+a client or a server.
+The typical transaction steps of an IA are to accept a connection,
+read a request, do some finite and predictable amount of work to 
+process the request, then write a response to the peer that sent the 
+request.  One example of an IA is a Web server; 
+the most general example of an IA is a proxy server, because it both 
+accepts connections from clients and connects to other servers.</P>
+<P>
+We assume that the performance of an IA is constrained by available CPU
+cycles rather than network bandwidth or disk I/O (that is, CPU
+is a bottleneck resource).
+<P>
+
+<A NAME="PS">
+<H4>1.2 Performance and Scalability</H4>
+</A>
+<P>
+The <I>performance</I> of an IA is usually evaluated as its
+throughput measured in transactions per second or bytes per second (one
+can be converted to the other, given the average transaction size).  There are
+several benchmarks that can be used to measure throughput of Web serving
+applications for specific workloads (such as 
+<A HREF="http://www.spec.org/osg/web96/">SPECweb96</A>,
+<A HREF="http://www.mindcraft.com/webstone/">WebStone</A>,
+<A HREF="http://www.zdnet.com/zdbop/webbench/">WebBench</A>).
+Although there is no common definition for <I>scalability</I>, in general it
+expresses the ability of an application to sustain its performance when some
+external condition changes.  For IAs this external condition is either the
+number of clients (also known as "users," "simultaneous connections," or "load
+generators") or the underlying hardware system size (number of CPUs, memory
+size, and so on).  Thus there are two types of scalability: <I>load
+scalability</I> and <I>system scalability</I>, respectively.
+<P>
+The figure below shows how the throughput of an idealized IA changes with
+the increasing number of clients (solid blue line).  Initially the throughput
+grows linearly (the slope represents the maximal throughput that one client
+can provide). Within this initial range, the IA is underutilized and CPUs are
+partially idle.  Further increase in the number of clients leads to a system
+saturation, and the throughput gradually stops growing as all CPUs become fully
+utilized.  After that point, the throughput stays flat because there are no
+more CPU cycles available.
+In the real world, however, each simultaneous connection
+consumes some computational and memory resources, even when idle, and this
+overhead grows with the number of clients.  Therefore, the throughput of the
+real world IA starts dropping after some point (dashed blue line in the figure
+below).  The rate at which the throughput drops depends, among other things, on
+application design.
+<P>
+We say that an application has a good <I>load scalability</I> if it can
+sustain its throughput over a wide range of loads.
+Interestingly, the <A HREF="http://www.spec.org/osg/web99/">SPECweb99</A>
+benchmark somewhat reflects the Web server's load scalability because it
+measures the number of clients (load generators) given a mandatory minimal
+throughput per client (that is, it measures the server's <I>capacity</I>).
+This is unlike <A HREF="http://www.spec.org/osg/web96/">SPECweb96</A> and
+other benchmarks that use the throughput as their main metric (see the figure
+below).
+<P>
+<CENTER><IMG SRC="fig.gif" ALT="Figure: Throughput vs. Number of clients">
+</CENTER>
+<P>
+<I>System scalability</I> is the ability of an application to sustain its
+performance per hardware unit (such as a CPU) with the increasing number of
+these units.  In other words, good system scalability means that doubling the
+number of processors will roughly double the application's throughput (dashed
+green line).  We assume here that the underlying operating system also scales
+well.  Good system scalability allows you to initially run an application on 
+the smallest system possible, while retaining the ability to move that
+application to a larger system if necessary, without excessive effort or
+expense.  That is, an application need not be rewritten or even undergo a
+major porting effort when changing system size.
+<P>
+Although scalability and performance are more important in the case of server
+IAs, they should also be considered for some client applications (such as 
+benchmark load generators).
+<P>
+
+<A NAME="CONC">
+<H4>1.3 Concurrency</H4>
+</A>
+<P>
+Concurrency reflects the parallelism in a system.  The two unrelated types 
+are <I>virtual</I> concurrency and <I>real</I> concurrency.
+<UL>
+<LI>Virtual (or apparent) concurrency is the number of simultaneous
+connections that a system supports.
+<BR><BR>
+<LI>Real concurrency is the number of hardware devices, including
+CPUs, network cards, and disks, that actually allow a system to perform 
+tasks in parallel.
+</UL>
+<P>
+An IA must provide virtual concurrency in order to serve many users
+simultaneously.
+To achieve maximum performance and scalability in doing so, the number of
+programming entities that an IA creates to be scheduled by the OS kernel
+should be
+kept close to (within an order of magnitude of) the real concurrency found on
+the system. These programming entities scheduled by the kernel are known as
+<I>kernel execution vehicles</I>. Examples of kernel execution vehicles
+include Solaris lightweight processes and IRIX kernel threads.
+In other words, the number of kernel execution vehicles should be dictated by
+the system size and not by the number of simultaneous connections.
+<P>
+
+<H3>2. Existing Architectures</H3>
+<P>
+There are a few different architectures that are commonly used by IAs. 
+These include the <I>Multi-Process</I>, 
+<I>Multi-Threaded</I>, and <I>Event-Driven State Machine</I> 
+architectures.
+<P>
+<A NAME="MP">
+<H4>2.1 Multi-Process Architecture</H4>
+</A>
+<P>
+In the Multi-Process (MP) architecture, an individual process is 
+dedicated to each simultaneous connection.
+A process performs all of a transaction's initialization steps 
+and services a connection completely before moving on to service 
+a new connection.
+<P>
+User sessions in IAs are relatively independent; therefore, no 
+synchronization between processes handling different connections is
+necessary.  Because each process has its own private address space,
+this architecture is very robust. If a process serving one of the connections
+crashes, the other sessions will not be affected.  However, to serve many
+concurrent connections, an equal number of processes must be employed.
+Because processes are kernel entities (and are in fact the heaviest ones), 
+the number of kernel entities will be at least as large as the number of 
+concurrent sessions. On most systems, good performance will not be achieved 
+when more than a few hundred processes are created because of the high 
+context-switching overhead. In other words, MP applications have poor load 
+scalability.
+<P>
+On the other hand, MP applications have very good system scalability, because
+no resources are shared among different processes and there is no
+synchronization overhead.
+<P>
+The Apache Web Server 1.x (<A HREF=#refs1>[Reference 1]</A>) uses the MP 
+architecture on UNIX systems.
+<P>
+<A NAME="MT">
+<H4>2.2 Multi-Threaded Architecture</H4>
+</A>
+<P>
+In the Multi-Threaded (MT) architecture, multiple independent threads 
+of control are employed within a single shared address space.  Like a 
+process in the MP architecture, each thread performs all of a
+transaction's initialization steps and services a connection completely
+before moving on to service a new connection.
+<P>
+Many modern UNIX operating systems implement a <I>many-to-few</I> model when 
+mapping user-level threads to kernel entities.  In this model, an 
+arbitrarily large number of user-level threads is multiplexed onto a 
+lesser number of kernel execution vehicles.  Kernel execution 
+vehicles are also known as <I>virtual processors</I>.  Whenever a user-level
+thread makes a blocking system call, the kernel execution vehicle it is using
+will become blocked in the kernel.  If there are no other non-blocked kernel
+execution vehicles and there are other runnable user-level threads, a new
+kernel execution vehicle will be created automatically.  This prevents the
+application from blocking when it can continue to make useful forward
+progress.
+<P>
+Because IAs are by nature network I/O driven, all concurrent sessions block on
+network I/O at various points.  As a result, the number of virtual processors
+created in the kernel grows close to the number of user-level threads
+(or simultaneous connections).  When this occurs, the many-to-few model
+effectively degenerates to a <I>one-to-one</I> model.  Again, like in
+the MP architecture, the number of kernel execution vehicles is dictated by
+the number of simultaneous connections rather than by the number of CPUs.  This
+reduces an application's load scalability.  However, because kernel threads
+(lightweight processes) use fewer resources and are more light-weight than
+traditional UNIX processes, an MT application should scale better with load
+than an MP application.
+<P>
+Unexpectedly, the small number of virtual processors sharing the same address
+space in the MT architecture destroys an application's system scalability
+because of contention among the threads on various locks.  Even if an
+application itself is carefully
+optimized to avoid lock contention around its own global data (a non-trivial
+task), there are still standard library functions and system calls
+that use common resources hidden from the application.  For example,
+on many platforms thread safety of memory allocation routines
+(<TT>malloc(3)</TT>, <TT>free(3)</TT>, and so on) is achieved by using a single
+global lock.  Another example is a per-process file descriptor table.
+This common resource table is shared by all kernel execution vehicles within
+the same process and must be protected when one modifies it via
+certain system calls (such as <TT>open(2)</TT>, <TT>close(2)</TT>, and so on).
+In addition to that, maintaining the caches coherent
+among CPUs on multiprocessor systems hurts performance when different threads
+running on different CPUs modify data items on the same cache line.
+<P>
+In order to improve load scalability, some applications employ a different
+type of MT architecture:  they create one or more thread(s) <I>per task</I>
+rather than one thread <I>per connection</I>.  For example, one small group
+of threads may be responsible for accepting client connections, another 
+for request processing, and yet another for serving responses.  The main
+advantage of this architecture is that it eliminates the tight coupling
+between the number of threads and number of simultaneous connections. However,
+in this architecture, different task-specific thread groups must share common
+work queues that must be protected by mutual exclusion locks (a typical
+producer-consumer problem).  This adds synchronization overhead that causes an
+application to perform badly on multiprocessor systems.  In other words, in
+this architecture, the application's system scalability is sacrificed for the
+sake of load scalability.
+<P>
+Of course, the usual nightmares of threaded programming, including data
+corruption, deadlocks, and race conditions, also make MT architecture (in any
+form) non-simplistic to use.
+<P>
+
+<A NAME="EDSM">
+<H4>2.3 Event-Driven State Machine Architecture</H4>
+</A>
+<P>
+In the Event-Driven State Machine (EDSM) architecture, a single process
+is employed to concurrently process multiple connections. The basics of this
+architecture are described in Comer and Stevens
+<A HREF=#refs2>[Reference 2]</A>.
+The EDSM architecture performs one basic data-driven step associated with
+a particular connection at a time, thus multiplexing many concurrent
+connections.  The process operates as a state machine that receives an event
+and then reacts to it.
+<P>
+In the idle state the EDSM calls <TT>select(2)</TT> or <TT>poll(2)</TT> to
+wait for network I/O events.  When a particular file descriptor is ready for
+I/O, the EDSM completes the corresponding basic step (usually by invoking a
+handler function) and starts the next one.  This architecture uses
+non-blocking system calls to perform asynchronous network I/O operations.
+For more details on non-blocking I/O see Stevens
+<A HREF=#refs3>[Reference 3]</A>.
+<P>
+To take advantage of hardware parallelism (real concurrency), multiple
+identical processes may be created.  This is called Symmetric Multi-Process
+EDSM and is used, for example, in the Zeus Web Server
+(<A HREF=#refs4>[Reference 4]</A>).  To more efficiently multiplex disk I/O,
+special "helper" processes may be created.  This is called Asymmetric
+Multi-Process EDSM and was proposed for Web servers by Druschel
+and others <A HREF=#refs5>[Reference 5]</A>.
+<P>
+EDSM is probably the most scalable architecture for IAs.
+Because the number of simultaneous connections (virtual concurrency) is
+completely decoupled from the number of kernel execution vehicles (processes),
+this architecture has very good load scalability.  It requires only minimal 
+user-level resources to create and maintain an additional connection.
+<P>
+Like MP applications, Multi-Process EDSM has very good system scalability
+because no resources are shared among different processes and there is no
+synchronization overhead.
+<P>
+Unfortunately, the EDSM architecture is monolithic rather than based on the
+concept of threads, so new applications generally need to be implemented from
+the ground up.  In effect, the EDSM architecture simulates threads and their
+stacks the hard way.
+<P>
+
+<A NAME="ST">
+<H3>3. State Threads Library</H3>
+</A>
+<P>
+The State Threads library combines the advantages of all of the above
+architectures.  The interface preserves the programming simplicity of thread
+abstraction, allowing each simultaneous connection to be treated as a separate
+thread of execution within a single process. The underlying implementation is
+close to the EDSM architecture as the state of each particular concurrent
+session is saved in a separate memory segment.
+<P>
+
+<H4>3.1 State Changes and Scheduling</H4>
+<P>
+The state of each concurrent session includes its stack environment 
+(stack pointer, program counter, CPU registers) and its stack.  Conceptually, 
+a thread context switch can be viewed as a process changing its state.  There 
+are no kernel entities involved other than processes.  
+Unlike other general-purpose threading libraries, the State Threads library
+is fully deterministic.  The thread context switch (process state change) can
+only happen in a well-known set of functions (at I/O points or at explicit
+synchronization points).  As a result, process-specific global data does not
+have to be protected by mutual exclusion locks in most cases.  The entire
+application is free to use all the static variables and non-reentrant library
+functions it wants, greatly simplifying programming and debugging while
+increasing performance.  This is somewhat similar to a <I>co-routine</I> model
+(co-operatively multitasked threads), except that no explicit yield is needed
+--
+sooner or later, a thread performs a blocking I/O operation and thus surrenders
+control.  All threads of execution (simultaneous connections) have the
+same priority, so scheduling is non-preemptive, like in the EDSM architecture.
+Because IAs are data-driven (processing is limited by the size of network 
+buffers and data arrival rates), scheduling is non-time-slicing.
+<P>
+Only two types of external events are handled by the library's
+scheduler, because only these events can be detected by
+<TT>select(2)</TT> or <TT>poll(2)</TT>: I/O events (a file descriptor is ready
+for I/O) and time events
+(some timeout has expired).  However, other types of events (such as
+a signal sent to a process) can also be handled by converting them to I/O
+events.  For example, a signal handling function can perform a write to a pipe
+(<TT>write(2)</TT> is reentrant/asynchronous-safe), thus converting a signal
+event to an I/O event.
+<P>
+To take advantage of hardware parallelism, as in the EDSM architecture,
+multiple processes can be created in either a symmetric or asymmetric manner.
+Process management is not in the library's scope but instead is left up to the
+application.
+<P>
+There are several general-purpose threading libraries that implement a
+<I>many-to-one</I> model (many user-level threads to one kernel execution
+vehicle), using the same basic techniques as the State Threads library 
+(non-blocking I/O, event-driven scheduler, and so on).  For an example, see GNU
+Portable Threads (<A HREF=#refs6>[Reference 6]</A>).  Because they are
+general-purpose, these libraries have different objectives than the State 
+Threads library.  The State Threads library is <I>not</I> a general-purpose
+threading library,
+but rather an application library that targets only certain types of
+applications (IAs) in order to achieve the highest possible performance and
+scalability for those applications.
+<P>
+
+<H4>3.2 Scalability</H4>
+<P>
+State threads are very lightweight user-level entities, and therefore creating
+and maintaining user connections requires minimal resources.  An application
+using the State Threads library scales very well with the increasing number
+of connections.
+<P>
+On multiprocessor systems an application should create multiple processes
+to take advantage of hardware parallelism.  Using multiple separate processes
+is the <I>only</I> way to achieve the highest possible system scalability.
+This is because duplicating per-process resources is the only way to avoid
+significant synchronization overhead on multiprocessor systems.  Creating
+separate UNIX processes naturally offers resource duplication.  Again,
+as in the EDSM architecture, there is no connection between the number of
+simultaneous connections (which may be very large and changes within a wide
+range) and the number of kernel entities (which is usually small and constant).
+In other words, the State Threads library makes it possible to multiplex a
+large number of simultaneous connections onto a much smaller number of
+separate processes, thus allowing an application to scale well with both
+the load and system size.
+<P>
+
+<H4>3.3 Performance</H4>
+<P>
+Performance is one of the library's main objectives.  The State Threads
+library is implemented to minimize the number of system calls and 
+to make thread creation and context switching as fast as possible.
+For example, a per-thread signal mask does not exist (unlike
+POSIX threads), so there is no need to save and restore a process's
+signal mask on every thread context switch. This eliminates two system
+calls per context switch.  Signal events can be handled much more
+efficiently by converting them to I/O events (see above).
+<P>
+
+<H4>3.4 Portability</H4>
+<P>
+The library uses the same general, underlying concepts as the EDSM 
+architecture, including non-blocking I/O, file descriptors, and 
+I/O multiplexing.  These concepts are available in some form on most 
+UNIX platforms, making the library very portable across many 
+flavors of UNIX.  There are only a few platform-dependent sections in the
+source.
+<P>
+
+<H4>3.5 State Threads and NSPR</H4>
+<P>
+The State Threads library is a derivative of the Netscape Portable 
+Runtime library (NSPR) <A HREF=#refs7>[Reference 7]</A>. The primary goal of 
+NSPR is to provide a platform-independent layer for system facilities, 
+where system facilities include threads, thread synchronization, and I/O.
+Performance and scalability are not the main concern of NSPR.  The 
+State Threads library addresses performance and scalability while 
+remaining much smaller than NSPR.  It is contained in 8 source files 
+as opposed to more than 400, but provides all the functionality that 
+is needed to write efficient IAs on UNIX-like platforms.
+<P>
+
+<TABLE CELLPADDING=3>
+<TR>
+<TD></TD>
+<TH>NSPR</TH>
+<TH>State Threads</TH>
+</TR>
+<TR>
+<TD><B>Lines of code</B></TD>
+<TD ALIGN=RIGHT>~150,000</TD>
+<TD ALIGN=RIGHT>~3000</TD>
+</TR>
+<TR>
+<TD><B>Dynamic library size&nbsp;&nbsp;<BR>(debug version)</B></TD>
+<TD></TD>
+<TD></TD>
+</TR>
+<TR>
+<TD>IRIX</TD>
+<TD ALIGN=RIGHT>~700 KB</TD>
+<TD ALIGN=RIGHT>~60 KB</TD>
+</TR>
+<TR>
+<TD>Linux</TD>
+<TD ALIGN=RIGHT>~900 KB</TD>
+<TD ALIGN=RIGHT>~70 KB</TD>
+</TR>
+</TABLE>
+<P>
+
+<H3>Conclusion</H3>
+<P>
+State Threads is an application library which provides a foundation for
+writing <A HREF=#IA>Internet Applications</A>.  To summarize, it has the
+following <I>advantages</I>:
+<P>
+<UL>
+<LI>It allows the design of fast and highly scalable applications.  An
+application will scale well with both load and number of CPUs.
+<P>
+<LI>It greatly simplifies application programming and debugging because, as a
+rule, no mutual exclusion locking is necessary and the entire application is
+free to use static variables and non-reentrant library functions.
+</UL>
+<P>
+The library's main <I>limitation</I>:
+<P>
+<UL>
+<LI>All I/O operations on sockets must use the State Threads library's I/O
+functions because only those functions perform thread scheduling and prevent
+the application's processes from blocking.
+</UL>
+<P>
+
+<H3>References</H3>
+<OL>
+<A NAME="refs1">
+<LI> Apache Software Foundation,
+<A HREF="http://www.apache.org">http://www.apache.org</A>.
+<A NAME="refs2">
+<LI> Douglas E. Comer, David L. Stevens, <I>Internetworking With TCP/IP,
+Vol. III: Client-Server Programming And Applications</I>, Second Edition,
+Ch. 8, 12.
+<A NAME="refs3">
+<LI> W. Richard Stevens, <I>UNIX Network Programming</I>, Second Edition,
+Vol. 1, Ch. 15.
+<A NAME="refs4">
+<LI> Zeus Technology Limited,
+<A HREF="http://www.zeus.co.uk/">http://www.zeus.co.uk</A>.
+<A NAME="refs5">
+<LI> Peter Druschel, Vivek S. Pai, Willy Zwaenepoel,
+<A HREF="http://www.cs.rice.edu/~druschel/usenix99flash.ps.gz">
+Flash: An Efficient and Portable Web Server</A>. In <I>Proceedings of the
+USENIX 1999 Annual Technical Conference</I>, Monterey, CA, June 1999.
+<A NAME="refs6">
+<LI> GNU Portable Threads,
+<A HREF="http://www.gnu.org/software/pth/">http://www.gnu.org/software/pth/</A>.
+<A NAME="refs7">
+<LI> Netscape Portable Runtime,
+<A HREF="http://www.mozilla.org/docs/refList/refNSPR/">http://www.mozilla.org/docs/refList/refNSPR/</A>.
+</OL>
+
+<H3>Other resources covering various architectural issues in IAs</H3>
+<OL START=8>
+<LI> Dan Kegel, <I>The C10K problem</I>,
+<A HREF="http://www.kegel.com/c10k.html">http://www.kegel.com/c10k.html</A>.
+</LI>
+<LI> James C. Hu, Douglas C. Schmidt, Irfan Pyarali, <I>JAWS: Understanding
+High Performance Web Systems</I>,
+<A HREF="http://www.cs.wustl.edu/~jxh/research/research.html">http://www.cs.wustl.edu/~jxh/research/research.html</A>.</LI>
+</OL>
+<P>
+<HR>
+<P>
+
+<CENTER><FONT SIZE=-1>Portions created by SGI are Copyright &copy; 2000
+Silicon Graphics, Inc.  All rights reserved.</FONT></CENTER>
+<P>
+
+</BODY>
+</HTML>
+
--- a/trunk/research/st-1.9/docs/timeout_heap.txt 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/docs/timeout_heap.txt 0 → 100644
查看文件 @7f4c113
+How the timeout heap works
+
+As of version 1.5, the State Threads Library represents the queue of
+sleeping threads using a heap data structure rather than a sorted
+linked list.  This improves performance when there is a large number
+of sleeping threads, since insertion into a heap takes O(log N) time
+while insertion into a sorted list takes O(N) time.  For example, in
+one test 1000 threads were created, each thread called st_usleep()
+with a random time interval, and then all the threads were
+immediately interrupted and joined before the sleeps had a chance to
+finish.  The whole process was repeated 1000 times, for a total of a
+million sleep queue insertions and removals.  With the old list-based
+sleep queue, this test took 100 seconds; now it takes only 12 seconds.
+
+Heap data structures are typically based on dynamically resized
+arrays.  However, since the existing ST code base was very nicely
+structured around linking the thread objects into pointer-based lists
+without the need for any auxiliary data structures, implementing the
+heap using a similar nodes-and-pointers based approach seemed more
+appropriate for ST than introducing a separate array.
+
+Thus, the new ST timeout heap works by organizing the existing
+_st_thread_t objects in a balanced binary tree, just as they were
+previously organized into a doubly-linked, sorted list.  The global
+_ST_SLEEPQ variable, formerly a linked list head, is now simply a
+pointer to the root of this tree, and the root node of the tree is the
+thread with the earliest timeout.  Each thread object has two child
+pointers, "left" and "right", pointing to threads with later timeouts.
+
+Each node in the tree is numbered with an integer index, corresponding
+to the array index in an array-based heap, and the tree is kept fully
+balanced and left-adjusted at all times.  In other words, the tree
+consists of any number of fully populated top levels, followed by a
+single bottom level which may be partially populated, such that any
+existing nodes form a contiguous block to the left and the spaces for
+missing nodes form a contiguous block to the right.  For example, if
+there are nine threads waiting for a timeout, they are numbered and
+arranged in a tree exactly as follows:
+
+              1
+           /     \
+          2       3
+         / \     / \
+        4   5   6   7
+       / \
+      8   9
+
+Each node has either no children, only a left child, or both a left
+and a right child.  Children always time out later than their parents
+(this is called the "heap invariant"), but when a node has two
+children, their mutual order is unspecified - the left child may time
+out before or after the right child.  If a node is numbered N, its
+left child is numbered 2N, and its right child is numbered 2N+1.
+
+There is no pointer from a child to its parent; all pointers point
+downward.  Additions and deletions both work by starting at the root
+and traversing the tree towards the leaves, going left or right
+according to the binary digits forming the index of the destination
+node.  As nodes are added or deleted, existing nodes are rearranged to
+maintain the heap invariant.
--- a/trunk/research/st-1.9/event.c 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/event.c 0 → 100644
查看文件 @7f4c113
+/* 
+ * The contents of this file are subject to the Mozilla Public
+ * License Version 1.1 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.mozilla.org/MPL/
+ * 
+ * Software distributed under the License is distributed on an "AS
+ * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * rights and limitations under the License.
+ * 
+ * The Original Code is the Netscape Portable Runtime library.
+ * 
+ * The Initial Developer of the Original Code is Netscape
+ * Communications Corporation.  Portions created by Netscape are 
+ * Copyright (C) 1994-2000 Netscape Communications Corporation.  All
+ * Rights Reserved.
+ * 
+ * Contributor(s):  Silicon Graphics, Inc.
+ *                  Yahoo! Inc.
+ *
+ * Alternatively, the contents of this file may be used under the
+ * terms of the GNU General Public License Version 2 or later (the
+ * "GPL"), in which case the provisions of the GPL are applicable 
+ * instead of those above.  If you wish to allow use of your 
+ * version of this file only under the terms of the GPL and not to
+ * allow others to use your version of this file under the MPL,
+ * indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by
+ * the GPL.  If you do not delete the provisions above, a recipient
+ * may use your version of this file under either the MPL or the
+ * GPL.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include "common.h"
+
+#ifdef MD_HAVE_KQUEUE
+#include <sys/event.h>
+#endif
+#ifdef MD_HAVE_EPOLL
+#include <sys/epoll.h>
+#endif
+
+#if defined(USE_POLL) && !defined(MD_HAVE_POLL)
+/* Force poll usage if explicitly asked for it */
+#define MD_HAVE_POLL
+#endif
+
+/*
+ * Bookkeeping for the select() event system: one fd_set per event class
+ * (read, write, exception), a per-descriptor reference count for each
+ * class, and the highest descriptor currently being watched.
+ */
+static struct _st_seldata {
+    fd_set fd_read_set, fd_write_set, fd_exception_set;
+    int fd_ref_cnts[FD_SETSIZE][3];  /* [fd][0]=read, [1]=write, [2]=exception */
+    int maxfd;                       /* largest watched fd; -1 when none */
+} *_st_select_data;
+/* Shorthand accessors for the fields of *_st_select_data. */
+#define _ST_SELECT_MAX_OSFD      (_st_select_data->maxfd)
+#define _ST_SELECT_READ_SET      (_st_select_data->fd_read_set)
+#define _ST_SELECT_WRITE_SET     (_st_select_data->fd_write_set)
+#define _ST_SELECT_EXCEP_SET     (_st_select_data->fd_exception_set)
+#define _ST_SELECT_READ_CNT(fd)  (_st_select_data->fd_ref_cnts[fd][0])
+#define _ST_SELECT_WRITE_CNT(fd) (_st_select_data->fd_ref_cnts[fd][1])
+#define _ST_SELECT_EXCEP_CNT(fd) (_st_select_data->fd_ref_cnts[fd][2])
+
+/*
+ * State for the poll() event system; compiled only when MD_HAVE_POLL is
+ * defined (which USE_POLL forces on).
+ *
+ * NOTE(review): the field notes below are inferred from the names only;
+ * confirm against the poll backend functions.
+ */
+#ifdef MD_HAVE_POLL
+static struct _st_polldata {
+    struct pollfd *pollfds;     /* presumably the array passed to poll() */
+    int pollfds_size;           /* presumably the capacity of pollfds */
+    int fdcnt;                  /* presumably the number of fds being polled */
+} *_st_poll_data;
+/* Shorthand accessors for the fields of *_st_poll_data. */
+#define _ST_POLL_OSFD_CNT        (_st_poll_data->fdcnt) 
+#define _ST_POLLFDS              (_st_poll_data->pollfds) 
+#define _ST_POLLFDS_SIZE         (_st_poll_data->pollfds_size)
+#endif  /* MD_HAVE_POLL */
+
+/*
+ * State for the kqueue event system; compiled only when MD_HAVE_KQUEUE is
+ * defined.
+ *
+ * NOTE(review): the field notes below are inferred from the names only;
+ * confirm against the kqueue backend functions.
+ */
+#ifdef MD_HAVE_KQUEUE
+typedef struct _kq_fd_data {
+    int rd_ref_cnt;             /* read reference count for one descriptor */
+    int wr_ref_cnt;             /* write reference count for one descriptor */
+    int revents;                /* presumably events reported for the fd */
+} _kq_fd_data_t;
+/* Backend-wide record; fd_data is indexed by descriptor (see macros below). */
+static struct _st_kqdata {
+    _kq_fd_data_t *fd_data;
+    struct kevent *evtlist;     /* presumably the buffer kevent() fills in */
+    struct kevent *addlist;     /* presumably pending filter additions */
+    struct kevent *dellist;     /* presumably pending filter deletions */
+    int fd_data_size;
+    int evtlist_size;
+    int addlist_size;
+    int addlist_cnt;
+    int dellist_size;
+    int dellist_cnt;
+    int kq;                     /* presumably the kqueue descriptor */
+    pid_t pid;                  /* presumably the pid that created kq */
+} *_st_kq_data;
+/* Overridable at build time; 64 is used when the builder sets nothing. */
+#ifndef ST_KQ_MIN_EVTLIST_SIZE
+#define ST_KQ_MIN_EVTLIST_SIZE 64
+#endif
+/* Per-descriptor accessors into _st_kq_data->fd_data[]. */
+#define _ST_KQ_READ_CNT(fd)      (_st_kq_data->fd_data[fd].rd_ref_cnt)
+#define _ST_KQ_WRITE_CNT(fd)     (_st_kq_data->fd_data[fd].wr_ref_cnt)
+#define _ST_KQ_REVENTS(fd)       (_st_kq_data->fd_data[fd].revents)
+#endif  /* MD_HAVE_KQUEUE */
+
+/*
+ * State for the epoll event system; compiled only when MD_HAVE_EPOLL is
+ * defined.
+ *
+ * NOTE(review): the field notes below are inferred from the names only;
+ * confirm against the epoll backend functions.
+ */
+#ifdef MD_HAVE_EPOLL
+typedef struct _epoll_fd_data {
+    int rd_ref_cnt;             /* read reference count for one descriptor */
+    int wr_ref_cnt;             /* write reference count for one descriptor */
+    int ex_ref_cnt;             /* exception reference count for one descriptor */
+    int revents;                /* presumably events reported for the fd */
+} _epoll_fd_data_t;
+/* Backend-wide record; fd_data is indexed by descriptor (see macros below). */
+static struct _st_epolldata {
+    _epoll_fd_data_t *fd_data;
+    struct epoll_event *evtlist;    /* presumably the epoll_wait() buffer */
+    int fd_data_size;
+    int evtlist_size;
+    int evtlist_cnt;
+    int fd_hint;
+    int epfd;                       /* presumably the epoll descriptor */
+    pid_t pid;                      /* presumably the pid that created epfd */
+} *_st_epoll_data;
+/* Overridable at build time; 4096 is used when the builder sets nothing. */
+#ifndef ST_EPOLL_EVTLIST_SIZE
+/* Not a limit, just a hint */
+#define ST_EPOLL_EVTLIST_SIZE 4096
+#endif
+/* Per-descriptor accessors into _st_epoll_data->fd_data[]. */
+#define _ST_EPOLL_READ_CNT(fd)   (_st_epoll_data->fd_data[fd].rd_ref_cnt)
+#define _ST_EPOLL_WRITE_CNT(fd)  (_st_epoll_data->fd_data[fd].wr_ref_cnt)
+#define _ST_EPOLL_EXCEP_CNT(fd)  (_st_epoll_data->fd_data[fd].ex_ref_cnt)
+#define _ST_EPOLL_REVENTS(fd)    (_st_epoll_data->fd_data[fd].revents)
+/*
+ * Each *_BIT macro yields its epoll flag (EPOLLIN, EPOLLOUT, EPOLLPRI) while
+ * the matching reference count is non-zero and 0 otherwise;
+ * _ST_EPOLL_EVENTS ORs the three into one event mask for the descriptor.
+ */
+#define _ST_EPOLL_READ_BIT(fd)   (_ST_EPOLL_READ_CNT(fd) ? EPOLLIN : 0)
+#define _ST_EPOLL_WRITE_BIT(fd)  (_ST_EPOLL_WRITE_CNT(fd) ? EPOLLOUT : 0)
+#define _ST_EPOLL_EXCEP_BIT(fd)  (_ST_EPOLL_EXCEP_CNT(fd) ? EPOLLPRI : 0)
+#define _ST_EPOLL_EVENTS(fd) \
+    (_ST_EPOLL_READ_BIT(fd)|_ST_EPOLL_WRITE_BIT(fd)|_ST_EPOLL_EXCEP_BIT(fd))
+
+#endif  /* MD_HAVE_EPOLL */
+/*
+ * Event-system descriptor pointer with external linkage; NULL until set.
+ * NOTE(review): presumably pointed at the chosen backend during library
+ * initialization -- the assignment is not in this part of the file.
+ */
+_st_eventsys_t *_st_eventsys = NULL;
+
+
+/*****************************************
+ * select event system
+ */
+
+/*
+ * Create the select() backend's bookkeeping record.
+ *
+ * The record is allocated zero-filled (empty fd_sets, all reference counts
+ * at 0) and its maximum descriptor is set to -1, meaning "nothing watched".
+ *
+ * Returns 0 on success, or -1 if the allocation fails (in which case
+ * _st_select_data is left NULL).
+ */
+ST_HIDDEN int _st_select_init(void)
+{
+    struct _st_seldata *fresh = calloc(1, sizeof(*fresh));
+
+    /* Publish the result unconditionally, NULL included, as before. */
+    _st_select_data = fresh;
+    if (fresh == NULL)
+        return -1;
+
+    fresh->maxfd = -1;
+    return 0;
+}
+
+/*
+ * Register a set of poll requests with the select() backend.
+ *
+ * pds/npds describe an array of pollfd entries.  Every entry is validated
+ * before anything is modified: the descriptor must lie in [0, FD_SETSIZE)
+ * and the requested events must be a non-empty subset of
+ * POLLIN | POLLOUT | POLLPRI.  If any entry fails, errno is set to EINVAL
+ * and -1 is returned with the bookkeeping untouched.
+ *
+ * On success each requested event sets the descriptor in the matching
+ * fd_set and bumps its reference count, the maximum watched descriptor is
+ * raised if needed, and 0 is returned.
+ */
+ST_HIDDEN int _st_select_pollset_add(struct pollfd *pds, int npds)
+{
+    const int valid_mask = POLLIN | POLLOUT | POLLPRI;
+    int i;
+
+    /* Pass 1: reject the whole batch if a single entry is malformed. */
+    for (i = 0; i < npds; i++) {
+        const int fd = pds[i].fd;
+        const short want = pds[i].events;
+
+        if (fd < 0 || fd >= FD_SETSIZE || want == 0 ||
+            (want & ~valid_mask) != 0) {
+            errno = EINVAL;
+            return -1;
+        }
+    }
+
+    /* Pass 2: everything is valid, so record the interest. */
+    for (i = 0; i < npds; i++) {
+        const int fd = pds[i].fd;
+        const short want = pds[i].events;
+
+        if (want & POLLIN) {
+            FD_SET(fd, &_ST_SELECT_READ_SET);
+            _ST_SELECT_READ_CNT(fd) += 1;
+        }
+        if (want & POLLOUT) {
+            FD_SET(fd, &_ST_SELECT_WRITE_SET);
+            _ST_SELECT_WRITE_CNT(fd) += 1;
+        }
+        if (want & POLLPRI) {
+            FD_SET(fd, &_ST_SELECT_EXCEP_SET);
+            _ST_SELECT_EXCEP_CNT(fd) += 1;
+        }
+        if (fd > _ST_SELECT_MAX_OSFD)
+            _ST_SELECT_MAX_OSFD = fd;
+    }
+
+    return 0;
+}
+
+/*
+ * Withdraw a set of poll requests from the select() backend.
+ *
+ * For every event requested in each pds entry, the descriptor's reference
+ * count for that event class is decremented; when it reaches zero the
+ * descriptor is cleared from the corresponding fd_set.  The entries are
+ * not validated here, and the maximum watched descriptor is not lowered.
+ */
+ST_HIDDEN void _st_select_pollset_del(struct pollfd *pds, int npds)
+{
+    int i;
+
+    for (i = 0; i < npds; i++) {
+        const int fd = pds[i].fd;
+        const short had = pds[i].events;
+
+        if ((had & POLLIN) && --_ST_SELECT_READ_CNT(fd) == 0)
+            FD_CLR(fd, &_ST_SELECT_READ_SET);
+
+        if ((had & POLLOUT) && --_ST_SELECT_WRITE_CNT(fd) == 0)
+            FD_CLR(fd, &_ST_SELECT_WRITE_SET);
+
+        if ((had & POLLPRI) && --_ST_SELECT_EXCEP_CNT(fd) == 0)
+            FD_CLR(fd, &_ST_SELECT_EXCEP_SET);
+    }
+}
+/*
+ * Probe every descriptor referenced on the I/O queue and wake the threads
+ * whose poll requests contain a descriptor that fcntl(F_GETFL) rejects.
+ *
+ * Each affected request gets POLLNVAL in the revents of the offending
+ * entries, is unlinked from the I/O queue, has its select reference counts
+ * dropped, and its thread is made runnable.  The maximum watched
+ * descriptor is recomputed from the requests that stay queued.
+ *
+ * NOTE(review): presumably called when select() fails with EBADF; the call
+ * site is not in this part of the file -- confirm.
+ */
+ST_HIDDEN void _st_select_find_bad_fd(void)
+{
+    _st_clist_t *q;
+    _st_pollq_t *pq;
+    int notify;
+    struct pollfd *pds, *epds;
+    int pq_max_osfd, osfd;
+    short events;
+
+    _ST_SELECT_MAX_OSFD = -1;   /* rebuilt below from surviving requests */
+
+    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+        pq = _ST_POLLQUEUE_PTR(q);
+        notify = 0;             /* becomes 1 if this request has a bad fd */
+        epds = pq->pds + pq->npds;
+        pq_max_osfd = -1;
+      
+        for (pds = pq->pds; pds < epds; pds++) {
+            osfd = pds->fd;
+            pds->revents = 0;
+            if (pds->events == 0)
+                continue;       /* no events requested: fd is not probed */
+            if (fcntl(osfd, F_GETFL, 0) < 0) {
+                pds->revents = POLLNVAL;
+                notify = 1;
+            }
+            if (osfd > pq_max_osfd) {
+                pq_max_osfd = osfd;
+            }
+        }
+        /*
+         * NOTE(review): when the request is unlinked below, the outer loop
+         * still advances through q->next.  That assumes ST_REMOVE_LINK
+         * leaves the removed node's own next pointer intact -- confirm
+         * against the macro definition.
+         */
+        if (notify) {
+            ST_REMOVE_LINK(&pq->links);
+            pq->on_ioq = 0;
+            /*
+             * Decrement the count of descriptors for each descriptor/event
+             * because this I/O request is being removed from the ioq
+             */
+            for (pds = pq->pds; pds < epds; pds++) {
+                osfd = pds->fd;
+                events = pds->events;
+                if (events & POLLIN) {
+                    if (--_ST_SELECT_READ_CNT(osfd) == 0) {
+                        FD_CLR(osfd, &_ST_SELECT_READ_SET);
+                    }
+                }
+                if (events & POLLOUT) {
+                    if (--_ST_SELECT_WRITE_CNT(osfd) == 0) {
+                        FD_CLR(osfd, &_ST_SELECT_WRITE_SET);
+                    }
+                }
+                if (events & POLLPRI) {
+                    if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) {
+                        FD_CLR(osfd, &_ST_SELECT_EXCEP_SET);
+                    }
+                }
+            }
+            /* Cancel a pending timeout, if flagged, and schedule the thread. */
+            if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
+                _ST_DEL_SLEEPQ(pq->thread);
+            pq->thread->state = _ST_ST_RUNNABLE;
+            _ST_ADD_RUNQ(pq->thread);
+        } else {
+            /* Request stays queued: its highest fd counts toward the max. */
+            if (_ST_SELECT_MAX_OSFD < pq_max_osfd)
+                _ST_SELECT_MAX_OSFD = pq_max_osfd;
+        }
+    }
+}
+
+/*
+ * Dispatch routine for the select() event system.
+ *
+ * Calls select() on copies of the registered read/write/exception
+ * fd_sets. The wait is bounded by the time remaining until
+ * _ST_SLEEPQ->due, or is unbounded when _ST_SLEEPQ is NULL.
+ *
+ * When select() reports ready descriptors, every poll request on
+ * _ST_IOQ is examined: its revents fields are filled in and, if at least
+ * one descriptor fired, the request is unlinked from the queue, the
+ * per-descriptor reference counts are dropped (clearing the bit in the
+ * registered set when a count reaches zero) and the owning thread is
+ * put on the run queue. _ST_SELECT_MAX_OSFD is recomputed from the
+ * requests that remain queued.
+ *
+ * When select() fails with EBADF, _st_select_find_bad_fd() is called.
+ */
+ST_HIDDEN void _st_select_dispatch(void)
+{
+    struct timeval timeout, *tvp;
+    fd_set r, w, e;
+    fd_set *rp, *wp, *ep;
+    int nfd, pq_max_osfd, osfd;
+    _st_clist_t *q;
+    st_utime_t min_timeout;
+    _st_pollq_t *pq;
+    int notify;
+    struct pollfd *pds, *epds;
+    short events, revents;
+
+    /*
+     * Assignment of fd_sets
+     * (select() overwrites the sets it is given, so it gets copies and
+     * the registered sets stay intact)
+     */
+    r = _ST_SELECT_READ_SET;
+    w = _ST_SELECT_WRITE_SET;
+    e = _ST_SELECT_EXCEP_SET;
+
+    rp = &r;
+    wp = &w;
+    ep = &e;
+
+    if (_ST_SLEEPQ == NULL) {
+        /* Nothing is sleeping: block until a descriptor is ready */
+        tvp = NULL;
+    } else {
+        /* Remaining time in microseconds; zero if already due */
+        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
+            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
+        timeout.tv_sec  = (int) (min_timeout / 1000000);
+        timeout.tv_usec = (int) (min_timeout % 1000000);
+        tvp = &timeout;
+    }
+
+    /* Check for I/O operations */
+    nfd = select(_ST_SELECT_MAX_OSFD + 1, rp, wp, ep, tvp);
+
+    /* Notify threads that are associated with the selected descriptors */
+    if (nfd > 0) {
+        /* Rebuilt below from the requests that are not notified */
+        _ST_SELECT_MAX_OSFD = -1;
+        /*
+         * NOTE(review): q->next is read after the entry may have been
+         * unlinked; this presumably relies on ST_REMOVE_LINK leaving the
+         * removed node's own pointers untouched -- confirm against the
+         * macro definition.
+         */
+        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+            pq = _ST_POLLQUEUE_PTR(q);
+            notify = 0;
+            epds = pq->pds + pq->npds;
+            pq_max_osfd = -1;
+      
+            /* Translate the select() results into revents for this request */
+            for (pds = pq->pds; pds < epds; pds++) {
+                osfd = pds->fd;
+                events = pds->events;
+                revents = 0;
+                if ((events & POLLIN) && FD_ISSET(osfd, rp)) {
+                    revents |= POLLIN;
+                }
+                if ((events & POLLOUT) && FD_ISSET(osfd, wp)) {
+                    revents |= POLLOUT;
+                }
+                if ((events & POLLPRI) && FD_ISSET(osfd, ep)) {
+                    revents |= POLLPRI;
+                }
+                pds->revents = revents;
+                if (revents) {
+                    notify = 1;
+                }
+                /* Track the highest descriptor of this request */
+                if (osfd > pq_max_osfd) {
+                    pq_max_osfd = osfd;
+                }
+            }
+            if (notify) {
+                ST_REMOVE_LINK(&pq->links);
+                pq->on_ioq = 0;
+                /*
+                 * Decrement the count of descriptors for each descriptor/event
+                 * because this I/O request is being removed from the ioq
+                 */
+                for (pds = pq->pds; pds < epds; pds++) {
+                    osfd = pds->fd;
+                    events = pds->events;
+                    if (events & POLLIN) {
+                        if (--_ST_SELECT_READ_CNT(osfd) == 0) {
+                            FD_CLR(osfd, &_ST_SELECT_READ_SET);
+                        }
+                    }
+                    if (events & POLLOUT) {
+                        if (--_ST_SELECT_WRITE_CNT(osfd) == 0) {
+                            FD_CLR(osfd, &_ST_SELECT_WRITE_SET);
+                        }
+                    }
+                    if (events & POLLPRI) {
+                        if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) {
+                            FD_CLR(osfd, &_ST_SELECT_EXCEP_SET);
+                        }
+                    }
+                }
+
+                /* Wake the waiting thread; cancel its timeout if it has one */
+                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
+                    _ST_DEL_SLEEPQ(pq->thread);
+                pq->thread->state = _ST_ST_RUNNABLE;
+                _ST_ADD_RUNQ(pq->thread);
+            } else {
+                /* Request stays queued: it contributes to the max descriptor */
+                if (_ST_SELECT_MAX_OSFD < pq_max_osfd)
+                    _ST_SELECT_MAX_OSFD = pq_max_osfd;
+            }
+        }
+    } else if (nfd < 0) {
+        /*
+         * It can happen when a thread closes file descriptor
+         * that is being used by some other thread -- BAD!
+         */
+        if (errno == EBADF)
+            _st_select_find_bad_fd();
+    }
+}
+
+/*
+ * Validates a descriptor for use with the select() event system.
+ * Descriptors numbered FD_SETSIZE or higher do not fit into an fd_set,
+ * so they are rejected.
+ *
+ * Returns 0 if osfd is acceptable, or -1 with errno set to EMFILE.
+ */
+ST_HIDDEN int _st_select_fd_new(int osfd)
+{
+    if (osfd < FD_SETSIZE)
+        return 0;
+
+    errno = EMFILE;
+    return -1;
+}
+
+/*
+ * Checks whether osfd may be closed under the select() event system.
+ * The descriptor is considered busy while any of its read, write or
+ * exception reference counts is non-zero.
+ *
+ * Returns 0 if no count is outstanding, or -1 with errno set to EBUSY.
+ */
+ST_HIDDEN int _st_select_fd_close(int osfd)
+{
+    int in_use = _ST_SELECT_READ_CNT(osfd) ||
+                 _ST_SELECT_WRITE_CNT(osfd) ||
+                 _ST_SELECT_EXCEP_CNT(osfd);
+
+    if (!in_use)
+        return 0;
+
+    errno = EBUSY;
+    return -1;
+}
+
+/*
+ * Returns the descriptor limit of the select() event system, which is
+ * FD_SETSIZE.
+ */
+ST_HIDDEN int _st_select_fd_getlimit(void)
+{
+    const int limit = FD_SETSIZE;
+
+    return limit;
+}
+
+/*
+ * Descriptor of the select() event system. The initializers are
+ * positional, so their order has to match the member order of
+ * _st_eventsys_t (declared outside this section).
+ */
+static _st_eventsys_t _st_select_eventsys = {
+    "select",                   /* name string */
+    ST_EVENTSYS_SELECT,         /* event system identifier */
+    _st_select_init,            /* initialization */
+    _st_select_dispatch,        /* wait for and dispatch I/O events */
+    _st_select_pollset_add,     /* register a poll set */
+    _st_select_pollset_del,     /* unregister a poll set */
+    _st_select_fd_new,          /* validate a new descriptor */
+    _st_select_fd_close,        /* check that a descriptor may be closed */
+    _st_select_fd_getlimit      /* descriptor limit */
+};
+
+
+#ifdef MD_HAVE_POLL
+/*****************************************
+ * poll event system
+ */
+
+/*
+ * Initializes the poll() event system: allocates its state block and an
+ * initial pollfd array of ST_MIN_POLLFDS_SIZE entries, and resets the
+ * registered descriptor count to zero.
+ *
+ * Returns 0 on success, -1 if either allocation fails (nothing stays
+ * allocated in that case).
+ */
+ST_HIDDEN int _st_poll_init(void)
+{
+    _st_poll_data = (struct _st_polldata *) malloc(sizeof(*_st_poll_data));
+    if (_st_poll_data == NULL)
+        return -1;
+
+    _ST_POLLFDS = (struct pollfd *) malloc(ST_MIN_POLLFDS_SIZE *
+                                           sizeof(struct pollfd));
+    if (_ST_POLLFDS != NULL) {
+        _ST_POLLFDS_SIZE = ST_MIN_POLLFDS_SIZE;
+        _ST_POLL_OSFD_CNT = 0;
+        return 0;
+    }
+
+    /* The array could not be allocated: release the state block again */
+    free(_st_poll_data);
+    _st_poll_data = NULL;
+    return -1;
+}
+
+/*
+ * Registers a poll set with the poll() event system.
+ *
+ * Every entry must have a non-negative descriptor and a non-empty event
+ * mask; otherwise nothing is registered and -1 is returned with errno
+ * set to EINVAL. On success the registered descriptor count grows by
+ * npds and 0 is returned.
+ */
+ST_HIDDEN int _st_poll_pollset_add(struct pollfd *pds, int npds)
+{
+    int i;
+
+    for (i = 0; i < npds; i++) {
+        if (pds[i].fd < 0 || pds[i].events == 0) {
+            errno = EINVAL;
+            return -1;
+        }
+    }
+
+    _ST_POLL_OSFD_CNT += npds;
+
+    return 0;
+}
+
+/*
+ * Unregisters a poll set from the poll() event system by lowering the
+ * registered descriptor count by npds. The entries themselves are not
+ * inspected.
+ */
+/* ARGSUSED */
+ST_HIDDEN void _st_poll_pollset_del(struct pollfd *pds, int npds)
+{
+    _ST_POLL_OSFD_CNT = _ST_POLL_OSFD_CNT - npds;
+    ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
+}
+
+/*
+ * Dispatch routine for the poll() event system.
+ *
+ * Copies the pollfd entries of every request on _ST_IOQ into one
+ * contiguous array, calls poll() on it, and for each request that has
+ * at least one entry with non-zero revents copies the results back,
+ * unlinks the request from the queue and makes its thread runnable.
+ *
+ * The wait is bounded by the time remaining until _ST_SLEEPQ->due, or is
+ * unbounded when _ST_SLEEPQ is NULL.
+ *
+ * If the array has to grow and the allocation fails, the function
+ * returns without polling and leaves the existing array in place, so a
+ * later call can try again.
+ */
+ST_HIDDEN void _st_poll_dispatch(void)
+{
+    int timeout, nfd;
+    _st_clist_t *q;
+    st_utime_t min_timeout;
+    _st_pollq_t *pq;
+    struct pollfd *pds, *epds, *pollfds;
+
+    /*
+     * Build up the array of struct pollfd to wait on.
+     * If existing array is not big enough, allocate a new one and then
+     * release the old one.
+     */
+    ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
+    if (_ST_POLL_OSFD_CNT > _ST_POLLFDS_SIZE) {
+        pollfds = (struct pollfd *) malloc((_ST_POLL_OSFD_CNT + 10) *
+                                           sizeof(struct pollfd));
+        if (pollfds == NULL) {
+            /*
+             * The old array is too small to hold all descriptors, so we
+             * cannot poll now. Keep it (the bookkeeping stays consistent)
+             * and retry on the next dispatch.
+             */
+            return;
+        }
+        free(_ST_POLLFDS);
+        _ST_POLLFDS = pollfds;
+        _ST_POLLFDS_SIZE = _ST_POLL_OSFD_CNT + 10;
+    }
+    pollfds = _ST_POLLFDS;
+
+    /* Gather all descriptors into one array */
+    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+        pq = _ST_POLLQUEUE_PTR(q);
+        memcpy(pollfds, pq->pds, sizeof(struct pollfd) * pq->npds);
+        pollfds += pq->npds;
+    }
+    ST_ASSERT(pollfds <= _ST_POLLFDS + _ST_POLLFDS_SIZE);
+
+    if (_ST_SLEEPQ == NULL) {
+        /* Nothing is sleeping: block until a descriptor is ready */
+        timeout = -1;
+    } else {
+        /* Remaining time in microseconds; zero if already due */
+        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
+            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
+        /*
+         * poll() takes milliseconds. Round up rather than down: a
+         * remaining time below 1 ms would otherwise become a zero
+         * timeout and the caller would spin until the sleeper is due.
+         */
+        timeout = (int) (min_timeout / 1000);
+        if (min_timeout % 1000)
+            timeout++;
+    }
+
+    /* Check for I/O operations */
+    nfd = poll(_ST_POLLFDS, _ST_POLL_OSFD_CNT, timeout);
+
+    /* Notify threads that are associated with the selected descriptors */
+    if (nfd > 0) {
+        /*
+         * Walk the queue in the same order as when gathering, so each
+         * request maps to the same slice of the array.
+         */
+        pollfds = _ST_POLLFDS;
+        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+            pq = _ST_POLLQUEUE_PTR(q);
+            epds = pollfds + pq->npds;
+            for (pds = pollfds; pds < epds; pds++) {
+                if (pds->revents)
+                    break;
+            }
+            if (pds < epds) {
+                /* At least one descriptor fired: hand the results back */
+                memcpy(pq->pds, pollfds, sizeof(struct pollfd) * pq->npds);
+                ST_REMOVE_LINK(&pq->links);
+                pq->on_ioq = 0;
+
+                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
+                    _ST_DEL_SLEEPQ(pq->thread);
+                pq->thread->state = _ST_ST_RUNNABLE;
+                _ST_ADD_RUNQ(pq->thread);
+
+                _ST_POLL_OSFD_CNT -= pq->npds;
+                ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
+            }
+            pollfds = epds;
+        }
+    }
+}
+
+/*
+ * Validates a descriptor for use with the poll() event system.
+ * No check is performed; every descriptor is accepted.
+ *
+ * Always returns 0.
+ */
+/* ARGSUSED */
+ST_HIDDEN int _st_poll_fd_new(int osfd)
+{
+    (void) osfd;
+
+    return 0;
+}
+
+/*
+ * Checks whether osfd may be closed under the poll() event system.
+ * This event system keeps no per-descriptor I/O counts, so there is
+ * nothing to verify.
+ *
+ * Always returns 0.
+ */
+/* ARGSUSED */
+ST_HIDDEN int _st_poll_fd_close(int osfd)
+{
+    (void) osfd;
+
+    return 0;
+}
+
+/*
+ * Returns the descriptor limit of the poll() event system.
+ * Zero means there is no specific limit.
+ */
+ST_HIDDEN int _st_poll_fd_getlimit(void)
+{
+    const int no_limit = 0;
+
+    return no_limit;
+}
+
+/*
+ * Descriptor of the poll() event system. The initializers are
+ * positional, so their order has to match the member order of
+ * _st_eventsys_t (declared outside this section).
+ */
+static _st_eventsys_t _st_poll_eventsys = {
+    "poll",                     /* name string */
+    ST_EVENTSYS_POLL,           /* event system identifier */
+    _st_poll_init,              /* initialization */
+    _st_poll_dispatch,          /* wait for and dispatch I/O events */
+    _st_poll_pollset_add,       /* register a poll set */
+    _st_poll_pollset_del,       /* unregister a poll set */
+    _st_poll_fd_new,            /* validate a new descriptor */
+    _st_poll_fd_close,          /* check that a descriptor may be closed */
+    _st_poll_fd_getlimit        /* descriptor limit */
+};
+#endif  /* MD_HAVE_POLL */
+
+
+#ifdef MD_HAVE_KQUEUE
+/*****************************************
+ * kqueue event system
+ */
+                    
+/*
+ * Initializes the kqueue event system: allocates the zero-filled state
+ * block, creates the kqueue descriptor (marked close-on-exec), records
+ * the current pid, and allocates the per-descriptor data array
+ * (FD_SETSIZE entries) plus the event, add and delete lists
+ * (ST_KQ_MIN_EVTLIST_SIZE entries each).
+ *
+ * Returns 0 on success. On failure everything acquired so far is
+ * released, _st_kq_data is reset to NULL, errno is set and -1 is
+ * returned.
+ */
+ST_HIDDEN int _st_kq_init(void)
+{
+    int saved_errno = 0;
+    int status = 0;
+
+    _st_kq_data = (struct _st_kqdata *) calloc(1, sizeof(*_st_kq_data));
+    if (_st_kq_data == NULL)
+        return -1;
+
+    _st_kq_data->kq = kqueue();
+    if (_st_kq_data->kq < 0) {
+        saved_errno = errno;
+        status = -1;
+        goto cleanup_kq;
+    }
+    fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
+    _st_kq_data->pid = getpid();
+
+    /*
+     * Per-descriptor data array; FD_SETSIZE serves as the initial size.
+     */
+    _st_kq_data->fd_data_size = FD_SETSIZE;
+    _st_kq_data->fd_data = (_kq_fd_data_t *)calloc(_st_kq_data->fd_data_size,
+                                                   sizeof(_kq_fd_data_t));
+    if (_st_kq_data->fd_data == NULL) {
+        saved_errno = errno;
+        status = -1;
+        goto cleanup_kq;
+    }
+
+    /* Result, add and delete lists all start at the minimum size */
+    _st_kq_data->evtlist_size = ST_KQ_MIN_EVTLIST_SIZE;
+    _st_kq_data->evtlist = (struct kevent *)malloc(_st_kq_data->evtlist_size *
+                                                   sizeof(struct kevent));
+    _st_kq_data->addlist_size = ST_KQ_MIN_EVTLIST_SIZE;
+    _st_kq_data->addlist = (struct kevent *)malloc(_st_kq_data->addlist_size *
+                                                   sizeof(struct kevent));
+    _st_kq_data->dellist_size = ST_KQ_MIN_EVTLIST_SIZE;
+    _st_kq_data->dellist = (struct kevent *)malloc(_st_kq_data->dellist_size *
+                                                   sizeof(struct kevent));
+    if (_st_kq_data->evtlist == NULL || _st_kq_data->addlist == NULL ||
+        _st_kq_data->dellist == NULL) {
+        saved_errno = ENOMEM;
+        status = -1;
+    }
+
+ cleanup_kq:
+    if (status == 0)
+        return 0;
+
+    /* Failure: undo everything that was set up */
+    if (_st_kq_data->kq >= 0)
+        close(_st_kq_data->kq);
+    free(_st_kq_data->fd_data);
+    free(_st_kq_data->evtlist);
+    free(_st_kq_data->addlist);
+    free(_st_kq_data->dellist);
+    free(_st_kq_data);
+    _st_kq_data = NULL;
+    errno = saved_errno;
+
+    return status;
+}
+
+/*
+ * Grows the per-descriptor data array so that index maxfd is valid.
+ * The size is doubled until it exceeds maxfd; the added entries are
+ * zero-filled.
+ *
+ * Returns 0 on success, -1 if the reallocation fails (the existing
+ * array is left untouched).
+ */
+ST_HIDDEN int _st_kq_fd_data_expand(int maxfd)
+{
+    _kq_fd_data_t *grown;
+    int old_size = _st_kq_data->fd_data_size;
+    int new_size = old_size;
+
+    while (new_size <= maxfd)
+        new_size <<= 1;
+
+    grown = (_kq_fd_data_t *)realloc(_st_kq_data->fd_data,
+                                     new_size * sizeof(_kq_fd_data_t));
+    if (grown == NULL)
+        return -1;
+
+    /* Clear only the tail that realloc() added */
+    memset(&grown[old_size], 0,
+           (new_size - old_size) * sizeof(_kq_fd_data_t));
+
+    _st_kq_data->fd_data_size = new_size;
+    _st_kq_data->fd_data = grown;
+
+    return 0;
+}
+
+/*
+ * Grows the add list until at least `avail` free slots exist behind the
+ * entries already queued. The size is doubled as often as needed.
+ * The result event list is grown to the same size on a best-effort
+ * basis; a failure there is ignored.
+ *
+ * Returns 0 on success, -1 if the add list cannot be reallocated.
+ */
+ST_HIDDEN int _st_kq_addlist_expand(int avail)
+{
+    struct kevent *grown;
+    int new_size = _st_kq_data->addlist_size;
+
+    while (new_size - _st_kq_data->addlist_cnt < avail)
+        new_size <<= 1;
+
+    grown = (struct kevent *)realloc(_st_kq_data->addlist,
+                                     new_size * sizeof(struct kevent));
+    if (grown == NULL)
+        return -1;
+
+    _st_kq_data->addlist_size = new_size;
+    _st_kq_data->addlist = grown;
+
+    /*
+     * Optional: let the result list keep pace with the add list.
+     * If this fails the old, smaller result list stays in use.
+     */
+    grown = (struct kevent *)realloc(_st_kq_data->evtlist,
+                                     new_size * sizeof(struct kevent));
+    if (grown != NULL) {
+        _st_kq_data->evtlist_size = new_size;
+        _st_kq_data->evtlist = grown;
+    }
+
+    return 0;
+}
+
+/*
+ * Appends a copy of *kev to the add list. The caller must have made
+ * sure that a free slot exists (asserted here).
+ */
+ST_HIDDEN void _st_kq_addlist_add(const struct kevent *kev)
+{
+    struct kevent *slot;
+
+    ST_ASSERT(_st_kq_data->addlist_cnt < _st_kq_data->addlist_size);
+
+    slot = &_st_kq_data->addlist[_st_kq_data->addlist_cnt];
+    memcpy(slot, kev, sizeof(struct kevent));
+    _st_kq_data->addlist_cnt++;
+}
+
+/*
+ * Appends a copy of *kev to the delete list, doubling the list first
+ * when it is full. If the list cannot be grown the entry is silently
+ * dropped.
+ */
+ST_HIDDEN void _st_kq_dellist_add(const struct kevent *kev)
+{
+    if (_st_kq_data->dellist_cnt >= _st_kq_data->dellist_size) {
+        int doubled = _st_kq_data->dellist_size << 1;
+        struct kevent *grown;
+
+        grown = (struct kevent *)realloc(_st_kq_data->dellist,
+                                         doubled * sizeof(struct kevent));
+        if (grown == NULL) {
+            /*
+             * Out of memory: skip this delete request. Events in this
+             * file are registered with EV_ONESHOT, so a missed delete
+             * is tolerated.
+             */
+            return;
+        }
+
+        _st_kq_data->dellist_size = doubled;
+        _st_kq_data->dellist = grown;
+    }
+
+    memcpy(&_st_kq_data->dellist[_st_kq_data->dellist_cnt], kev,
+           sizeof(struct kevent));
+    _st_kq_data->dellist_cnt++;
+}
+
+/*
+ * Registers a poll set with the kqueue event system.
+ *
+ * The operation is all-or-nothing: every entry is validated and all
+ * storage is reserved before any state is changed, so no unwinding is
+ * ever needed. Each entry must have a non-negative descriptor and an
+ * event mask made of POLLIN and/or POLLOUT only.
+ *
+ * For every descriptor whose read (write) reference count goes from
+ * zero to one, a one-shot EVFILT_READ (EVFILT_WRITE) registration is
+ * queued on the add list.
+ *
+ * Returns 0 on success; -1 on invalid input (errno = EINVAL) or when
+ * storage cannot be grown.
+ */
+ST_HIDDEN int _st_kq_pollset_add(struct pollfd *pds, int npds)
+{
+    struct kevent kev;
+    int i, needed;
+
+    /* Pass 1: validate everything and make room in the fd data array */
+    for (i = 0; i < npds; i++) {
+        int fd = pds[i].fd;
+        short wanted = pds[i].events;
+
+        /* POLLIN and/or POLLOUT must be set, but nothing else */
+        if (fd < 0 || wanted == 0 || (wanted & ~(POLLIN | POLLOUT)) != 0) {
+            errno = EINVAL;
+            return -1;
+        }
+        if (fd >= _st_kq_data->fd_data_size &&
+            _st_kq_fd_data_expand(fd) < 0)
+            return -1;
+    }
+
+    /*
+     * Each descriptor may need two add list slots, one for the READ
+     * filter and one for the WRITE filter.
+     */
+    needed = npds << 1;
+    if (needed > _st_kq_data->addlist_size - _st_kq_data->addlist_cnt &&
+        _st_kq_addlist_expand(needed) < 0)
+        return -1;
+
+    /* Pass 2: bump the counts and queue registrations for first users */
+    for (i = 0; i < npds; i++) {
+        if (pds[i].events & POLLIN) {
+            if (_ST_KQ_READ_CNT(pds[i].fd)++ == 0) {
+                memset(&kev, 0, sizeof(kev));
+                kev.ident = pds[i].fd;
+                kev.filter = EVFILT_READ;
+#ifdef NOTE_EOF
+                /* Make it behave like select() and poll() */
+                kev.fflags = NOTE_EOF;
+#endif
+                kev.flags = (EV_ADD | EV_ONESHOT);
+                _st_kq_addlist_add(&kev);
+            }
+        }
+        if (pds[i].events & POLLOUT) {
+            if (_ST_KQ_WRITE_CNT(pds[i].fd)++ == 0) {
+                memset(&kev, 0, sizeof(kev));
+                kev.ident = pds[i].fd;
+                kev.filter = EVFILT_WRITE;
+                kev.flags = (EV_ADD | EV_ONESHOT);
+                _st_kq_addlist_add(&kev);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Unregisters a poll set from the kqueue event system.
+ *
+ * Drops the read/write reference count of every entry; for each count
+ * that reaches zero an EV_DELETE request is collected on the delete
+ * list, which is then submitted to the kernel in one non-blocking
+ * kevent() call (retried on EINTR).
+ *
+ * A failed delete is tolerated: the descriptor will either be closed
+ * or fire only once, because registrations use EV_ONESHOT.
+ */
+ST_HIDDEN void _st_kq_pollset_del(struct pollfd *pds, int npds)
+{
+    struct kevent kev;
+    int i;
+
+    _st_kq_data->dellist_cnt = 0;
+
+    for (i = 0; i < npds; i++) {
+        if (pds[i].events & POLLIN) {
+            if (--_ST_KQ_READ_CNT(pds[i].fd) == 0) {
+                memset(&kev, 0, sizeof(kev));
+                kev.ident = pds[i].fd;
+                kev.filter = EVFILT_READ;
+                kev.flags = EV_DELETE;
+                _st_kq_dellist_add(&kev);
+            }
+        }
+        if (pds[i].events & POLLOUT) {
+            if (--_ST_KQ_WRITE_CNT(pds[i].fd) == 0) {
+                memset(&kev, 0, sizeof(kev));
+                kev.ident = pds[i].fd;
+                kev.filter = EVFILT_WRITE;
+                kev.flags = EV_DELETE;
+                _st_kq_dellist_add(&kev);
+            }
+        }
+    }
+
+    if (_st_kq_data->dellist_cnt > 0) {
+        /*
+         * Deletes are applied synchronously to avoid deleting closed
+         * descriptors and other possible problems.
+         */
+        int rv;
+
+        for (;;) {
+            /* Result list size is 0, so this kevent() cannot block */
+            rv = kevent(_st_kq_data->kq, _st_kq_data->dellist,
+                        _st_kq_data->dellist_cnt, NULL, 0, NULL);
+            if (!(rv < 0 && errno == EINTR))
+                break;
+        }
+    }
+}
+
+/*
+ * Dispatch routine for the kqueue event system.
+ *
+ * Submits the pending add list and waits for events in a single
+ * kevent() call. The wait is bounded by the time remaining until
+ * _ST_SLEEPQ->due, or is unbounded when _ST_SLEEPQ is NULL.
+ *
+ * Returned events are first folded into the per-descriptor revents
+ * slots. Then every poll request on _ST_IOQ is examined: its revents
+ * fields are filled in and, if anything fired, the request is unlinked,
+ * reference counts are dropped, registrations that did not fire and are
+ * no longer referenced are deleted, and the owning thread is made
+ * runnable. Finally the per-descriptor revents slots are cleared.
+ *
+ * If kevent() fails with EBADF and the pid has changed, a new kqueue is
+ * created, all queued requests are re-registered and the call is
+ * retried.
+ */
+ST_HIDDEN void _st_kq_dispatch(void)
+{
+    struct timespec timeout, *tsp;
+    struct kevent kev;
+    st_utime_t min_timeout;
+    _st_clist_t *q;
+    _st_pollq_t *pq;
+    struct pollfd *pds, *epds;
+    int nfd, i, osfd, notify, filter;
+    short events, revents;
+
+    if (_ST_SLEEPQ == NULL) {
+        /* Nothing is sleeping: block until an event arrives */
+        tsp = NULL;
+    } else {
+        /* Remaining time in microseconds; zero if already due */
+        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
+            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
+        timeout.tv_sec  = (time_t) (min_timeout / 1000000);
+        timeout.tv_nsec = (long) ((min_timeout % 1000000) * 1000);
+        tsp = &timeout;
+    }
+
+ retry_kevent:
+    /* Check for I/O operations */
+    nfd = kevent(_st_kq_data->kq,
+                 _st_kq_data->addlist, _st_kq_data->addlist_cnt,
+                 _st_kq_data->evtlist, _st_kq_data->evtlist_size, tsp);
+
+    /* The add list has been handed to kevent(); start a fresh one */
+    _st_kq_data->addlist_cnt = 0;
+
+    if (nfd > 0) {
+        /* Pass 1: accumulate what fired, per descriptor */
+        for (i = 0; i < nfd; i++) {
+            osfd = _st_kq_data->evtlist[i].ident;
+            filter = _st_kq_data->evtlist[i].filter;
+
+            if (filter == EVFILT_READ) {
+                _ST_KQ_REVENTS(osfd) |= POLLIN;
+            } else if (filter == EVFILT_WRITE) {
+                _ST_KQ_REVENTS(osfd) |= POLLOUT;
+            }
+            if (_st_kq_data->evtlist[i].flags & EV_ERROR) {
+                if (_st_kq_data->evtlist[i].data == EBADF) {
+                    _ST_KQ_REVENTS(osfd) |= POLLNVAL;
+                } else {
+                    _ST_KQ_REVENTS(osfd) |= POLLERR;
+                }
+            }
+        }
+
+        _st_kq_data->dellist_cnt = 0;
+
+        /*
+         * Pass 2: match the accumulated events against queued requests.
+         * NOTE(review): q->next is read after the entry may have been
+         * unlinked; this presumably relies on ST_REMOVE_LINK leaving the
+         * removed node's own pointers untouched -- confirm against the
+         * macro definition.
+         */
+        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+            pq = _ST_POLLQUEUE_PTR(q);
+            notify = 0;
+            epds = pq->pds + pq->npds;
+                     
+            for (pds = pq->pds; pds < epds; pds++) {
+                osfd = pds->fd;
+                events = pds->events;
+                /* Error bits are always reported; I/O bits only if asked for */
+                revents = (short)(_ST_KQ_REVENTS(osfd) & ~(POLLIN | POLLOUT));
+                if ((events & POLLIN) && (_ST_KQ_REVENTS(osfd) & POLLIN)) {
+                    revents |= POLLIN;
+                }
+                if ((events & POLLOUT) && (_ST_KQ_REVENTS(osfd) & POLLOUT)) {
+                    revents |= POLLOUT;
+                }
+                pds->revents = revents;
+                if (revents) {
+                    notify = 1;
+                }
+            }
+            if (notify) {
+                ST_REMOVE_LINK(&pq->links);
+                pq->on_ioq = 0;
+                for (pds = pq->pds; pds < epds; pds++) {
+                    osfd = pds->fd;
+                    events = pds->events;
+                    /*
+                     * We set EV_ONESHOT flag so we only need to delete
+                     * descriptor if it didn't fire.
+                     */
+                    if ((events & POLLIN) && (--_ST_KQ_READ_CNT(osfd) == 0) &&
+                        ((_ST_KQ_REVENTS(osfd) & POLLIN) == 0)) {
+                        memset(&kev, 0, sizeof(kev));
+                        kev.ident = osfd;
+                        kev.filter = EVFILT_READ;
+                        kev.flags = EV_DELETE;
+                        _st_kq_dellist_add(&kev);
+                    }
+                    if ((events & POLLOUT) && (--_ST_KQ_WRITE_CNT(osfd) == 0)
+                        && ((_ST_KQ_REVENTS(osfd) & POLLOUT) == 0)) {
+                        memset(&kev, 0, sizeof(kev));
+                        kev.ident = osfd;
+                        kev.filter = EVFILT_WRITE;
+                        kev.flags = EV_DELETE;
+                        _st_kq_dellist_add(&kev);
+                    }
+                }
+
+                /* Wake the waiting thread; cancel its timeout if it has one */
+                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
+                    _ST_DEL_SLEEPQ(pq->thread);
+                pq->thread->state = _ST_ST_RUNNABLE;
+                _ST_ADD_RUNQ(pq->thread);
+            }
+        }
+
+        /* Submit the collected deletes; the result is not checked */
+        if (_st_kq_data->dellist_cnt > 0) {
+            int rv;
+            do {
+                /* This kevent() won't block since result list size is 0 */
+                rv = kevent(_st_kq_data->kq, _st_kq_data->dellist,
+                            _st_kq_data->dellist_cnt, NULL, 0, NULL);
+            } while (rv < 0 && errno == EINTR);
+        }
+
+        /* Pass 3: reset the per-descriptor revents for the next dispatch */
+        for (i = 0; i < nfd; i++) {
+            osfd = _st_kq_data->evtlist[i].ident;
+            _ST_KQ_REVENTS(osfd) = 0;
+        }
+
+    } else if (nfd < 0) {
+        if (errno == EBADF && _st_kq_data->pid != getpid()) {
+            /* We probably forked, reinitialize kqueue */
+            if ((_st_kq_data->kq = kqueue()) < 0) {
+                /* There is nothing we can do here, will retry later */
+                return;
+            }
+            fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
+            _st_kq_data->pid = getpid();
+            /* Re-register all descriptors on ioq with new kqueue */
+            /* Counts are zeroed first so that pollset_add queues every filter again */
+            memset(_st_kq_data->fd_data, 0,
+                   _st_kq_data->fd_data_size * sizeof(_kq_fd_data_t));
+            for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+                pq = _ST_POLLQUEUE_PTR(q);
+                _st_kq_pollset_add(pq->pds, pq->npds);
+            }
+            goto retry_kevent;
+        }
+    }
+}
+
+/*
+ * Prepares the kqueue event system for descriptor osfd by making sure
+ * the per-descriptor data array is large enough to be indexed by it.
+ *
+ * Returns 0 on success, -1 if the array cannot be grown.
+ */
+ST_HIDDEN int _st_kq_fd_new(int osfd)
+{
+    if (osfd < _st_kq_data->fd_data_size)
+        return 0;
+
+    return (_st_kq_fd_data_expand(osfd) < 0) ? -1 : 0;
+}
+
+/*
+ * Checks whether osfd may be closed under the kqueue event system.
+ * The descriptor is considered busy while its read or write reference
+ * count is non-zero.
+ *
+ * Returns 0 if no count is outstanding, or -1 with errno set to EBUSY.
+ */
+ST_HIDDEN int _st_kq_fd_close(int osfd)
+{
+    int in_use = _ST_KQ_READ_CNT(osfd) || _ST_KQ_WRITE_CNT(osfd);
+
+    if (!in_use)
+        return 0;
+
+    errno = EBUSY;
+    return -1;
+}
+
+/*
+ * Returns the descriptor limit of the kqueue event system.
+ * Zero means there is no specific limit.
+ */
+ST_HIDDEN int _st_kq_fd_getlimit(void)
+{
+    const int no_limit = 0;
+
+    return no_limit;
+}
+
+/*
+ * Descriptor of the kqueue event system, registered under the
+ * ST_EVENTSYS_ALT identifier. The initializers are positional, so their
+ * order has to match the member order of _st_eventsys_t (declared
+ * outside this section).
+ */
+static _st_eventsys_t _st_kq_eventsys = {
+    "kqueue",                   /* name string */
+    ST_EVENTSYS_ALT,            /* event system identifier */
+    _st_kq_init,                /* initialization */
+    _st_kq_dispatch,            /* wait for and dispatch I/O events */
+    _st_kq_pollset_add,         /* register a poll set */
+    _st_kq_pollset_del,         /* unregister a poll set */
+    _st_kq_fd_new,              /* prepare for a new descriptor */
+    _st_kq_fd_close,            /* check that a descriptor may be closed */
+    _st_kq_fd_getlimit          /* descriptor limit */
+};
+#endif  /* MD_HAVE_KQUEUE */
+
+
+#ifdef MD_HAVE_EPOLL
+/*****************************************
+ * epoll event system
+ */
+
+/*
+ * Initializes the epoll event system: allocates the zero-filled state
+ * block, picks a size hint (the descriptor limit when it is positive
+ * and below ST_EPOLL_EVTLIST_SIZE, otherwise ST_EPOLL_EVTLIST_SIZE),
+ * creates the epoll descriptor (marked close-on-exec), records the
+ * current pid, and allocates the per-descriptor data array and the
+ * event list, both sized by the hint.
+ *
+ * Returns 0 on success. On failure everything acquired so far is
+ * released, _st_epoll_data is reset to NULL, errno is set and -1 is
+ * returned.
+ */
+ST_HIDDEN int _st_epoll_init(void)
+{
+    int fdlim;
+    int saved_errno = 0;
+    int status = 0;
+
+    _st_epoll_data =
+        (struct _st_epolldata *) calloc(1, sizeof(*_st_epoll_data));
+    if (_st_epoll_data == NULL)
+        return -1;
+
+    fdlim = st_getfdlimit();
+    if (fdlim > 0 && fdlim < ST_EPOLL_EVTLIST_SIZE)
+        _st_epoll_data->fd_hint = fdlim;
+    else
+        _st_epoll_data->fd_hint = ST_EPOLL_EVTLIST_SIZE;
+
+    _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint);
+    if (_st_epoll_data->epfd < 0) {
+        saved_errno = errno;
+        status = -1;
+        goto cleanup_epoll;
+    }
+    fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC);
+    _st_epoll_data->pid = getpid();
+
+    /* Per-descriptor data array */
+    _st_epoll_data->fd_data_size = _st_epoll_data->fd_hint;
+    _st_epoll_data->fd_data =
+        (_epoll_fd_data_t *)calloc(_st_epoll_data->fd_data_size,
+                                   sizeof(_epoll_fd_data_t));
+    if (_st_epoll_data->fd_data == NULL) {
+        saved_errno = errno;
+        status = -1;
+        goto cleanup_epoll;
+    }
+
+    /* Result event list */
+    _st_epoll_data->evtlist_size = _st_epoll_data->fd_hint;
+    _st_epoll_data->evtlist =
+        (struct epoll_event *)malloc(_st_epoll_data->evtlist_size *
+                                     sizeof(struct epoll_event));
+    if (_st_epoll_data->evtlist == NULL) {
+        saved_errno = errno;
+        status = -1;
+    }
+
+ cleanup_epoll:
+    if (status == 0)
+        return 0;
+
+    /* Failure: undo everything that was set up */
+    if (_st_epoll_data->epfd >= 0)
+        close(_st_epoll_data->epfd);
+    free(_st_epoll_data->fd_data);
+    free(_st_epoll_data->evtlist);
+    free(_st_epoll_data);
+    _st_epoll_data = NULL;
+    errno = saved_errno;
+
+    return status;
+}
+
+/*
+ * Grows the per-descriptor data array so that index maxfd is valid.
+ * The size is doubled until it exceeds maxfd; the added entries are
+ * zero-filled.
+ *
+ * Returns 0 on success, -1 if the reallocation fails (the existing
+ * array is left untouched).
+ */
+ST_HIDDEN int _st_epoll_fd_data_expand(int maxfd)
+{
+    _epoll_fd_data_t *grown;
+    int old_size = _st_epoll_data->fd_data_size;
+    int new_size = old_size;
+
+    while (new_size <= maxfd)
+        new_size <<= 1;
+
+    grown = (_epoll_fd_data_t *)realloc(_st_epoll_data->fd_data,
+                                        new_size * sizeof(_epoll_fd_data_t));
+    if (grown == NULL)
+        return -1;
+
+    /* Clear only the tail that realloc() added */
+    memset(&grown[old_size], 0,
+           (new_size - old_size) * sizeof(_epoll_fd_data_t));
+
+    _st_epoll_data->fd_data_size = new_size;
+    _st_epoll_data->fd_data = grown;
+
+    return 0;
+}
+
+/*
+ * Grows the result event list, by doubling, until it can hold
+ * evtlist_cnt entries. This is best effort: if the reallocation fails
+ * the current list and its size are kept unchanged.
+ */
+ST_HIDDEN void _st_epoll_evtlist_expand(void)
+{
+    struct epoll_event *grown;
+    int new_size = _st_epoll_data->evtlist_size;
+
+    while (new_size < _st_epoll_data->evtlist_cnt)
+        new_size <<= 1;
+
+    grown = (struct epoll_event *)realloc(_st_epoll_data->evtlist,
+                                          new_size * sizeof(struct epoll_event));
+    if (grown == NULL)
+        return;
+
+    _st_epoll_data->evtlist_size = new_size;
+    _st_epoll_data->evtlist = grown;
+}
+
+/*
+ * Unregisters a poll set from the epoll event system.
+ *
+ * For every entry the read/write/exception reference counts are
+ * dropped according to its event mask. If that changes the combined
+ * event mask of the descriptor, and the descriptor has no pending
+ * revents, the kernel registration is modified, or deleted when the
+ * mask became empty.
+ *
+ * A failed epoll_ctl() is tolerated: the descriptor will either be
+ * closed or be handled by the dispatch function after it fires.
+ */
+ST_HIDDEN void _st_epoll_pollset_del(struct pollfd *pds, int npds)
+{
+    struct epoll_event ev;
+    int i;
+
+    for (i = 0; i < npds; i++) {
+        int fd = pds[i].fd;
+        int before = _ST_EPOLL_EVENTS(fd);
+        int after, op;
+
+        if (pds[i].events & POLLIN)
+            _ST_EPOLL_READ_CNT(fd)--;
+        if (pds[i].events & POLLOUT)
+            _ST_EPOLL_WRITE_CNT(fd)--;
+        if (pds[i].events & POLLPRI)
+            _ST_EPOLL_EXCEP_CNT(fd)--;
+
+        after = _ST_EPOLL_EVENTS(fd);
+
+        /*
+         * Skip descriptors with pending revents so this function can be
+         * called from inside dispatch(), which handles fired descriptors
+         * itself. Outside of dispatch() revents is always zero.
+         */
+        if (after == before || _ST_EPOLL_REVENTS(fd) != 0)
+            continue;
+
+        op = after ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
+        ev.events = after;
+        ev.data.fd = fd;
+        if (epoll_ctl(_st_epoll_data->epfd, op, fd, &ev) == 0 &&
+            op == EPOLL_CTL_DEL) {
+            _st_epoll_data->evtlist_cnt--;
+        }
+    }
+}
+
+/*
+ * Registers a poll set with the epoll event system.
+ *
+ * Each entry must have a non-negative descriptor and a non-empty event
+ * mask made of POLLIN, POLLOUT and POLLPRI only. Reference counts are
+ * raised per entry, and whenever the combined event mask of a
+ * descriptor changes the kernel registration is added or modified.
+ * An EEXIST error on add is not treated as a failure.
+ *
+ * Returns 0 on success. On failure the entries processed so far
+ * (including the failing one) are unregistered again, errno reflects
+ * the original error and -1 is returned.
+ */
+ST_HIDDEN int _st_epoll_pollset_add(struct pollfd *pds, int npds)
+{
+    struct epoll_event ev;
+    int i, fd;
+    int before, after, op;
+
+    /* Do as many checks as possible up front */
+    for (i = 0; i < npds; i++) {
+        fd = pds[i].fd;
+        if (fd < 0 || pds[i].events == 0 ||
+            (pds[i].events & ~(POLLIN | POLLOUT | POLLPRI)) != 0) {
+            errno = EINVAL;
+            return -1;
+        }
+        if (fd >= _st_epoll_data->fd_data_size &&
+            _st_epoll_fd_data_expand(fd) < 0)
+            return -1;
+    }
+
+    for (i = 0; i < npds; i++) {
+        fd = pds[i].fd;
+        before = _ST_EPOLL_EVENTS(fd);
+
+        if (pds[i].events & POLLIN)
+            _ST_EPOLL_READ_CNT(fd)++;
+        if (pds[i].events & POLLOUT)
+            _ST_EPOLL_WRITE_CNT(fd)++;
+        if (pds[i].events & POLLPRI)
+            _ST_EPOLL_EXCEP_CNT(fd)++;
+
+        after = _ST_EPOLL_EVENTS(fd);
+        if (after == before)
+            continue;
+
+        op = before ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
+        ev.events = after;
+        ev.data.fd = fd;
+        if (epoll_ctl(_st_epoll_data->epfd, op, fd, &ev) < 0 &&
+            (op != EPOLL_CTL_ADD || errno != EEXIST)) {
+            /* Error: unroll entries 0..i, whose counts were raised */
+            int err = errno;
+
+            _st_epoll_pollset_del(pds, i + 1);
+            errno = err;
+            return -1;
+        }
+
+        if (op == EPOLL_CTL_ADD) {
+            _st_epoll_data->evtlist_cnt++;
+            if (_st_epoll_data->evtlist_cnt > _st_epoll_data->evtlist_size)
+                _st_epoll_evtlist_expand();
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Dispatch routine for the epoll event system.
+ *
+ * If the pid has changed since the epoll set was created, the set is
+ * recreated and every request on _ST_IOQ is registered again.
+ *
+ * Then epoll_wait() is called. The wait is bounded by the time
+ * remaining until _ST_SLEEPQ->due, or is unbounded when _ST_SLEEPQ is
+ * NULL. Returned events are stored in the per-descriptor revents slots;
+ * every queued request is examined, its revents fields are filled in
+ * and, if anything fired, the request is unlinked, unregistered and its
+ * thread made runnable. Finally the descriptors that fired have their
+ * revents slot cleared and their kernel registration modified, or
+ * deleted when nothing references them any more.
+ */
+ST_HIDDEN void _st_epoll_dispatch(void)
+{
+    st_utime_t min_timeout;
+    _st_clist_t *q;
+    _st_pollq_t *pq;
+    struct pollfd *pds, *epds;
+    struct epoll_event ev;
+    int timeout, nfd, i, osfd, notify;
+    int events, op;
+    short revents;
+
+    if (_ST_SLEEPQ == NULL) {
+        /* Nothing is sleeping: block until an event arrives */
+        timeout = -1;
+    } else {
+        /* Remaining time in microseconds; zero if already due */
+        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
+            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
+        /*
+         * epoll_wait() takes milliseconds. Round up rather than down: a
+         * remaining time below 1 ms would otherwise become a zero
+         * timeout and the caller would spin until the sleeper is due.
+         */
+        timeout = (int) (min_timeout / 1000);
+        if (min_timeout % 1000)
+            timeout++;
+    }
+
+    if (_st_epoll_data->pid != getpid()) {
+        /* We probably forked, reinitialize epoll set */
+        close(_st_epoll_data->epfd);
+        _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint);
+        if (_st_epoll_data->epfd < 0) {
+            /* There is nothing we can do here, will retry later */
+            return;
+        }
+        fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC);
+        _st_epoll_data->pid = getpid();
+
+        /* Put all descriptors on ioq into new epoll set */
+        /* Counts are zeroed first so that pollset_add registers every fd again */
+        memset(_st_epoll_data->fd_data, 0,
+               _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t));
+        _st_epoll_data->evtlist_cnt = 0;
+        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+            pq = _ST_POLLQUEUE_PTR(q);
+            _st_epoll_pollset_add(pq->pds, pq->npds);
+        }
+    }
+
+    /* Check for I/O operations */
+    nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist,
+                     _st_epoll_data->evtlist_size, timeout);
+
+    if (nfd > 0) {
+        /* Pass 1: record what fired, per descriptor */
+        for (i = 0; i < nfd; i++) {
+            osfd = _st_epoll_data->evtlist[i].data.fd;
+            _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events;
+            if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) {
+                /* Also set I/O bits on error */
+                _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd);
+            }
+        }
+
+        /*
+         * Pass 2: match the recorded events against queued requests.
+         * NOTE(review): q->next is read after the entry may have been
+         * unlinked; this presumably relies on ST_REMOVE_LINK leaving the
+         * removed node's own pointers untouched -- confirm against the
+         * macro definition.
+         */
+        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
+            pq = _ST_POLLQUEUE_PTR(q);
+            notify = 0;
+            epds = pq->pds + pq->npds;
+
+            for (pds = pq->pds; pds < epds; pds++) {
+                if (_ST_EPOLL_REVENTS(pds->fd) == 0) {
+                    pds->revents = 0;
+                    continue;
+                }
+                osfd = pds->fd;
+                events = pds->events;
+                revents = 0;
+                if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN))
+                    revents |= POLLIN;
+                if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT))
+                    revents |= POLLOUT;
+                if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI))
+                    revents |= POLLPRI;
+                /* Error and hang-up are reported whether or not asked for */
+                if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR)
+                    revents |= POLLERR;
+                if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP)
+                    revents |= POLLHUP;
+
+                pds->revents = revents;
+                if (revents) {
+                    notify = 1;
+                }
+            }
+            if (notify) {
+                ST_REMOVE_LINK(&pq->links);
+                pq->on_ioq = 0;
+                /*
+                 * Here we will only delete/modify descriptors that
+                 * didn't fire (see comments in _st_epoll_pollset_del()).
+                 */
+                _st_epoll_pollset_del(pq->pds, pq->npds);
+
+                /* Wake the waiting thread; cancel its timeout if it has one */
+                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
+                    _ST_DEL_SLEEPQ(pq->thread);
+                pq->thread->state = _ST_ST_RUNNABLE;
+                _ST_ADD_RUNQ(pq->thread);
+            }
+        }
+
+        /* Pass 3: clear revents and bring kernel registrations up to date */
+        for (i = 0; i < nfd; i++) {
+            /* Delete/modify descriptors that fired */
+            osfd = _st_epoll_data->evtlist[i].data.fd;
+            _ST_EPOLL_REVENTS(osfd) = 0;
+            events = _ST_EPOLL_EVENTS(osfd);
+            op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
+            ev.events = events;
+            ev.data.fd = osfd;
+            if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 &&
+                op == EPOLL_CTL_DEL) {
+                _st_epoll_data->evtlist_cnt--;
+            }
+        }
+    }
+}
+
+/*
+ * Prepares the epoll event system for descriptor osfd by making sure
+ * the per-descriptor data array is large enough to be indexed by it.
+ *
+ * Returns 0 on success, -1 if the array cannot be grown.
+ */
+ST_HIDDEN int _st_epoll_fd_new(int osfd)
+{
+    if (osfd < _st_epoll_data->fd_data_size)
+        return 0;
+
+    return (_st_epoll_fd_data_expand(osfd) < 0) ? -1 : 0;
+}
+
+ST_HIDDEN int _st_epoll_fd_close(int osfd)
+{
+    /* Closing is permitted only when all three per-descriptor counts are zero */
+    if (!_ST_EPOLL_READ_CNT(osfd) && !_ST_EPOLL_WRITE_CNT(osfd) &&
+        !_ST_EPOLL_EXCEP_CNT(osfd))
+        return 0;
+
+    /* Some count is still non-zero: refuse with EBUSY */
+    errno = EBUSY;
+    return -1;
+}
+
+ST_HIDDEN int _st_epoll_fd_getlimit(void)
+{
+    /* A limit of zero is the convention for "no specific limit" */
+    const int no_specific_limit = 0;
+
+    return no_specific_limit;
+}
+
+/*
+ * Find out whether the epoll functions are real or merely stubs by
+ * issuing an epoll_ctl() call that cannot succeed and looking at errno
+ * afterwards.  Returns 0 if errno is ENOSYS, 1 otherwise.
+ */
+ST_HIDDEN int _st_epoll_is_supported(void)
+{
+    struct epoll_event probe;
+
+    probe.data.ptr = NULL;
+    probe.events = EPOLLIN;
+
+    /* Both descriptors are -1, so the call is guaranteed to fail */
+    epoll_ctl(-1, EPOLL_CTL_ADD, -1, &probe);
+
+    if (errno == ENOSYS)
+        return 0;
+
+    return 1;
+}
+
+/*
+ * Descriptor table for the epoll backend.  st_set_eventsys() installs a
+ * pointer to it for ST_EVENTSYS_ALT when MD_HAVE_KQUEUE is not defined
+ * and _st_epoll_is_supported() returns non-zero.
+ * NOTE(review): the initializers are positional; the member order of
+ * _st_eventsys_t is declared elsewhere -- presumably name, val, then
+ * the entry points in the order listed here.  Confirm before reordering.
+ */
+static _st_eventsys_t _st_epoll_eventsys = {
+    "epoll",
+    ST_EVENTSYS_ALT,
+    _st_epoll_init,
+    _st_epoll_dispatch,
+    _st_epoll_pollset_add,
+    _st_epoll_pollset_del,
+    _st_epoll_fd_new,
+    _st_epoll_fd_close,
+    _st_epoll_fd_getlimit
+};
+#endif  /* MD_HAVE_EPOLL */
+
+
+/*****************************************
+ * Public functions
+ */
+
+/*
+ * Select the event notification backend identified by "eventsys".
+ *
+ * Returns 0 on success.  Returns -1 with errno set to EBUSY if a
+ * backend has already been installed, or to EINVAL if "eventsys" is
+ * not one of the values handled below.  For ST_EVENTSYS_ALT the call
+ * returns 0 without installing anything when no alternative backend
+ * is compiled in or epoll turns out to be unsupported.
+ */
+int st_set_eventsys(int eventsys)
+{
+    /* Refuse to switch once an event system has been installed */
+    if (_st_eventsys != NULL) {
+        errno = EBUSY;
+        return -1;
+    }
+
+    if (eventsys == ST_EVENTSYS_DEFAULT) {
+#ifdef USE_POLL
+        _st_eventsys = &_st_poll_eventsys;
+#else
+        _st_eventsys = &_st_select_eventsys;
+#endif
+    } else if (eventsys == ST_EVENTSYS_SELECT) {
+        _st_eventsys = &_st_select_eventsys;
+#ifdef MD_HAVE_POLL
+    } else if (eventsys == ST_EVENTSYS_POLL) {
+        _st_eventsys = &_st_poll_eventsys;
+#endif
+    } else if (eventsys == ST_EVENTSYS_ALT) {
+#if defined (MD_HAVE_KQUEUE)
+        _st_eventsys = &_st_kq_eventsys;
+#elif defined (MD_HAVE_EPOLL)
+        if (_st_epoll_is_supported())
+            _st_eventsys = &_st_epoll_eventsys;
+#endif
+    } else {
+        errno = EINVAL;
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Return the "val" of the installed event system, or -1 if none is set. */
+int st_get_eventsys(void)
+{
+    if (!_st_eventsys)
+        return -1;
+
+    return _st_eventsys->val;
+}
+
+/* Return the "name" of the installed event system, or "" if none is set. */
+const char *st_get_eventsys_name(void)
+{
+    if (!_st_eventsys)
+        return "";
+
+    return _st_eventsys->name;
+}
+
--- a/trunk/research/st-1.9/examples/Makefile 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/examples/Makefile 0 → 100644
查看文件 @7f4c113
+#
+# Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met: 
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer. 
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+# 3. Neither the name of Silicon Graphics, Inc. nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+##########################
+# Supported OSes:
+#
+# AIX
+# DARWIN
+# FREEBSD
+# HPUX
+# HPUX_64
+# IRIX
+# IRIX_64
+# LINUX
+# LINUX_IA64
+# NETBSD
+# OPENBSD
+# OSF1
+# SOLARIS
+# SOLARIS_64
+
+##########################
+
+CC          = cc
+
+SHELL       = /bin/sh
+ECHO        = /bin/echo
+
+DEPTH       = ..
+BUILD       =
+TARGETDIR   =
+
+DEFINES     =
+CFLAGS      =
+OTHER_FLAGS =
+
+OBJDIR      = $(DEPTH)/$(TARGETDIR)
+INCDIR      = $(DEPTH)/$(TARGETDIR)
+LIBST       = $(OBJDIR)/libst.a
+HEADER      = $(INCDIR)/st.h
+
+LIBRESOLV   =
+EXTRALIBS   =
+
+# With OS unset, "all" depends on the placeholder target "unknown".
+# No rule for "unknown" exists in this file, so it is handled by the
+# .DEFAULT rule at the bottom, which re-runs make in $(DEPTH).
+# With OS set, "all" builds the three example programs.
+ifeq ($(OS),)
+EXAMPLES    = unknown
+else
+EXAMPLES    = $(OBJDIR)/lookupdns $(OBJDIR)/proxy $(OBJDIR)/server
+endif
+
+
+##########################
+# Platform section.
+#
+
+ifeq (DARWIN, $(findstring DARWIN, $(OS)))
+LIBRESOLV   = -lresolv
+endif
+
+ifeq (LINUX, $(findstring LINUX, $(OS)))
+LIBRESOLV   = -lresolv
+endif
+
+ifeq (SOLARIS, $(findstring SOLARIS, $(OS)))
+LIBRESOLV   = -lresolv
+EXTRALIBS   = -lsocket -lnsl
+endif
+
+#
+# End of platform section.
+##########################
+
+
+# Default goal: build everything listed in EXAMPLES.
+all: $(EXAMPLES)
+
+# lookupdns links against res.o, libst.a and the resolver/extra libraries.
+$(OBJDIR)/lookupdns: lookupdns.c $(OBJDIR)/res.o $(LIBST) $(HEADER)
+	$(CC) $(CFLAGS) -I$(INCDIR) lookupdns.c $(OBJDIR)/res.o $(LIBST) $(LIBRESOLV) $(EXTRALIBS) -o $@
+
+# proxy needs only libst.a plus the platform's extra libraries.
+$(OBJDIR)/proxy: proxy.c $(LIBST) $(HEADER)
+	$(CC) $(CFLAGS) -I$(INCDIR) proxy.c $(LIBST) $(EXTRALIBS) -o $@
+
+# server additionally links the error.o reporting helpers.
+$(OBJDIR)/server: server.c $(OBJDIR)/error.o $(LIBST) $(HEADER)
+	$(CC) $(CFLAGS) -I$(INCDIR) server.c $(OBJDIR)/error.o $(LIBST) $(EXTRALIBS) -o $@
+
+# Pattern rule for the helper objects (res.o, error.o) placed in OBJDIR.
+$(OBJDIR)/%.o: %.c
+	$(CC) $(CFLAGS) -I$(INCDIR) -c $< -o $@
+
+# Any target without a rule in this file is forwarded to the Makefile
+# in $(DEPTH).
+.DEFAULT:
+	@cd $(DEPTH); $(MAKE) $@
+
--- a/trunk/research/st-1.9/examples/README 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/examples/README 0 → 100644
查看文件 @7f4c113
+Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+All Rights Reserved.
+
+
+This directory contains three example programs.
+
+
+---------------------------------------------------------------------------
+
+PROGRAM
+
+    lookupdns
+
+FILES
+
+    lookupdns.c
+    res.c
+
+USAGE
+
+    lookupdns <hostname1> [<hostname2>] ...
+
+DESCRIPTION
+
+    This program performs asynchronous DNS host name resolution and reports
+    the IP address for each <hostname> specified as a command line argument.
+    One ST thread is created for each host name.  All threads do host name
+    resolution concurrently.
+
+
+---------------------------------------------------------------------------
+
+PROGRAM
+
+    proxy
+
+FILES
+
+    proxy.c
+
+USAGE
+
+    proxy -l <local_addr> -r <remote_addr> [-p <num_processes>] [-S] [-a]
+
+    -l <local_addr>      bind to local address specified as [<host>]:<port>
+    -r <remote_addr>     connect to remote address specified as <host>:<port>
+    -p <num_processes>   create specified number of processes
+    -S                   serialize accept() calls from different processes
+                         on the same listening socket (if needed).
+    -a                   use the alternate event system
+
+DESCRIPTION
+
+    This program acts as a generic gateway.  It listens for connections to a
+    local address.  Upon accepting a client connection, it connects to the
+    specified remote address and then just pumps the data through without any
+    modification.
+
+
+---------------------------------------------------------------------------
+
+PROGRAM
+
+    server
+
+FILES
+
+    server.c
+    error.c
+
+USAGE
+
+    server -l <log_directory> [<options>]
+
+    -l <log_directory>      open all log files in specified directory.
+
+    Possible options:
+
+    -b <host>:<port>        bind to specified address (multiple addresses
+                            are permitted)
+    -p <num_processes>      create specified number of processes
+    -t <min_thr>:<max_thr>  specify thread limits per listening socket
+                            across all processes
+    -u <user>               change server's user id to specified value
+    -q <backlog>            set max length of pending connections queue
+    -a                      enable access logging
+    -i                      run in interactive mode (useful for debugging)
+    -S                      serialize accept() calls from different processes
+                            on the same listening socket (if needed).
+
+DESCRIPTION
+
+    This program is a general server example.  It accepts a client connection
+    and outputs a short HTML page.  It can be easily adapted to provide
+    other services.
+
+
+---------------------------------------------------------------------------
+
--- a/trunk/research/st-1.9/examples/error.c 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/examples/error.c 0 → 100644
查看文件 @7f4c113
+/*
+ * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met: 
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Silicon Graphics, Inc. nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include "st.h"
+
+/*
+ * Simple error reporting functions.
+ * Suggested in W. Richard Stevens' "Advanced Programming in the UNIX
+ * Environment".
+ */
+
+#define MAXLINE 4096  /* max line length */
+
+static void err_doit(int, int, const char *, va_list);
+
+
+/*
+ * Report a nonfatal error that came from a system call.
+ * The formatted message, followed by the description of the current
+ * errno, is written to "fd"; control then returns to the caller.
+ */
+void err_sys_report(int fd, const char *fmt, ...)
+{
+  va_list args;
+
+  va_start(args, fmt);
+  err_doit(fd, 1, fmt, args);   /* 1: append strerror(errno) */
+  va_end(args);
+}
+
+
+/*
+ * Report a fatal error that came from a system call.
+ * The formatted message, followed by the description of the current
+ * errno, is written to "fd"; the process then exits with status 1.
+ */
+void err_sys_quit(int fd, const char *fmt, ...)
+{
+  va_list args;
+
+  va_start(args, fmt);
+  err_doit(fd, 1, fmt, args);   /* 1: append strerror(errno) */
+  va_end(args);
+
+  exit(1);
+}
+
+
+/*
+ * Report a fatal error that came from a system call and abort.
+ * The formatted message, followed by the description of the current
+ * errno, is written to "fd"; abort() is then called to dump core.
+ */
+void err_sys_dump(int fd, const char *fmt, ...)
+{
+  va_list args;
+
+  va_start(args, fmt);
+  err_doit(fd, 1, fmt, args);   /* 1: append strerror(errno) */
+  va_end(args);
+
+  abort();  /* dump core and terminate */
+  exit(1);  /* shouldn't get here */
+}
+
+
+/*
+ * Report a nonfatal error that is not tied to a system call.
+ * Only the formatted message is written to "fd" (no errno text);
+ * control then returns to the caller.
+ */
+void err_report(int fd, const char *fmt, ...)
+{
+  va_list args;
+
+  va_start(args, fmt);
+  err_doit(fd, 0, fmt, args);   /* 0: no errno description */
+  va_end(args);
+}
+
+
+/*
+ * Report a fatal error that is not tied to a system call.
+ * Only the formatted message is written to "fd" (no errno text);
+ * the process then exits with status 1.
+ */
+void err_quit(int fd, const char *fmt, ...)
+{
+  va_list args;
+
+  va_start(args, fmt);
+  err_doit(fd, 0, fmt, args);   /* 0: no errno description */
+  va_end(args);
+
+  exit(1);
+}
+
+
+/*
+ * Return a pointer to a static string holding the current time in the
+ * form "[DD/Mon/YYYY:HH:MM:SS] ".  The string is rebuilt only when
+ * st_time() returns a value different from the one it was last built
+ * for; the same static buffer is returned on every call.
+ */
+char *err_tstamp(void)
+{
+  static char *month_abbr[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+  static char stamp[32];
+  static time_t stamp_time = 0;
+  time_t now = st_time();
+
+  if (now != stamp_time) {
+    struct tm *lt = localtime(&now);
+
+    sprintf(stamp, "[%02d/%s/%d:%02d:%02d:%02d] ", lt->tm_mday,
+            month_abbr[lt->tm_mon], 1900 + lt->tm_year, lt->tm_hour,
+            lt->tm_min, lt->tm_sec);
+    stamp_time = now;
+  }
+
+  return stamp;
+}
+
+
+/*
+ * Number of characters a (v)snprintf() call really stored, given its
+ * return value "n" and the buffer size "room" (room >= 1) it was given.
+ */
+static size_t err_stored(int n, size_t room)
+{
+  if (n <= 0)
+    return 0;
+  return ((size_t) n < room) ? (size_t) n : room - 1;
+}
+
+
+/*
+ * Print a message and return to caller.
+ * Caller specifies "errnoflag".
+ *
+ * fd         descriptor the finished line is written to
+ * errnoflag  non-zero: append ": " and strerror() of the errno value
+ *            seen on entry
+ * fmt, ap    printf-style format and its arguments
+ *
+ * The line is the time stamp, the formatted message, the optional
+ * error text and a newline.  Output that does not fit in MAXLINE
+ * bytes is truncated rather than written past the end of the buffer;
+ * the trailing newline is always emitted.  errno is preserved.
+ */
+static void err_doit(int fd, int errnoflag, const char *fmt, va_list ap)
+{
+  int errno_save;
+  char buf[MAXLINE];
+  size_t cap = sizeof(buf) - 1;   /* last byte is reserved for '\n' */
+  size_t len = 0;
+  int n;
+
+  errno_save = errno;         /* value caller might want printed   */
+
+  /* prepend a message with time stamp */
+  n = snprintf(buf, cap, "%s", err_tstamp());
+  len += err_stored(n, cap - len);
+
+  n = vsnprintf(buf + len, cap - len, fmt, ap);
+  len += err_stored(n, cap - len);
+
+  if (errnoflag) {
+    n = snprintf(buf + len, cap - len, ": %s", strerror(errno_save));
+    len += err_stored(n, cap - len);
+  }
+
+  /* len <= cap - 1 here, so the newline always fits */
+  buf[len++] = '\n';
+
+  /* Best effort: there is nowhere to report a failed error report */
+  write(fd, buf, len);
+  errno = errno_save;
+}
+
--- a/trunk/research/st-1.9/examples/lookupdns.c 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/examples/lookupdns.c 0 → 100644
查看文件 @7f4c113
+/*
+ * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met: 
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Silicon Graphics, Inc. nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include "st.h"
+
+#if !defined(NETDB_INTERNAL) && defined(h_NETDB_INTERNAL)
+#define NETDB_INTERNAL h_NETDB_INTERNAL
+#endif
+
+/* Resolution timeout (in microseconds) */
+#define TIMEOUT (2*1000000LL)
+
+/* External function defined in the res.c file */
+int dns_getaddr(const char *host, struct in_addr *addr, st_utime_t timeout);
+
+
+/*
+ * Thread start function: resolve one host name and print the result.
+ * "host" is the host name string.  On success the name and its address
+ * go to stdout; on failure a diagnostic goes to stderr.  Always
+ * returns NULL.
+ */
+void *do_resolve(void *host)
+{
+  char *name = (char *)host;
+  struct in_addr addr;
+
+  /* Use dns_getaddr() instead of gethostbyname(3) to get IP address */
+  if (dns_getaddr(name, &addr, TIMEOUT) >= 0) {
+    printf("%-40s %s\n", name, inet_ntoa(addr));
+    return NULL;
+  }
+
+  fprintf(stderr, "dns_getaddr: can't resolve %s: ", name);
+  if (h_errno != NETDB_INTERNAL)
+    herror("");
+  else
+    perror("");
+
+  return NULL;
+}
+
+
+/*
+ * Asynchronous DNS host name resolution.  Every command line argument
+ * is taken as a host name and handed to its own ST thread running
+ * do_resolve(), so all lookups proceed concurrently.
+ */
+int main(int argc, char *argv[])
+{
+  char **name;
+
+  if (argc < 2) {
+    fprintf(stderr, "Usage: %s <hostname1> [<hostname2>] ...\n", argv[0]);
+    exit(1);
+  }
+
+  if (st_init() < 0) {
+    perror("st_init");
+    exit(1);
+  }
+
+  /* Create a separate thread for each host name */
+  for (name = &argv[1]; name < &argv[argc]; name++) {
+    if (st_thread_create(do_resolve, *name, 0, 0) == NULL) {
+      perror("st_thread_create");
+      exit(1);
+    }
+  }
+
+  st_thread_exit(NULL);
+
+  /* NOTREACHED */
+  return 1;
+}
+
--- a/trunk/research/st-1.9/examples/proxy.c 0 → 100644
查看文件 @7f4c113
+++ b/trunk/research/st-1.9/examples/proxy.c 0 → 100644
查看文件 @7f4c113
+/*
+ * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met: 
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Silicon Graphics, Inc. nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include "st.h"
+
+#define IOBUFSIZE (16*1024)
+
+#define IOV_LEN   256
+#define IOV_COUNT (IOBUFSIZE / IOV_LEN)
+
+#ifndef INADDR_NONE
+#define INADDR_NONE 0xffffffff
+#endif
+
+static char *prog;                     /* Program name   */
+static struct sockaddr_in rmt_addr;    /* Remote address */
+
+static unsigned long testing;
+#define TESTING_VERBOSE		0x1
+#define TESTING_READV		0x2
+#define	TESTING_READ_RESID	0x4
+#define TESTING_WRITEV		0x8
+#define TESTING_WRITE_RESID	0x10
+
+static void read_address(const char *str, struct sockaddr_in *sin);
+static void start_daemon(void);
+static int  cpu_count(void);
+static void set_concurrency(int nproc);
+static void *handle_request(void *arg);
+static void print_sys_error(const char *msg);
+
+
+/*
+ * This program acts as a generic gateway. It listens for connections
+ * to a local address ('-l' option). Upon accepting a client connection,
+ * it connects to the specified remote address ('-r' option) and then
+ * just pumps the data through without any modification.
+ *
+ * Sequence: parse options, daemonize (unless -X), optionally select the
+ * alternate event system (-a), initialize ST, create the listening
+ * socket, fork the worker processes, then accept connections forever,
+ * starting one handle_request() thread per client.  Any setup failure
+ * is reported and ends the process with status 1.
+ */
+int main(int argc, char *argv[])
+{
+  extern char *optarg;
+  int opt, sock, n;
+  int laddr, raddr, num_procs, alt_ev, one_process;
+  int serialize_accept = 0;
+  struct sockaddr_in lcl_addr, cli_addr;
+  st_netfd_t cli_nfd, srv_nfd;
+
+  prog = argv[0];
+  num_procs = laddr = raddr = alt_ev = one_process = 0;
+
+  /* Parse arguments */
+  while((opt = getopt(argc, argv, "l:r:p:Saht:X")) != EOF) {
+    switch (opt) {
+    case 'a':
+      alt_ev = 1;
+      break;
+    case 'l':
+      read_address(optarg, &lcl_addr);
+      laddr = 1;
+      break;
+    case 'r':
+      read_address(optarg, &rmt_addr);
+      if (rmt_addr.sin_addr.s_addr == INADDR_ANY) {
+        fprintf(stderr, "%s: invalid remote address: %s\n", prog, optarg);
+        exit(1);
+      }
+      raddr = 1;
+      break;
+    case 'p':
+      num_procs = atoi(optarg);
+      if (num_procs < 1) {
+        fprintf(stderr, "%s: invalid number of processes: %s\n", prog, optarg);
+        exit(1);
+      }
+      break;
+    case 'S':
+      /*
+       * Serialization decision is tricky on some platforms. For example,
+       * Solaris 2.6 and above has kernel sockets implementation, so supposedly
+       * there is no need for serialization. The ST library may be compiled
+       * on one OS version, but used on another, so the need for serialization
+       * should be determined at run time by the application. Since it's just
+       * an example, the serialization decision is left up to user.
+       * Only on platforms where the serialization is never needed on any OS
+       * version st_netfd_serialize_accept() is a no-op.
+       */
+      serialize_accept = 1;
+      break;
+    case 't':
+      testing = strtoul(optarg, NULL, 0);
+      break;
+    case 'X':
+      one_process = 1;
+      break;
+    case 'h':
+    case '?':
+      fprintf(stderr, "Usage: %s [options] -l <[host]:port> -r <host:port>\n",
+       prog);
+      fprintf(stderr, "options are:\n");
+      fprintf(stderr, "  -p <num_processes>	number of parallel processes\n");
+      fprintf(stderr, "  -S			serialize accepts\n");
+      fprintf(stderr, "  -a			use alternate event system\n");
+#ifdef DEBUG
+      fprintf(stderr, "  -t mask		testing/debugging mode\n");
+      fprintf(stderr, "  -X			one process, don't daemonize\n");
+#endif
+      exit(1);
+    }
+  }
+  if (!laddr) {
+    fprintf(stderr, "%s: local address required\n", prog);
+    exit(1);
+  }
+  if (!raddr) {
+    fprintf(stderr, "%s: remote address required\n", prog);
+    exit(1);
+  }
+  /* No -p given: default to one process per CPU reported by cpu_count() */
+  if (num_procs == 0)
+    num_procs = cpu_count();
+
+  fprintf(stderr, "%s: starting proxy daemon on %s:%d\n", prog,
+          inet_ntoa(lcl_addr.sin_addr), ntohs(lcl_addr.sin_port));
+
+  /* Start the daemon */
+  if (one_process)
+    num_procs = 1;
+  else
+    start_daemon();
+
+  /* The event system has to be chosen before st_init() */
+  if (alt_ev)
+    st_set_eventsys(ST_EVENTSYS_ALT);
+
+  /* Initialize the ST library */
+  if (st_init() < 0) {
+    print_sys_error("st_init");
+    exit(1);
+  }
+
+  /* Create and bind listening socket */
+  if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
+    print_sys_error("socket");
+    exit(1);
+  }
+  n = 1;
+  if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n)) < 0) {
+    print_sys_error("setsockopt");
+    exit(1);
+  }
+  if (bind(sock, (struct sockaddr *)&lcl_addr, sizeof(lcl_addr)) < 0) {
+    print_sys_error("bind");
+    exit(1);
+  }
+  /* A failed listen() would otherwise only show up later in st_accept() */
+  if (listen(sock, 128) < 0) {
+    print_sys_error("listen");
+    exit(1);
+  }
+  if ((srv_nfd = st_netfd_open_socket(sock)) == NULL) {
+    print_sys_error("st_netfd_open");
+    exit(1);
+  }
+  /* See the comment regarding serialization decision above */
+  if (num_procs > 1 && serialize_accept && st_netfd_serialize_accept(srv_nfd)
+      < 0) {
+    print_sys_error("st_netfd_serialize_accept");
+    exit(1);
+  }
+
+  /* Start server processes */
+  if (!one_process)
+    set_concurrency(num_procs);
+
+  /* Accept loop: one ST thread per client connection */
+  for ( ; ; ) {
+    n = sizeof(cli_addr);
+    cli_nfd = st_accept(srv_nfd, (struct sockaddr *)&cli_addr, &n,
+     ST_UTIME_NO_TIMEOUT);
+    if (cli_nfd == NULL) {
+      print_sys_error("st_accept");
+      exit(1);
+    }
+    if (st_thread_create(handle_request, cli_nfd, 0, 0) == NULL) {
+      print_sys_error("st_thread_create");
+      exit(1);
+    }
+  }
+
+  /* NOTREACHED */
+  return 1;
+}
+
+
+/*
+ * Parse an address of the form "[<host>]:<port>" into *sin.
+ *
+ * str  address string taken from the command line
+ * sin  filled in with family AF_INET, the port in network byte order
+ *      and the host address (INADDR_ANY when <host> is empty)
+ *
+ * <host> may be a dotted-decimal address or a name that is looked up
+ * with gethostbyname().  <port> must be a decimal number in the range
+ * 1..65535.  Any malformed, over-long or unresolvable input is reported
+ * on stderr and ends the process with status 1.
+ */
+static void read_address(const char *str, struct sockaddr_in *sin)
+{
+  char host[128], *p, *end;
+  struct hostent *hp;
+  long port;
+
+  /* Refuse input that does not fit in host[] instead of overflowing it */
+  if (strlen(str) >= sizeof(host)) {
+    fprintf(stderr, "%s: invalid address: %s\n", prog, str);
+    exit(1);
+  }
+  strcpy(host, str);
+  if ((p = strchr(host, ':')) == NULL) {
+    fprintf(stderr, "%s: invalid address: %s\n", prog, host);
+    exit(1);
+  }
+  *p++ = '\0';
+
+  /*
+   * Range-check before narrowing: a plain cast of atoi()'s result would
+   * quietly turn e.g. 70000 into 4464.
+   */
+  port = strtol(p, &end, 10);
+  if (end == p || *end != '\0' || port < 1 || port > 65535) {
+    fprintf(stderr, "%s: invalid port: %s\n", prog, p);
+    exit(1);
+  }
+
+  memset(sin, 0, sizeof(struct sockaddr_in));
+  sin->sin_family = AF_INET;
+  sin->sin_port = htons((unsigned short) port);
+  if (host[0] == '\0') {
+    sin->sin_addr.s_addr = INADDR_ANY;
+    return;
+  }
+  sin->sin_addr.s_addr = inet_addr(host);
+  if (sin->sin_addr.s_addr == INADDR_NONE) {
+    /* not dotted-decimal */
+    if ((hp = gethostbyname(host)) == NULL) {
+      fprintf(stderr, "%s: can't resolve address: %s\n", prog, host);
+      exit(1);
+    }
+    /* Never copy more than sin_addr can hold */
+    if (hp->h_length < 0 || (size_t) hp->h_length > sizeof(sin->sin_addr)) {
+      fprintf(stderr, "%s: can't resolve address: %s\n", prog, host);
+      exit(1);
+    }
+    memcpy(&sin->sin_addr, hp->h_addr, hp->h_length);
+  }
+}
+
+#ifdef DEBUG
+/*
+ * Debugging aid: print every entry of an iovec array (base pointer and
+ * length in hex and decimal) followed by the sum of all lengths.
+ *
+ * iov   array to print
+ * niov  number of entries in iov
+ */
+static void show_iov(const struct iovec *iov, int niov)
+{
+  int i;
+  size_t total;
+
+  /* %p requires a pointer to void */
+  printf("iov %p has %d entries:\n", (const void *) iov, niov);
+  total = 0;
+  for (i = 0; i < niov; i++) {
+    printf("iov[%3d] iov_base=%p iov_len=0x%lx(%lu)\n",
+     i, iov[i].iov_base, (unsigned long) iov[i].iov_len,
+     (unsigned long) iov[i].iov_len);
+    total += iov[i].iov_len;
+  }
+  /* %lu, not %ld: the argument is an unsigned long */
+  printf("total 0x%lx(%lu)\n", (unsigned long) total, (unsigned long) total);
+}
+
+/*
+ * This version is tricked out to test all the
+ * st_(read|write)v?(_resid)? variants.  Use the non-DEBUG version for
+ * anything serious.  st_(read|write) are all this function really
+ * needs.
+ *
+ * Moves one chunk of at most IOBUFSIZE bytes from "in" to "out".  The
+ * bits of the global "testing" mask pick which read and which write
+ * variant is used and whether progress is printed.
+ *
+ * Returns 1 when a positive number of bytes was read and exactly that
+ * many were written; returns 0 otherwise.
+ */
+static int pass(st_netfd_t in, st_netfd_t out)
+{
+  char buf[IOBUFSIZE];
+  struct iovec iov[IOV_COUNT];
+  int ioviter, nw, nr;
+
+  /* ---- read side ---- */
+  if (testing & TESTING_READV) {
+    /* Carve buf into IOV_COUNT consecutive slices of IOV_LEN bytes */
+    for (ioviter = 0; ioviter < IOV_COUNT; ioviter++) {
+      iov[ioviter].iov_base = &buf[ioviter * IOV_LEN];
+      iov[ioviter].iov_len = IOV_LEN;
+    }
+    if (testing & TESTING_VERBOSE) {
+      printf("readv(%p)...\n", in);
+      show_iov(iov, IOV_COUNT);
+    }
+    if (testing & TESTING_READ_RESID) {
+      struct iovec *riov = iov;
+      int riov_cnt = IOV_COUNT;
+      if (st_readv_resid(in, &riov, &riov_cnt, ST_UTIME_NO_TIMEOUT) == 0) {
+	if (testing & TESTING_VERBOSE) {
+	  printf("resid\n");
+	  show_iov(riov, riov_cnt);
+	  printf("full\n");
+	  show_iov(iov, IOV_COUNT);
+	}
+	/*
+	 * NOTE(review): this arithmetic presumes st_readv_resid() leaves
+	 * in each iov_len the number of bytes NOT filled, so the sum is
+	 * the unread remainder -- confirm against the library docs.
+	 */
+	nr = 0;
+	for (ioviter = 0; ioviter < IOV_COUNT; ioviter++)
+	  nr += iov[ioviter].iov_len;
+	nr = IOBUFSIZE - nr;
+      } else
+	nr = -1;
+    } else
+      nr = (int) st_readv(in, iov, IOV_COUNT, ST_UTIME_NO_TIMEOUT);
+  } else {
+    if (testing & TESTING_READ_RESID) {
+      /* resid starts as the full request; bytes read = request - resid */
+      size_t resid = IOBUFSIZE;
+      if (st_read_resid(in, buf, &resid, ST_UTIME_NO_TIMEOUT) == 0)
+	nr = IOBUFSIZE - resid;
+      else
+	nr = -1;
+    } else
+      nr = (int) st_read(in, buf, IOBUFSIZE, ST_UTIME_NO_TIMEOUT);
+  }
+  if (testing & TESTING_VERBOSE)
+    printf("got 0x%x(%d) E=%d\n", nr, nr, errno);
+
+  /* Nothing read (nr == 0) or a read error: tell the caller to stop */
+  if (nr <= 0)
+    return 0;
+
+  /* ---- write side ---- */
+  if (testing & TESTING_WRITEV) {
+    /*
+     * Rebuild iov to describe exactly the nr bytes just read, in
+     * slices of at most IOV_LEN bytes; ioviter ends up as the number
+     * of slices.
+     */
+    for (nw = 0, ioviter = 0; nw < nr;
+     nw += iov[ioviter].iov_len, ioviter++) {
+      iov[ioviter].iov_base = &buf[nw];
+      iov[ioviter].iov_len = nr - nw;
+      if (iov[ioviter].iov_len > IOV_LEN)
+	iov[ioviter].iov_len = IOV_LEN;
+    }
+    if (testing & TESTING_VERBOSE) {
+      printf("writev(%p)...\n", out);
+      show_iov(iov, ioviter);
+    }
+    if (testing & TESTING_WRITE_RESID) {
+      struct iovec *riov = iov;
+      int riov_cnt = ioviter;
+      if (st_writev_resid(out, &riov, &riov_cnt, ST_UTIME_NO_TIMEOUT) == 0) {
+	if (testing & TESTING_VERBOSE) {
+	  printf("resid\n");
+	  show_iov(riov, riov_cnt);
+	  printf("full\n");
+	  show_iov(iov, ioviter);
+	}
+	/* Same residual bookkeeping as on the read side (see note there) */
+	nw = 0;
+	while (--ioviter >= 0)
+	  nw += iov[ioviter].iov_len;
+	nw = nr - nw;
+      } else
+	nw = -1;
+    } else
+      nw = st_writev(out, iov, ioviter, ST_UTIME_NO_TIMEOUT);
+  } else {
+    if (testing & TESTING_WRITE_RESID) {
+      size_t resid = nr;
+      if (st_write_resid(out, buf, &resid, ST_UTIME_NO_TIMEOUT) == 0)
+	nw = nr - resid;
+      else
+	nw = -1;
+    } else
+      nw = st_write(out, buf, nr, ST_UTIME_NO_TIMEOUT);
+  }
+  if (testing & TESTING_VERBOSE)
+    printf("put 0x%x(%d) E=%d\n", nw, nw, errno);
+
+  /* A short or failed write also ends the transfer */
+  if (nw != nr)
+    return 0;
+
+  return 1;
+}
+#else /* DEBUG */
+/*
+ * Plain variant for real use: read one chunk of up to IOBUFSIZE bytes
+ * from "in" and write it to "out".
+ * Returns 1 if a positive number of bytes was read and all of them
+ * were written, 0 otherwise.
+ */
+static int pass(st_netfd_t in, st_netfd_t out)
+{
+  char buf[IOBUFSIZE];
+  int got, sent;
+
+  got = (int) st_read(in, buf, IOBUFSIZE, ST_UTIME_NO_TIMEOUT);
+  if (got > 0) {
+    sent = st_write(out, buf, got, ST_UTIME_NO_TIMEOUT);
+    if (sent == got)
+      return 1;
+  }
+
+  return 0;
+}
+#endif
+
+/*
+ * Thread start function serving one accepted client.
+ * "arg" is the client's st_netfd_t.  A connection to rmt_addr is
+ * opened and data is relayed in both directions until pass() or
+ * st_poll() signals the end.  Both descriptors are closed before
+ * returning.  Always returns NULL.
+ */
+static void *handle_request(void *arg)
+{
+  enum { CLIENT = 0, REMOTE = 1 };
+  struct pollfd fds[2];
+  st_netfd_t cli_nfd, rmt_nfd;
+  int rmt_sock;
+
+  cli_nfd = (st_netfd_t) arg;
+  fds[CLIENT].fd = st_netfd_fileno(cli_nfd);
+  fds[CLIENT].events = POLLIN;
+
+  /* Connect to remote host */
+  rmt_sock = socket(PF_INET, SOCK_STREAM, 0);
+  if (rmt_sock < 0) {
+    print_sys_error("socket");
+    goto done;
+  }
+  rmt_nfd = st_netfd_open_socket(rmt_sock);
+  if (rmt_nfd == NULL) {
+    print_sys_error("st_netfd_open_socket");
+    close(rmt_sock);
+    goto done;
+  }
+  if (st_connect(rmt_nfd, (struct sockaddr *)&rmt_addr,
+                 sizeof(rmt_addr), ST_UTIME_NO_TIMEOUT) < 0) {
+    print_sys_error("st_connect");
+    st_netfd_close(rmt_nfd);
+    goto done;
+  }
+  fds[REMOTE].fd = rmt_sock;
+  fds[REMOTE].events = POLLIN;
+
+  /*
+   * Now just pump the data through.
+   * XXX This should use one thread for each direction for true full-duplex.
+   */
+  for ( ; ; ) {
+    fds[CLIENT].revents = 0;
+    fds[REMOTE].revents = 0;
+
+    if (st_poll(fds, 2, ST_UTIME_NO_TIMEOUT) <= 0) {
+      print_sys_error("st_poll");
+      break;
+    }
+
+    /* client -> remote */
+    if ((fds[CLIENT].revents & POLLIN) && !pass(cli_nfd, rmt_nfd))
+      break;
+
+    /* remote -> client */
+    if ((fds[REMOTE].revents & POLLIN) && !pass(rmt_nfd, cli_nfd))
+      break;
+  }
+  st_netfd_close(rmt_nfd);
+
+done:
+
+  st_netfd_close(cli_nfd);
+
+  return NULL;
+}
+
+/*
+ * Detach from the invoking process: fork, let the parent exit, start a
+ * new session, fork again and let the first child exit.  The surviving
+ * process changes directory to "/" and sets its umask to 022.  A failed
+ * fork() is reported and ends the process with status 1.
+ */
+static void start_daemon(void)
+{
+  pid_t child;
+
+  /* Start forking */
+  child = fork();
+  if (child < 0) {
+    print_sys_error("fork");
+    exit(1);
+  } else if (child > 0) {
+    exit(0);                        /* parent */
+  }
+
+  /* First child process */
+  setsid();                         /* become session leader */
+
+  child = fork();
+  if (child < 0) {
+    print_sys_error("fork");
+    exit(1);
+  } else if (child > 0) {
+    exit(0);                        /* first child */
+  }
+
+  chdir("/");
+  umask(022);
+}
+
+/*
+ * Fork "nproc" worker processes ("virtual processors"); values below 1
+ * are treated as 1.  Each child returns to the caller right away.  As
+ * this is only an example there is no watchdog: once all children are
+ * forked the parent simply exits with status 0.  A failed fork() is
+ * reported and ends the process with status 1.
+ */
+static void set_concurrency(int nproc)
+{
+  int remaining = (nproc < 1) ? 1 : nproc;
+
+  while (remaining-- > 0) {
+    pid_t pid = fork();
+
+    if (pid < 0) {
+      print_sys_error("fork");
+      exit(1);
+    }
+    /* Child returns */
+    if (pid == 0)
+      return;
+  }
+
+  /* Parent just exits */
+  exit(0);
+}
+
+/*
+ * Return the processor count obtained from whichever query the
+ * platform provides: sysconf(_SC_NPROCESSORS_ONLN),
+ * sysconf(_SC_NPROC_ONLN) or, on HPUX, mpctl(MPC_GETNUMSPUS).
+ * Where none is available, -1 is returned with errno set to ENOSYS.
+ */
+static int cpu_count(void)
+{
+#if defined (_SC_NPROCESSORS_ONLN)
+  return (int) sysconf(_SC_NPROCESSORS_ONLN);
+#elif defined (_SC_NPROC_ONLN)
+  return (int) sysconf(_SC_NPROC_ONLN);
+#elif defined (HPUX)
+#include <sys/mpctl.h>
+  return mpctl(MPC_GETNUMSPUS, 0, 0);
+#else
+  errno = ENOSYS;
+  return -1;
+#endif
+}
+
+/*
+ * Write "<prog>: <msg>: <description of errno>" and a newline to stderr.
+ */
+static void print_sys_error(const char *msg)
+{
+  const char *reason = strerror(errno);
+
+  fprintf(stderr, "%s: %s: %s\n", prog, msg, reason);
+}
+