正在显示
14 个修改的文件
包含
4801 行增加
和
0 行删除
trunk/research/st-1.9/Makefile
0 → 100644
| 1 | +# The contents of this file are subject to the Mozilla Public | ||
| 2 | +# License Version 1.1 (the "License"); you may not use this file | ||
| 3 | +# except in compliance with the License. You may obtain a copy of | ||
| 4 | +# the License at http://www.mozilla.org/MPL/ | ||
| 5 | +# | ||
| 6 | +# Software distributed under the License is distributed on an "AS | ||
| 7 | +# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or | ||
| 8 | +# implied. See the License for the specific language governing | ||
| 9 | +# rights and limitations under the License. | ||
| 10 | +# | ||
| 11 | +# The Original Code is the Netscape Portable Runtime library. | ||
| 12 | +# | ||
| 13 | +# The Initial Developer of the Original Code is Netscape | ||
| 14 | +# Communications Corporation. Portions created by Netscape are | ||
| 15 | +# Copyright (C) 1994-2000 Netscape Communications Corporation. All | ||
| 16 | +# Rights Reserved. | ||
| 17 | +# | ||
| 18 | +# Contributor(s): Silicon Graphics, Inc. | ||
| 19 | +# | ||
| 20 | +# Portions created by SGI are Copyright (C) 2000-2001 Silicon | ||
| 21 | +# Graphics, Inc. All Rights Reserved. | ||
| 22 | +# | ||
| 23 | +# Alternatively, the contents of this file may be used under the | ||
| 24 | +# terms of the GNU General Public License Version 2 or later (the | ||
| 25 | +# "GPL"), in which case the provisions of the GPL are applicable | ||
| 26 | +# instead of those above. If you wish to allow use of your | ||
| 27 | +# version of this file only under the terms of the GPL and not to | ||
| 28 | +# allow others to use your version of this file under the MPL, | ||
| 29 | +# indicate your decision by deleting the provisions above and | ||
| 30 | +# replace them with the notice and other provisions required by | ||
| 31 | +# the GPL. If you do not delete the provisions above, a recipient | ||
| 32 | +# may use your version of this file under either the MPL or the | ||
| 33 | +# GPL. | ||
| 34 | + | ||
| 35 | +# This is the full version of the libst library - modify carefully | ||
| 36 | +VERSION = 1.9 | ||
| 37 | + | ||
| 38 | +########################## | ||
| 39 | +# Supported OSes: | ||
| 40 | +# | ||
| 41 | +#OS = AIX | ||
| 42 | +#OS = CYGWIN | ||
| 43 | +#OS = DARWIN | ||
| 44 | +#OS = FREEBSD | ||
| 45 | +#OS = HPUX | ||
| 46 | +#OS = HPUX_64 | ||
| 47 | +#OS = IRIX | ||
| 48 | +#OS = IRIX_64 | ||
| 49 | +#OS = LINUX | ||
| 50 | +#OS = NETBSD | ||
| 51 | +#OS = OPENBSD | ||
| 52 | +#OS = OSF1 | ||
| 53 | +#OS = SOLARIS | ||
| 54 | +#OS = SOLARIS_64 | ||
| 55 | + | ||
| 56 | +# Please see the "Other possible defines" section below for | ||
| 57 | +# possible compilation options. | ||
| 58 | +########################## | ||
| 59 | + | ||
| 60 | +CC = cc | ||
| 61 | +AR = ar | ||
| 62 | +LD = ld | ||
| 63 | +RANLIB = ranlib | ||
| 64 | +LN = ln | ||
| 65 | + | ||
| 66 | +SHELL = /bin/sh | ||
| 67 | +ECHO = /bin/echo | ||
| 68 | + | ||
| 69 | +BUILD = DBG | ||
| 70 | +TARGETDIR = $(OS)_$(shell uname -r)_$(BUILD) | ||
| 71 | + | ||
| 72 | +DEFINES = -D$(OS) | ||
| 73 | +CFLAGS = | ||
| 74 | +SFLAGS = | ||
| 75 | +ARFLAGS = -rv | ||
| 76 | +LNFLAGS = -s | ||
| 77 | +DSO_SUFFIX = so | ||
| 78 | + | ||
| 79 | +MAJOR = $(shell echo $(VERSION) | sed 's/^\([^\.]*\).*/\1/') | ||
| 80 | +DESC = st.pc | ||
| 81 | + | ||
| 82 | +########################## | ||
| 83 | +# Platform section. | ||
| 84 | +# Possible targets: | ||
| 85 | + | ||
| 86 | +TARGETS = aix-debug aix-optimized \ | ||
| 87 | + cygwin-debug cygwin-optimized \ | ||
| 88 | + darwin-debug darwin-optimized \ | ||
| 89 | + freebsd-debug freebsd-optimized \ | ||
| 90 | + hpux-debug hpux-optimized \ | ||
| 91 | + hpux-64-debug hpux-64-optimized \ | ||
| 92 | + irix-n32-debug irix-n32-optimized \ | ||
| 93 | + irix-64-debug irix-64-optimized \ | ||
| 94 | + linux-debug linux-optimized \ | ||
| 95 | + netbsd-debug netbsd-optimized \ | ||
| 96 | + openbsd-debug openbsd-optimized \ | ||
| 97 | + osf1-debug osf1-optimized \ | ||
| 98 | + solaris-debug solaris-optimized \ | ||
| 99 | + solaris-64-debug solaris-64-optimized | ||
| 100 | + | ||
| 101 | +# | ||
| 102 | +# Platform specifics | ||
| 103 | +# | ||
| 104 | + | ||
| 105 | +ifeq ($(OS), AIX) | ||
| 106 | +AIX_VERSION = $(shell uname -v).$(shell uname -r) | ||
| 107 | +TARGETDIR = $(OS)_$(AIX_VERSION)_$(BUILD) | ||
| 108 | +CC = xlC | ||
| 109 | +STATIC_ONLY = yes | ||
| 110 | +ifeq ($(BUILD), OPT) | ||
| 111 | +OTHER_FLAGS = -w | ||
| 112 | +endif | ||
| 113 | +ifneq ($(filter-out 4.1 4.2, $(AIX_VERSION)),) | ||
| 114 | +DEFINES += -DMD_HAVE_SOCKLEN_T | ||
| 115 | +endif | ||
| 116 | +endif | ||
| 117 | + | ||
| 118 | +ifeq ($(OS), CYGWIN) | ||
| 119 | +TARGETDIR = $(OS)_$(BUILD) | ||
| 120 | +CC = gcc | ||
| 121 | +LD = gcc | ||
| 122 | +DSO_SUFFIX = dll | ||
| 123 | +SLIBRARY = $(TARGETDIR)/libst.dll.a | ||
| 124 | +DLIBRARY = $(TARGETDIR)/libst.dll | ||
| 125 | +DEF_FILE = $(TARGETDIR)/libst.def | ||
| 126 | +LDFLAGS = libst.def -shared --enable-auto-image-base -Wl,--output-def,$(DEF_FILE),--out-implib,$(SLIBRARY) | ||
| 127 | +OTHER_FLAGS = -Wall | ||
| 128 | +endif | ||
| 129 | + | ||
| 130 | +ifeq ($(OS), DARWIN) | ||
| 131 | +LD = cc | ||
| 132 | +SFLAGS = -fPIC -fno-common | ||
| 133 | +DSO_SUFFIX = dylib | ||
| 134 | +RELEASE = $(shell uname -r | cut -d. -f1) | ||
| 135 | +PPC = $(shell test $(RELEASE) -le 9 && echo yes) | ||
| 136 | +INTEL = $(shell test $(RELEASE) -ge 9 && echo yes) | ||
| 137 | +ifeq ($(PPC), yes) | ||
| 138 | +CFLAGS += -arch ppc | ||
| 139 | +LDFLAGS += -arch ppc | ||
| 140 | +endif | ||
| 141 | +ifeq ($(INTEL), yes) | ||
| 142 | +CFLAGS += -arch i386 -arch x86_64 | ||
| 143 | +LDFLAGS += -arch i386 -arch x86_64 | ||
| 144 | +endif | ||
| 145 | +LDFLAGS += -dynamiclib -install_name /sw/lib/libst.$(MAJOR).$(DSO_SUFFIX) -compatibility_version $(MAJOR) -current_version $(VERSION) | ||
| 146 | +OTHER_FLAGS = -Wall | ||
| 147 | +endif | ||
| 148 | + | ||
| 149 | +ifeq ($(OS), FREEBSD) | ||
| 150 | +SFLAGS = -fPIC | ||
| 151 | +LDFLAGS = -shared -soname=$(SONAME) -lc | ||
| 152 | +OTHER_FLAGS = -Wall | ||
| 153 | +ifeq ($(shell test -f /usr/include/sys/event.h && echo yes), yes) | ||
| 154 | +DEFINES += -DMD_HAVE_KQUEUE | ||
| 155 | +endif | ||
| 156 | +endif | ||
| 157 | + | ||
| 158 | +ifeq (HPUX, $(findstring HPUX, $(OS))) | ||
| 159 | +ifeq ($(OS), HPUX_64) | ||
| 160 | +DEFINES = -DHPUX | ||
| 161 | +CFLAGS = -Ae +DD64 +Z | ||
| 162 | +else | ||
| 163 | +CFLAGS = -Ae +DAportable +Z | ||
| 164 | +endif | ||
| 165 | +RANLIB = true | ||
| 166 | +LDFLAGS = -b | ||
| 167 | +DSO_SUFFIX = sl | ||
| 168 | +endif | ||
| 169 | + | ||
| 170 | +ifeq (IRIX, $(findstring IRIX, $(OS))) | ||
| 171 | +ifeq ($(OS), IRIX_64) | ||
| 172 | +DEFINES = -DIRIX | ||
| 173 | +ABIFLAG = -64 | ||
| 174 | +else | ||
| 175 | +ABIFLAG = -n32 | ||
| 176 | +endif | ||
| 177 | +RANLIB = true | ||
| 178 | +CFLAGS = $(ABIFLAG) -mips3 | ||
| 179 | +LDFLAGS = $(ABIFLAG) -shared | ||
| 180 | +OTHER_FLAGS = -fullwarn | ||
| 181 | +endif | ||
| 182 | + | ||
| 183 | +ifeq ($(OS), LINUX) | ||
| 184 | +EXTRA_OBJS = $(TARGETDIR)/md.o | ||
| 185 | +SFLAGS = -fPIC | ||
| 186 | +LDFLAGS = -shared -soname=$(SONAME) -lc | ||
| 187 | +OTHER_FLAGS = -Wall | ||
| 188 | +ifeq ($(shell test -f /usr/include/sys/epoll.h && echo yes), yes) | ||
| 189 | +DEFINES += -DMD_HAVE_EPOLL | ||
| 190 | +endif | ||
| 191 | +endif | ||
| 192 | + | ||
| 193 | +ifeq ($(OS), NETBSD) | ||
| 194 | +SFLAGS = -fPIC | ||
| 195 | +LDFLAGS = -shared -soname=$(SONAME) -lc | ||
| 196 | +OTHER_FLAGS = -Wall | ||
| 197 | +endif | ||
| 198 | + | ||
| 199 | +ifeq ($(OS), OPENBSD) | ||
| 200 | +SFLAGS = -fPIC | ||
| 201 | +LDFLAGS = -shared -soname=$(SONAME) -lc | ||
| 202 | +OTHER_FLAGS = -Wall | ||
| 203 | +ifeq ($(shell test -f /usr/include/sys/event.h && echo yes), yes) | ||
| 204 | +DEFINES += -DMD_HAVE_KQUEUE | ||
| 205 | +endif | ||
| 206 | +endif | ||
| 207 | + | ||
| 208 | +ifeq ($(OS), OSF1) | ||
| 209 | +RANLIB = true | ||
| 210 | +LDFLAGS = -shared -all -expect_unresolved "*" | ||
| 211 | +endif | ||
| 212 | + | ||
| 213 | +ifeq (SOLARIS, $(findstring SOLARIS, $(OS))) | ||
| 214 | +TARGETDIR = $(OS)_$(shell uname -r | sed 's/^5/2/')_$(BUILD) | ||
| 215 | +CC = gcc | ||
| 216 | +LD = gcc | ||
| 217 | +RANLIB = true | ||
| 218 | +LDFLAGS = -G | ||
| 219 | +OTHER_FLAGS = -Wall | ||
| 220 | +ifeq ($(OS), SOLARIS_64) | ||
| 221 | +DEFINES = -DSOLARIS | ||
| 222 | +CFLAGS += -m64 | ||
| 223 | +LDFLAGS += -m64 | ||
| 224 | +endif | ||
| 225 | +endif | ||
| 226 | + | ||
| 227 | +# | ||
| 228 | +# End of platform section. | ||
| 229 | +########################## | ||
| 230 | + | ||
| 231 | + | ||
| 232 | +ifeq ($(BUILD), OPT) | ||
| 233 | +OTHER_FLAGS += -O | ||
| 234 | +else | ||
| 235 | +OTHER_FLAGS += -g | ||
| 236 | +DEFINES += -DDEBUG | ||
| 237 | +endif | ||
| 238 | + | ||
| 239 | +########################## | ||
| 240 | +# Other possible defines: | ||
| 241 | +# To use poll(2) instead of select(2) for events checking: | ||
| 242 | +# DEFINES += -DUSE_POLL | ||
| 243 | +# You may prefer to use select for applications that have many threads | ||
| 244 | +# using one file descriptor, and poll for applications that have many | ||
| 245 | +# different file descriptors. With USE_POLL poll() is called with at | ||
| 246 | +# least one pollfd per I/O-blocked thread, so 1000 threads sharing one | ||
| 247 | +# descriptor will poll 1000 identical pollfds and select would be more | ||
| 248 | +# efficient. But if the threads all use different descriptors poll() | ||
| 249 | +# may be better depending on your operating system's implementation of | ||
| 250 | +# poll and select. Really, it's up to you. Oh, and on some platforms | ||
| 251 | +# poll() fails with more than a few dozen descriptors. | ||
| 252 | +# | ||
| 253 | +# Some platforms allow FD_SETSIZE to be defined (if select() is used), e.g.: | ||
| 254 | +# DEFINES += -DFD_SETSIZE=4096 | ||
| 255 | +# | ||
| 256 | +# To use malloc(3) instead of mmap(2) for stack allocation: | ||
| 257 | +# DEFINES += -DMALLOC_STACK | ||
| 258 | +# | ||
| 259 | +# To provision more than the default 16 thread-specific-data keys | ||
| 260 | +# (but not too many!): | ||
| 261 | +# DEFINES += -DST_KEYS_MAX=<n> | ||
| 262 | +# | ||
| 263 | +# To start with more than the default 64 initial pollfd slots | ||
| 264 | +# (but the table grows dynamically anyway): | ||
| 265 | +# DEFINES += -DST_MIN_POLLFDS_SIZE=<n> | ||
| 266 | +# | ||
| 267 | +# Note that you can also add these defines by specifying them as | ||
| 268 | +# make/gmake arguments (without editing this Makefile). For example: | ||
| 269 | +# | ||
| 270 | +# make EXTRA_CFLAGS=-DUSE_POLL <target> | ||
| 271 | +# | ||
| 272 | +# (replace make with gmake if needed). | ||
| 273 | +# | ||
| 274 | +# You can also modify the default selection of an alternative event | ||
| 275 | +# notification mechanism. E.g., to enable kqueue(2) support (if it's not | ||
| 276 | +# enabled by default): | ||
| 277 | +# | ||
| 278 | +# gmake EXTRA_CFLAGS=-DMD_HAVE_KQUEUE <target> | ||
| 279 | +# | ||
| 280 | +# or to disable default epoll(4) support: | ||
| 281 | +# | ||
| 282 | +# make EXTRA_CFLAGS=-UMD_HAVE_EPOLL <target> | ||
| 283 | +# | ||
| 284 | +########################## | ||
| 285 | + | ||
| 286 | +CFLAGS += $(DEFINES) $(OTHER_FLAGS) $(EXTRA_CFLAGS) | ||
| 287 | + | ||
| 288 | +OBJS = $(TARGETDIR)/sched.o \ | ||
| 289 | + $(TARGETDIR)/stk.o \ | ||
| 290 | + $(TARGETDIR)/sync.o \ | ||
| 291 | + $(TARGETDIR)/key.o \ | ||
| 292 | + $(TARGETDIR)/io.o \ | ||
| 293 | + $(TARGETDIR)/event.o | ||
| 294 | +OBJS += $(EXTRA_OBJS) | ||
| 295 | +HEADER = $(TARGETDIR)/st.h | ||
| 296 | +SLIBRARY = $(TARGETDIR)/libst.a | ||
| 297 | +DLIBRARY = $(TARGETDIR)/libst.$(DSO_SUFFIX).$(VERSION) | ||
| 298 | +EXAMPLES = examples | ||
| 299 | + | ||
| 300 | +LINKNAME = libst.$(DSO_SUFFIX) | ||
| 301 | +SONAME = libst.$(DSO_SUFFIX).$(MAJOR) | ||
| 302 | +FULLNAME = libst.$(DSO_SUFFIX).$(VERSION) | ||
| 303 | + | ||
| 304 | +ifeq ($(OS), CYGWIN) | ||
| 305 | +SONAME = cygst.$(DSO_SUFFIX) | ||
| 306 | +SLIBRARY = $(TARGETDIR)/libst.dll.a | ||
| 307 | +DLIBRARY = $(TARGETDIR)/$(SONAME) | ||
| 308 | +LINKNAME = | ||
| 309 | +# examples directory does not compile under cygwin | ||
| 310 | +EXAMPLES = | ||
| 311 | +endif | ||
| 312 | + | ||
|  | +# Darwin puts the version before the suffix (libst.<version>.dylib). | ||
|  | +# DLIBRARY must use the same name as FULLNAME, because the $(DLIBRARY) | ||
|  | +# rule links SONAME and LINKNAME to FULLNAME; otherwise both links dangle. | ||
| 313 | +ifeq ($(OS), DARWIN) | ||
| 314 | +LINKNAME = libst.$(DSO_SUFFIX) | ||
| 315 | +SONAME = libst.$(MAJOR).$(DSO_SUFFIX) | ||
| 316 | +FULLNAME = libst.$(VERSION).$(DSO_SUFFIX) | ||
|  | +DLIBRARY = $(TARGETDIR)/$(FULLNAME) | ||
| 317 | +endif | ||
| 318 | + | ||
| 319 | +ifeq ($(STATIC_ONLY), yes) | ||
| 320 | +LIBRARIES = $(SLIBRARY) | ||
| 321 | +else | ||
| 322 | +LIBRARIES = $(SLIBRARY) $(DLIBRARY) | ||
| 323 | +endif | ||
| 324 | + | ||
| 325 | +ifeq ($(OS),) | ||
| 326 | +ST_ALL = unknown | ||
| 327 | +else | ||
| 328 | +ST_ALL = $(TARGETDIR) $(LIBRARIES) $(HEADER) $(EXAMPLES) $(DESC) | ||
| 329 | +endif | ||
| 330 | + | ||
| 331 | +all: $(ST_ALL) | ||
| 332 | + | ||
| 333 | +unknown: | ||
| 334 | + @echo | ||
| 335 | + @echo "Please specify one of the following targets:" | ||
| 336 | + @echo | ||
| 337 | + @for target in $(TARGETS); do echo $$target; done | ||
| 338 | + @echo | ||
| 339 | + | ||
| 340 | +st.pc: st.pc.in | ||
| 341 | + sed "s/@VERSION@/${VERSION}/g" < $< > $@ | ||
| 342 | + | ||
| 343 | +$(TARGETDIR): | ||
| 344 | + if [ ! -d $(TARGETDIR) ]; then mkdir $(TARGETDIR); fi | ||
| 345 | + | ||
| 346 | +$(SLIBRARY): $(OBJS) | ||
| 347 | + $(AR) $(ARFLAGS) $@ $(OBJS) | ||
| 348 | + $(RANLIB) $@ | ||
| 349 | + rm -f obj; $(LN) $(LNFLAGS) $(TARGETDIR) obj | ||
| 350 | + | ||
| 351 | +$(DLIBRARY): $(OBJS:%.o=%-pic.o) | ||
| 352 | + $(LD) $(LDFLAGS) $^ -o $@ | ||
| 353 | + if test "$(LINKNAME)"; then \ | ||
| 354 | + cd $(TARGETDIR); \ | ||
| 355 | + rm -f $(SONAME) $(LINKNAME); \ | ||
| 356 | + $(LN) $(LNFLAGS) $(FULLNAME) $(SONAME); \ | ||
| 357 | + $(LN) $(LNFLAGS) $(FULLNAME) $(LINKNAME); \ | ||
| 358 | + fi | ||
| 359 | + | ||
| 360 | +$(HEADER): public.h | ||
| 361 | + rm -f $@ | ||
| 362 | + cp public.h $@ | ||
| 363 | + | ||
| 364 | +$(TARGETDIR)/md.o: md.S | ||
| 365 | + $(CC) $(CFLAGS) -c $< -o $@ | ||
| 366 | + | ||
| 367 | +$(TARGETDIR)/%.o: %.c common.h md.h | ||
| 368 | + $(CC) $(CFLAGS) -c $< -o $@ | ||
| 369 | + | ||
|  | +# Build the programs in the examples subdirectory with the same compiler | ||
|  | +# settings. "&&" stops a failed cd from running $(MAKE) in this directory, | ||
|  | +# which would recurse into this Makefile. | ||
| 370 | +examples:: | ||
| 371 | + @echo Making $@ | ||
| 372 | + @cd $@ && $(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)" OS="$(OS)" TARGETDIR="$(TARGETDIR)" | ||
| 373 | + | ||
| 374 | +clean: | ||
| 375 | + rm -rf *_OPT *_DBG obj st.pc | ||
| 376 | + | ||
| 377 | +########################## | ||
| 378 | +# Pattern rules: | ||
| 379 | + | ||
| 380 | +ifneq ($(SFLAGS),) | ||
| 381 | +# Compile with shared library options if it's a C file | ||
| 382 | +$(TARGETDIR)/%-pic.o: %.c common.h md.h | ||
| 383 | + $(CC) $(CFLAGS) $(SFLAGS) -c $< -o $@ | ||
| 384 | +endif | ||
| 385 | + | ||
| 386 | +# Otherwise (assembly, or C when SFLAGS is empty) the -pic object is just a link ($(LN) $(LNFLAGS)) to the normal object | ||
| 387 | +%-pic.o: %.o | ||
| 388 | + rm -f $@; $(LN) $(LNFLAGS) $(<F) $@ | ||
| 389 | + | ||
| 390 | +########################## | ||
| 391 | +# Target rules: | ||
| 392 | + | ||
| 393 | +default-debug: | ||
| 394 | + . ./osguess.sh; $(MAKE) OS="$$OS" BUILD="DBG" | ||
| 395 | +default default-optimized: | ||
| 396 | + . ./osguess.sh; $(MAKE) OS="$$OS" BUILD="OPT" | ||
| 397 | + | ||
| 398 | +aix-debug: | ||
| 399 | + $(MAKE) OS="AIX" BUILD="DBG" | ||
| 400 | +aix-optimized: | ||
| 401 | + $(MAKE) OS="AIX" BUILD="OPT" | ||
| 402 | + | ||
| 403 | +cygwin-debug: | ||
| 404 | + $(MAKE) OS="CYGWIN" BUILD="DBG" | ||
| 405 | +cygwin-optimized: | ||
| 406 | + $(MAKE) OS="CYGWIN" BUILD="OPT" | ||
| 407 | + | ||
| 408 | +darwin-debug: | ||
| 409 | + $(MAKE) OS="DARWIN" BUILD="DBG" | ||
| 410 | +darwin-optimized: | ||
| 411 | + $(MAKE) OS="DARWIN" BUILD="OPT" | ||
| 412 | + | ||
| 413 | +freebsd-debug: | ||
| 414 | + $(MAKE) OS="FREEBSD" BUILD="DBG" | ||
| 415 | +freebsd-optimized: | ||
| 416 | + $(MAKE) OS="FREEBSD" BUILD="OPT" | ||
| 417 | + | ||
| 418 | +hpux-debug: | ||
| 419 | + $(MAKE) OS="HPUX" BUILD="DBG" | ||
| 420 | +hpux-optimized: | ||
| 421 | + $(MAKE) OS="HPUX" BUILD="OPT" | ||
| 422 | +hpux-64-debug: | ||
| 423 | + $(MAKE) OS="HPUX_64" BUILD="DBG" | ||
| 424 | +hpux-64-optimized: | ||
| 425 | + $(MAKE) OS="HPUX_64" BUILD="OPT" | ||
| 426 | + | ||
| 427 | +irix-n32-debug: | ||
| 428 | + $(MAKE) OS="IRIX" BUILD="DBG" | ||
| 429 | +irix-n32-optimized: | ||
| 430 | + $(MAKE) OS="IRIX" BUILD="OPT" | ||
| 431 | +irix-64-debug: | ||
| 432 | + $(MAKE) OS="IRIX_64" BUILD="DBG" | ||
| 433 | +irix-64-optimized: | ||
| 434 | + $(MAKE) OS="IRIX_64" BUILD="OPT" | ||
| 435 | + | ||
| 436 | +linux-debug: | ||
| 437 | + $(MAKE) OS="LINUX" BUILD="DBG" | ||
| 438 | +linux-optimized: | ||
| 439 | + $(MAKE) OS="LINUX" BUILD="OPT" | ||
| 440 | +# compatibility | ||
| 441 | +linux-ia64-debug: linux-debug | ||
| 442 | +linux-ia64-optimized: linux-optimized | ||
| 443 | + | ||
| 444 | +netbsd-debug: | ||
| 445 | + $(MAKE) OS="NETBSD" BUILD="DBG" | ||
| 446 | +netbsd-optimized: | ||
| 447 | + $(MAKE) OS="NETBSD" BUILD="OPT" | ||
| 448 | + | ||
| 449 | +openbsd-debug: | ||
| 450 | + $(MAKE) OS="OPENBSD" BUILD="DBG" | ||
| 451 | +openbsd-optimized: | ||
| 452 | + $(MAKE) OS="OPENBSD" BUILD="OPT" | ||
| 453 | + | ||
| 454 | +osf1-debug: | ||
| 455 | + $(MAKE) OS="OSF1" BUILD="DBG" | ||
| 456 | +osf1-optimized: | ||
| 457 | + $(MAKE) OS="OSF1" BUILD="OPT" | ||
| 458 | + | ||
| 459 | +solaris-debug: | ||
| 460 | + $(MAKE) OS="SOLARIS" BUILD="DBG" | ||
| 461 | +solaris-optimized: | ||
| 462 | + $(MAKE) OS="SOLARIS" BUILD="OPT" | ||
| 463 | +solaris-64-debug: | ||
| 464 | + $(MAKE) OS="SOLARIS_64" BUILD="DBG" | ||
| 465 | +solaris-64-optimized: | ||
| 466 | + $(MAKE) OS="SOLARIS_64" BUILD="OPT" | ||
| 467 | + | ||
| 468 | +########################## | ||
| 469 | + |
trunk/research/st-1.9/README
0 → 100644
| 1 | +WELCOME! | ||
| 2 | + | ||
| 3 | +The State Threads Library is a small application library which provides | ||
| 4 | +a foundation for writing fast and highly scalable Internet applications | ||
| 5 | +(such as web servers, proxy servers, mail transfer agents, and so on, | ||
| 6 | +really any network-data-driven application) on UNIX-like platforms. It | ||
| 7 | +combines the simplicity of the multithreaded programming paradigm, in | ||
| 8 | +which one thread supports each simultaneous connection, with the | ||
| 9 | +performance and scalability of an event-driven state machine | ||
| 10 | +architecture. In other words, this library offers a threading API for | ||
| 11 | +structuring an Internet application as a state machine. For more | ||
| 12 | +details, please see the library documentation in the "docs" directory or | ||
| 13 | +on-line at | ||
| 14 | + | ||
| 15 | + http://state-threads.sourceforge.net/docs/ | ||
| 16 | + | ||
| 17 | +The State Threads Project is an open source project for maintaining and | ||
| 18 | +enhancing the State Threads Library. For more information about this | ||
| 19 | +project, please see | ||
| 20 | + | ||
| 21 | + http://state-threads.sourceforge.net/ | ||
| 22 | + | ||
| 23 | + | ||
| 24 | +BUILDING | ||
| 25 | + | ||
| 26 | +To build the library by hand, use the GNU make utility. Run the make | ||
| 27 | +command (e.g., `gmake') with no arguments to display all supported | ||
| 28 | +targets. | ||
| 29 | + | ||
| 30 | +To build more or less automatically, first set the CONFIG_GUESS_PATH | ||
| 31 | +variable in either osguess.sh or your environment, then run "make | ||
| 32 | +default", which guesses your OS and builds. This requires the "config.guess" | ||
| 33 | +utility from GNU autoconf (not included with ST). You can use one from | ||
| 34 | +a larger "main" software project or just use any config.guess available | ||
| 35 | +on your system. You can also get it directly from GNU: | ||
| 36 | +ftp://ftp.gnu.org/gnu/autoconf/ | ||
| 37 | + | ||
| 38 | +To build rpms (RedHat Linux 6.2 or later, Linux/Mandrake, Solaris with | ||
| 39 | +gnome, etc.): | ||
| 40 | + download the latest st-x.y.tar.gz | ||
| 41 | + # rpm -ta st-x.y.tar.gz | ||
| 42 | +The .rpms will land in /usr/src/RPMS/<arch>. Install them with: | ||
| 43 | + # rpm -i libst*.rpm | ||
| 44 | +Requires GNU automake and rpm 3.0.3 or later. | ||
| 45 | + | ||
| 46 | +Debian users: | ||
| 47 | + If you run potato, please upgrade to woody. | ||
| 48 | + If you run woody, "apt-get install libst-dev" will get you v1.3. | ||
| 49 | + If you run testing/unstable, you will get the newest available version. | ||
| 50 | + If you *must* have the newest libst in woody, you may follow these | ||
| 51 | + not-recommended instructions: | ||
| 52 | + 1. Add "deb-src <your-favourite-debian-mirror> unstable main" to your | ||
| 53 | + /etc/apt/sources.list | ||
| 54 | + 2. apt-get update | ||
| 55 | + 3. apt-get source st | ||
| 56 | + 4. cd st-1.4 (or whatever version you got) | ||
| 57 | + 5. debuild | ||
| 58 | + 6. dpkg -i ../*.deb | ||
| 59 | + | ||
| 60 | +If your application uses autoconf to search for dependencies and you | ||
| 61 | +want to search for a given version of libst, you can simply add | ||
| 62 | + PKG_CHECK_MODULES(MYAPP, st >= 1.3 mumble >= 0.2.23) | ||
| 63 | +to your configure.ac/in. This will define @MYAPP_LIBS@ and | ||
| 64 | +@MYAPP_CFLAGS@ which you may then use in your Makefile.am/in files to | ||
| 65 | +link against mumble and st. | ||
| 66 | + | ||
| 67 | + | ||
| 68 | +LICENSE | ||
| 69 | + | ||
| 70 | +The State Threads library is a derivative of the Netscape Portable | ||
| 71 | +Runtime library (NSPR). All source code in this directory is | ||
| 72 | +distributed under the terms of the Mozilla Public License (MPL) version | ||
| 73 | +1.1 or the GNU General Public License (GPL) version 2 or later. For | ||
| 74 | +more information about these licenses please see | ||
| 75 | +http://www.mozilla.org/MPL/ and http://www.gnu.org/copyleft/. | ||
| 76 | + | ||
| 77 | +All source code in the "examples" directory is distributed under a | ||
| 78 | +BSD-style license. | ||
| 79 | + | ||
| 80 | + | ||
| 81 | +PLATFORMS | ||
| 82 | + | ||
| 83 | +Please see the "docs/notes.html" file for the list of currently | ||
| 84 | +supported platforms. | ||
| 85 | + | ||
| 86 | + | ||
| 87 | +DEBUGGER SUPPORT | ||
| 88 | + | ||
| 89 | +It's almost impossible to print SP and PC in a portable way. The only | ||
| 90 | +platform-independent way to see a thread's stack is to actually jump to | ||
| 91 | +the saved context. That's what the _st_iterate_threads() function does. | ||
| 92 | +Do the following to iterate over all threads: | ||
| 93 | + | ||
| 94 | +- set _st_iterate_threads_flag to 1 in the debugger | ||
| 95 | +- set a breakpoint at the _st_show_thread_stack() function | ||
| 96 | + (which does nothing) | ||
| 97 | +- call the _st_iterate_threads() function, which jumps to the | ||
| 98 | + next thread | ||
| 99 | +- at each break you can explore the thread's stack | ||
| 100 | +- continue | ||
| 101 | +- when iteration is complete, you return to the original | ||
| 102 | + point (you can see thread id and a message as arguments of | ||
| 103 | + the _st_show_thread_stack() function). | ||
| 104 | + | ||
| 105 | +You can call _st_iterate_threads() in three ways: | ||
| 106 | + | ||
| 107 | +- Insert it into your source code at the point you want to | ||
| 108 | + go over threads. | ||
| 109 | +- Just run the application and this function will be called at | ||
| 110 | + the first context switch. | ||
| 111 | +- Call it directly from the debugger at any point. | ||
| 112 | + | ||
| 113 | +This works with gdb and dbx. | ||
| 114 | + | ||
| 115 | +Example using gdb: | ||
| 116 | + | ||
| 117 | +(gdb) set _st_iterate_threads_flag = 1 | ||
| 118 | +(gdb) b _st_show_thread_stack | ||
| 119 | +... | ||
| 120 | +(gdb) call _st_iterate_threads() | ||
| 121 | +... | ||
| 122 | +(gdb) bt | ||
| 123 | +... | ||
| 124 | +(gdb) c | ||
| 125 | +... | ||
| 126 | +(gdb) bt | ||
| 127 | +... | ||
| 128 | +(gdb) c | ||
| 129 | +... | ||
| 130 | +and so on... | ||
| 131 | + | ||
| 132 | +_st_iterate_threads_flag will be set to 0 automatically | ||
| 133 | +after iteration is over or you can set it to 0 at any time | ||
| 134 | +to stop iteration. | ||
| 135 | + | ||
| 136 | +Sometimes gdb reports a SIGSEGV when you call a function directly | ||
| 137 | +from the gdb command line. This can be ignored -- just call the same | ||
| 138 | +function again right away and it works fine. For example: | ||
| 139 | + | ||
| 140 | +(gdb) set _st_iterate_threads_flag = 1 | ||
| 141 | +(gdb) b _st_show_thread_stack | ||
| 142 | +Breakpoint 1 at 0x809bbbb: file sched.c, line 856. | ||
| 143 | +(gdb) call _st_iterate_threads() | ||
| 144 | +Program received signal SIGSEGV, Segmentation fault. | ||
| 145 | +.... | ||
| 146 | +(gdb) # just call the function again: | ||
| 147 | +(gdb) call _st_iterate_threads() | ||
| 148 | +Breakpoint 1, _st_show_thread_stack (thread=0x4017aee4, messg=0x80ae7a2 | ||
| 149 | +"Iteration started") at sched.c:856 | ||
| 150 | +856 } | ||
| 151 | +.... | ||
| 152 | + | ||
| 153 | +You can use simple gdb command-line scripting to display | ||
| 154 | +all threads and their stack traces at once: | ||
| 155 | + | ||
| 156 | +(gdb) while _st_iterate_threads_flag | ||
| 157 | + >bt | ||
| 158 | + >c | ||
| 159 | + >end | ||
| 160 | +.... | ||
| 161 | + | ||
| 162 | +Another script, which stops at the thread with a specific thread id | ||
| 163 | +(e.g., 0x40252ee4): | ||
| 164 | + | ||
| 165 | +(gdb) # set the flag again: | ||
| 166 | +(gdb) set _st_iterate_threads_flag = 1 | ||
| 167 | +(gdb) call _st_iterate_threads() | ||
| 168 | +Breakpoint 1, _st_show_thread_stack (thread=0x4017aee4, messg=0x80ae7a2 | ||
| 169 | +"Iteration started") at sched.c:856 | ||
| 170 | +856 } | ||
| 171 | +.... | ||
| 172 | +(gdb) while thread != 0x40252ee4 | ||
| 173 | + >c | ||
| 174 | + >end | ||
| 175 | +.... | ||
| 176 | +.... | ||
| 177 | +Breakpoint 1, _st_show_thread_stack (thread=0x40252ee4, messg=0x0) at | ||
| 178 | +sched.c:856 | ||
| 179 | +856 } | ||
| 180 | +(gdb) bt | ||
| 181 | +.... | ||
| 182 | +(gdb) # don't want to continue iteration, unset the flag: | ||
| 183 | +(gdb) set _st_iterate_threads_flag = 0 | ||
| 184 | +(gdb) c | ||
| 185 | +Continuing. | ||
| 186 | +Breakpoint 1, _st_show_thread_stack (thread=0x0, messg=0x80ae78e "Iteration | ||
| 187 | +completed") | ||
| 188 | + at sched.c:856 | ||
| 189 | +856 } | ||
| 190 | +(gdb) c | ||
| 191 | +Continuing. | ||
| 192 | +(gdb) return | ||
| 193 | +Make selected stack frame return now? (y or n) y | ||
| 194 | +#0 0x4011254e in __select () | ||
| 195 | + from /lib/libc.so.6 | ||
| 196 | +(gdb) detach | ||
| 197 | + | ||
| 198 | + | ||
| 199 | +CHANGE LOG | ||
| 200 | + | ||
| 201 | +Changes from 1.8 to 1.9. | ||
| 202 | +------------------------ | ||
| 203 | +o Support 32-bit and 64-bit Intel Macs. | ||
| 204 | + | ||
| 205 | +o Added ST_VERSION string, and ST_VERSION_MAJOR and ST_VERSION_MINOR | ||
| 206 | + [bug 1796801]. | ||
| 207 | + | ||
| 208 | +o Fixed some compiler warnings, based on a patch from Brian Wellington | ||
| 209 | + [bug 1932741]. | ||
| 210 | + | ||
| 211 | + | ||
| 212 | +Changes from 1.7 to 1.8. | ||
| 213 | +-------------------------- | ||
| 214 | +o Added support for kqueue and epoll on platforms that support them. | ||
| 215 | + Added ability to choose the event notification system at program | ||
| 216 | + startup. | ||
| 217 | + | ||
| 218 | +o Long-overdue public definitions of ST_UTIME_NO_TIMEOUT (-1ULL) and | ||
| 219 | + ST_UTIME_NO_WAIT (0) [bug 1514436]. | ||
| 220 | + | ||
| 221 | +o Documentation patch for st_utime() [bug 1514484]. | ||
| 222 | + | ||
| 223 | +o Documentation patch for st_timecache_set() [bug 1514486]. | ||
| 224 | + | ||
| 225 | +o Documentation patch for st_netfd_serialize_accept() [bug 1514494]. | ||
| 226 | + | ||
| 227 | +o Added st_writev_resid() [rfe 1538344]. | ||
| 228 | + | ||
| 229 | +o Added st_readv_resid() [rfe 1538768] and, for symmetry, st_readv(). | ||
| 230 | + | ||
| 231 | + | ||
| 232 | +Changes from 1.6 to 1.7. | ||
| 233 | +------------------------ | ||
| 234 | +o Support glibc 2.4, which breaks programs that manipulate jump buffers. | ||
| 235 | + Replaced Linux IA64 special cases with new md.S that covers all | ||
| 236 | + Linux. | ||
| 237 | + | ||
| 238 | + | ||
| 239 | +Changes from 1.5.2 to 1.6. | ||
| 240 | +-------------------------- | ||
| 241 | +none | ||
| 242 | + | ||
| 243 | + | ||
| 244 | +Changes from 1.5.1 to 1.5.2. | ||
| 245 | +---------------------------- | ||
| 246 | +o Alfred Perlstein's context switch callback feature. | ||
| 247 | + | ||
| 248 | +o Claus Assmann's st_recvmsg/st_sendmsg wrappers. | ||
| 249 | + | ||
| 250 | +o Extra stack padding for platforms that need it. | ||
| 251 | + | ||
| 252 | +o Ron Arts's timeout clarifications in the reference manual. | ||
| 253 | + | ||
| 254 | +o Raymond Bero and Anton Berezin's AMD64 FreeBSD port. | ||
| 255 | + | ||
| 256 | +o Claus Assmann's AMD64 SunOS 5.10 port. | ||
| 257 | + | ||
| 258 | +o Claus Assmann's AMD64 OpenBSD port. | ||
| 259 | + | ||
| 260 | +o Michael Abd-El-Malek's Mac OS X port. | ||
| 261 | + | ||
| 262 | +o Michael Abd-El-Malek's stack printing patch. | ||
| 263 | + | ||
| 264 | + | ||
| 265 | +Changes from 1.5.0 to 1.5.1. | ||
| 266 | +---------------------------- | ||
| 267 | +o Andreas Gustafsson's USE_POLL fix. | ||
| 268 | + | ||
| 269 | +o Gene's st_set_utime_function() enhancement. | ||
| 270 | + | ||
| 271 | + | ||
| 272 | +Changes from 1.4 to 1.5.0. | ||
| 273 | +-------------------------- | ||
| 274 | +o Andreas Gustafsson's performance patch. | ||
| 275 | + | ||
| 276 | +o New extensions: Improved DNS resolver, generic LRU cache, in-process | ||
| 277 | + DNS cache, and a program to test the resolver and cache. | ||
| 278 | + | ||
| 279 | +o Support for AMD Opteron 64-bit CPUs under Linux. | ||
| 280 | + | ||
| 281 | +o Support for SPARC-64 under Solaris. | ||
| 282 | + | ||
| 283 | +o Andreas Gustafsson's support for VAX under NetBSD. | ||
| 284 | + | ||
| 285 | +o Changed unportable #warning directives in md.h to #error. | ||
| 286 | + | ||
| 287 | + | ||
| 288 | +Changes from 1.3 to 1.4. | ||
| 289 | +------------------------ | ||
| 290 | +o Andreas Gustafsson's NetBSD port. | ||
| 291 | + | ||
| 292 | +o Wesley W. Terpstra's Darwin (MacOS X) port. | ||
| 293 | + | ||
| 294 | +o Support for many CPU architectures under Linux and *BSD. | ||
| 295 | + | ||
| 296 | +o Renamed private typedefs so they don't conflict with public ones any | ||
| 297 | + more. | ||
| 298 | + | ||
| 299 | +o common.h now includes public.h for strict prototyping. | ||
| 300 | + | ||
| 301 | +o Joshua Levy's recommendation to make st_connect() and st_sendto() | ||
| 302 | + accept const struct sockaddr pointers, as the originals do. | ||
| 303 | + | ||
| 304 | +o Clarified the documentation regarding blocking vs. non-blocking I/O. | ||
| 305 | + | ||
| 306 | +o Cygwin support. | ||
| 307 | + | ||
| 308 | +o Created the extensions directory. | ||
| 309 | + | ||
| 310 | +o Fixed warnings from ia64asm.S. | ||
| 311 | + | ||
| 312 | + | ||
| 313 | +Changes from 1.2 to 1.3. | ||
| 314 | +------------------------ | ||
| 315 | +o Added st_read_resid() and st_write_resid() to allow the caller to know | ||
| 316 | + how much data was transferred before an error occurred. Updated | ||
| 317 | + documentation. | ||
| 318 | + | ||
| 319 | +o Updated project link, copyrights, and documentation regarding | ||
| 320 | + timeouts. Added comment to st_connect(). | ||
| 321 | + | ||
| 322 | +o Optimized the _st_add_sleep_q() function in sched.c. Now we walk the | ||
| 323 | + sleep queue *backward* when inserting a thread into it. When you | ||
| 324 | + have lots (hundreds) of threads and several timeout values, it takes | ||
| 325 | + a while to insert a thread at the appropriate point in the sleep | ||
| 326 | + queue. The idea is that often this appropriate point is closer to | ||
| 327 | + the end of the queue rather than the beginning. Measurements show | ||
| 328 | + performance improves with this change. In any case this change | ||
| 329 | + should do no harm. | ||
| 330 | + | ||
| 331 | +o Added a hint of when to define USE_POLL and when not to, to the | ||
| 332 | + Makefile. | ||
| 333 | + | ||
| 334 | +o Added debugging support (files common.h and sched.c). See above. | ||
| 335 | + | ||
| 336 | +o Decreased the number of reallocations of _ST_POLLFDS in sched.c. | ||
| 337 | + Inspired by Lev Walkin. | ||
| 338 | + | ||
| 339 | +o Fixed st_usleep(-1) and st_sleep(-1), and added a warning to the | ||
| 340 | + documentation about too-large timeouts. | ||
| 341 | + | ||
| 342 | +o Linux/*BSD Alpha port. | ||
| 343 | + | ||
| 344 | +o Wesley W. Terpstra modernized the build process: | ||
| 345 | + - properly build relocatable libraries under bsd and linux | ||
| 346 | + - use library versioning | ||
| 347 | + - added rpm spec file | ||
| 348 | + - added debian/ files | ||
| 349 | + See above for build instructions. | ||
| 350 | + | ||
| 351 | + | ||
| 352 | +Changes from 1.1 to 1.2. | ||
| 353 | +------------------------ | ||
| 354 | +o Added st_randomize_stacks(). | ||
| 355 | + | ||
| 356 | +o Added a patch contributed by Sascha Schumann. | ||
| 357 | + | ||
| 358 | + | ||
| 359 | +Changes from 1.0 to 1.1. | ||
| 360 | +------------------------ | ||
| 361 | +o Relicensed under dual MPL-GPL. | ||
| 362 | + | ||
| 363 | +o OpenBSD port. | ||
| 364 | + | ||
| 365 | +o Compile-time option to use poll() instead of select() for | ||
| 366 | + event polling (see Makefile). | ||
| 367 | + This is useful if you want to support a large number of open | ||
| 368 | + file descriptors (larger than FD_SETSIZE) within a single | ||
| 369 | + process. | ||
| 370 | + | ||
| 371 | +o Linux IA-64 port. | ||
| 372 | + Two issues make IA-64 different from other platforms: | ||
| 373 | + | ||
| 374 | + - Besides the traditional call stack in memory, IA-64 uses the | ||
| 375 | + general register stack. Thus each thread needs a backing store | ||
| 376 | + for the register stack in addition to the memory stack. | ||
| 377 | + | ||
| 378 | + - The current implementation of setjmp()/longjmp() cannot be used | ||
| 379 | + for thread context-switching since it assumes that only one | ||
| 380 | + register stack exists. Using special assembly functions for | ||
| 381 | + context-switching is unavoidable. | ||
| 382 | + | ||
| 383 | +o Thread stack capping on IRIX. | ||
| 384 | + This allows some profiling tools (such as SpeedShop) to know when | ||
| 385 | + to stop unwinding the stack. Without this, libexc (used by SpeedShop) | ||
| 386 | + traces right off the stack and crashes. | ||
| 387 | + | ||
| 388 | +o Miscellaneous documentation additions. | ||
| 389 | + | ||
| 390 | + | ||
| 391 | +COPYRIGHTS | ||
| 392 | + | ||
| 393 | +Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. | ||
| 394 | +All Rights Reserved. |
trunk/research/st-1.9/common.h
0 → 100644
| 1 | +/* | ||
| 2 | + * The contents of this file are subject to the Mozilla Public | ||
| 3 | + * License Version 1.1 (the "License"); you may not use this file | ||
| 4 | + * except in compliance with the License. You may obtain a copy of | ||
| 5 | + * the License at http://www.mozilla.org/MPL/ | ||
| 6 | + * | ||
| 7 | + * Software distributed under the License is distributed on an "AS | ||
| 8 | + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or | ||
| 9 | + * implied. See the License for the specific language governing | ||
| 10 | + * rights and limitations under the License. | ||
| 11 | + * | ||
| 12 | + * The Original Code is the Netscape Portable Runtime library. | ||
| 13 | + * | ||
| 14 | + * The Initial Developer of the Original Code is Netscape | ||
| 15 | + * Communications Corporation. Portions created by Netscape are | ||
| 16 | + * Copyright (C) 1994-2000 Netscape Communications Corporation. All | ||
| 17 | + * Rights Reserved. | ||
| 18 | + * | ||
| 19 | + * Contributor(s): Silicon Graphics, Inc. | ||
| 20 | + * | ||
| 21 | + * Portions created by SGI are Copyright (C) 2000-2001 Silicon | ||
| 22 | + * Graphics, Inc. All Rights Reserved. | ||
| 23 | + * | ||
| 24 | + * Alternatively, the contents of this file may be used under the | ||
| 25 | + * terms of the GNU General Public License Version 2 or later (the | ||
| 26 | + * "GPL"), in which case the provisions of the GPL are applicable | ||
| 27 | + * instead of those above. If you wish to allow use of your | ||
| 28 | + * version of this file only under the terms of the GPL and not to | ||
| 29 | + * allow others to use your version of this file under the MPL, | ||
| 30 | + * indicate your decision by deleting the provisions above and | ||
| 31 | + * replace them with the notice and other provisions required by | ||
| 32 | + * the GPL. If you do not delete the provisions above, a recipient | ||
| 33 | + * may use your version of this file under either the MPL or the | ||
| 34 | + * GPL. | ||
| 35 | + */ | ||
| 36 | + | ||
| 37 | +/* | ||
| 38 | + * This file is derived directly from Netscape Communications Corporation, | ||
| 39 | + * and consists of extensive modifications made during the year(s) 1999-2000. | ||
| 40 | + */ | ||
| 41 | + | ||
| 42 | +#ifndef __ST_COMMON_H__ | ||
| 43 | +#define __ST_COMMON_H__ | ||
| 44 | + | ||
| 45 | +#include <stddef.h> | ||
| 46 | +#include <unistd.h> | ||
| 47 | +#include <sys/types.h> | ||
| 48 | +#include <sys/time.h> | ||
| 49 | +#include <setjmp.h> | ||
| 50 | + | ||
| 51 | +/* Enable assertions only if DEBUG is defined. NDEBUG is defined here only when the build has not already defined it, so compiling with -DNDEBUG does not cause a macro redefinition. */ | ||
| 52 | +#if !defined(DEBUG) && !defined(NDEBUG) | ||
| 53 | +#define NDEBUG | ||
| 54 | +#endif | ||
| 55 | +#include <assert.h> | ||
| 56 | +#define ST_ASSERT(expr) assert(expr) /* Library-wide assertion; a no-op whenever NDEBUG is defined */ | ||
| 57 | +/* Braces that delimit the body of a multi-statement macro. They are plain braces, not do/while(0), so "MACRO(x);" expands to a block plus an empty statement -- do not use such a macro as the unbraced body of an if that has an else. */ | ||
| 58 | +#define ST_BEGIN_MACRO { | ||
| 59 | +#define ST_END_MACRO } | ||
| 60 | +/* ST_HIDDEN expands to "static" in normal builds and to nothing when DEBUG is defined */ | ||
| 61 | +#ifdef DEBUG | ||
| 62 | +#define ST_HIDDEN /*nothing*/ | ||
| 63 | +#else | ||
| 64 | +#define ST_HIDDEN static | ||
| 65 | +#endif | ||
| 66 | + | ||
| 67 | +#include "public.h" | ||
| 68 | +#include "md.h" | ||
| 69 | + | ||
| 70 | + | ||
| 71 | +/***************************************** | ||
| 72 | + * Circular linked list definitions | ||
| 73 | + */ | ||
| 74 | +/* Link node of a doubly linked circular list; a list head whose next points to itself is empty (see ST_CLIST_IS_EMPTY) */ | ||
| 75 | +typedef struct _st_clist { | ||
| 76 | + struct _st_clist *next; /* Following element */ | ||
| 77 | + struct _st_clist *prev; /* Preceding element */ | ||
| 78 | +} _st_clist_t; | ||
| 79 | + | ||
| 80 | +/* Insert element "_e" into the list, before "_l" (both arguments are evaluated more than once) */ | ||
| 81 | +#define ST_INSERT_BEFORE(_e,_l) \ | ||
| 82 | + ST_BEGIN_MACRO \ | ||
| 83 | + (_e)->next = (_l); \ | ||
| 84 | + (_e)->prev = (_l)->prev; \ | ||
| 85 | + (_l)->prev->next = (_e); \ | ||
| 86 | + (_l)->prev = (_e); \ | ||
| 87 | + ST_END_MACRO | ||
| 88 | + | ||
| 89 | +/* Insert element "_e" into the list, after "_l" (both arguments are evaluated more than once) */ | ||
| 90 | +#define ST_INSERT_AFTER(_e,_l) \ | ||
| 91 | + ST_BEGIN_MACRO \ | ||
| 92 | + (_e)->next = (_l)->next; \ | ||
| 93 | + (_e)->prev = (_l); \ | ||
| 94 | + (_l)->next->prev = (_e); \ | ||
| 95 | + (_l)->next = (_e); \ | ||
| 96 | + ST_END_MACRO | ||
| 97 | + | ||
| 98 | +/* Return the element following element "_e" */ | ||
| 99 | +#define ST_NEXT_LINK(_e) ((_e)->next) | ||
| 100 | + | ||
| 101 | +/* Append an element "_e" to the end of the list "_l" (i.e. insert it just before the list head) */ | ||
| 102 | +#define ST_APPEND_LINK(_e,_l) ST_INSERT_BEFORE(_e,_l) | ||
| 103 | + | ||
| 104 | +/* Insert an element "_e" at the head of the list "_l" (i.e. insert it just after the list head) */ | ||
| 105 | +#define ST_INSERT_LINK(_e,_l) ST_INSERT_AFTER(_e,_l) | ||
| 106 | + | ||
| 107 | +/* Return the head/tail of the list: the first/last element, or "_l" itself when the list is empty */ | ||
| 108 | +#define ST_LIST_HEAD(_l) (_l)->next | ||
| 109 | +#define ST_LIST_TAIL(_l) (_l)->prev | ||
| 110 | + | ||
| 111 | +/* Remove the element "_e" from its circular list ("_e"'s own next/prev pointers are left unchanged) */ | ||
| 112 | +#define ST_REMOVE_LINK(_e) \ | ||
| 113 | + ST_BEGIN_MACRO \ | ||
| 114 | + (_e)->prev->next = (_e)->next; \ | ||
| 115 | + (_e)->next->prev = (_e)->prev; \ | ||
| 116 | + ST_END_MACRO | ||
| 117 | + | ||
| 118 | +/* Return non-zero if the given circular list "_l" is empty, */ | ||
| 119 | +/* zero if the circular list is not empty */ | ||
| 120 | +#define ST_CLIST_IS_EMPTY(_l) \ | ||
| 121 | + ((_l)->next == (_l)) | ||
| 122 | + | ||
| 123 | +/* Initialize a circular list: make the head "_l" point to itself in both directions */ | ||
| 124 | +#define ST_INIT_CLIST(_l) \ | ||
| 125 | + ST_BEGIN_MACRO \ | ||
| 126 | + (_l)->next = (_l); \ | ||
| 127 | + (_l)->prev = (_l); \ | ||
| 128 | + ST_END_MACRO | ||
| 129 | +/* Brace initializer for a statically allocated list head; pass the address of the head itself so that the list starts out empty */ | ||
| 130 | +#define ST_INIT_STATIC_CLIST(_l) \ | ||
| 131 | + {(_l), (_l)} | ||
| 132 | + | ||
| 133 | + | ||
| 134 | +/***************************************** | ||
| 135 | + * Basic types definitions | ||
| 136 | + */ | ||
| 137 | +/* Destructor callback type: takes a single void * argument and returns nothing */ | ||
| 138 | +typedef void (*_st_destructor_t)(void *); | ||
| 139 | + | ||
| 140 | +/* Describes one thread stack: the allocated region, its usable portion, and the stack pointer */ | ||
| 141 | +typedef struct _st_stack { | ||
| 142 | + _st_clist_t links; /* List linkage (see _ST_THREAD_STACK_PTR) */ | ||
| 143 | + char *vaddr; /* Base of stack's allocated memory */ | ||
| 144 | + int vaddr_size; /* Size of stack's allocated memory */ | ||
| 145 | + int stk_size; /* Size of usable portion of the stack */ | ||
| 146 | + char *stk_bottom; /* Lowest address of stack's usable portion */ | ||
| 147 | + char *stk_top; /* Highest address of stack's usable portion */ | ||
| 148 | + void *sp; /* Stack pointer from C's point of view */ | ||
| 149 | +#ifdef __ia64__ | ||
| 150 | + void *bsp; /* Register stack backing store pointer */ | ||
| 151 | +#endif | ||
| 152 | +} _st_stack_t; | ||
| 153 | + | ||
| 154 | +/* Condition variable: consists solely of the queue of waiting threads */ | ||
| 155 | +typedef struct _st_cond { | ||
| 156 | + _st_clist_t wait_q; /* Condition variable wait queue */ | ||
| 157 | +} _st_cond_t; | ||
| 158 | + | ||
| 159 | +/* Per-thread control block: scheduling state, start routine, stack, queue links, timeout data, private data, and saved context */ | ||
| 160 | +typedef struct _st_thread _st_thread_t; | ||
| 161 | + | ||
| 162 | +struct _st_thread { | ||
| 163 | + int state; /* Thread's state */ | ||
| 164 | + int flags; /* Thread's flags */ | ||
| 165 | + | ||
| 166 | + void *(*start)(void *arg); /* The start function of the thread */ | ||
| 167 | + void *arg; /* Argument of the start function */ | ||
| 168 | + void *retval; /* Return value of the start function */ | ||
| 169 | + | ||
| 170 | + _st_stack_t *stack; /* Info about thread's stack */ | ||
| 171 | + | ||
| 172 | + _st_clist_t links; /* For putting on run/sleep/zombie queue */ | ||
| 173 | + _st_clist_t wait_links; /* For putting on mutex/condvar wait queue */ | ||
| 174 | +#ifdef DEBUG | ||
| 175 | + _st_clist_t tlink; /* For putting on thread queue */ | ||
| 176 | +#endif | ||
| 177 | + | ||
| 178 | + st_utime_t due; /* Wakeup time when thread is sleeping */ | ||
| 179 | + _st_thread_t *left; /* For putting in timeout heap */ | ||
| 180 | + _st_thread_t *right; /* -- see docs/timeout_heap.txt for details */ | ||
| 181 | + int heap_index; /* NOTE(review): presumably the thread's position in the timeout heap -- confirm */ | ||
| 182 | + | ||
| 183 | + void **private_data; /* Per thread private data */ | ||
| 184 | + | ||
| 185 | + _st_cond_t *term; /* Termination condition variable for join */ | ||
| 186 | + | ||
| 187 | + jmp_buf context; /* Thread's context */ | ||
| 188 | +}; | ||
| 189 | + | ||
| 190 | +/* Mutex: the owning thread plus the queue of threads waiting for it */ | ||
| 191 | +typedef struct _st_mutex { | ||
| 192 | + _st_thread_t *owner; /* Current mutex owner */ | ||
| 193 | + _st_clist_t wait_q; /* Mutex wait queue */ | ||
| 194 | +} _st_mutex_t; | ||
| 195 | + | ||
| 196 | +/* Poll request: the array of poll descriptors a thread is polling, linkable onto the io queue */ | ||
| 197 | +typedef struct _st_pollq { | ||
| 198 | + _st_clist_t links; /* For putting on io queue */ | ||
| 199 | + _st_thread_t *thread; /* Polling thread */ | ||
| 200 | + struct pollfd *pds; /* Array of poll descriptors */ | ||
| 201 | + int npds; /* Length of the array */ | ||
| 202 | + int on_ioq; /* Is it on ioq? */ | ||
| 203 | +} _st_pollq_t; | ||
| 204 | + | ||
| 205 | +/* Table of operations implemented by an event system; the active table is reached through the _st_eventsys pointer */ | ||
| 206 | +typedef struct _st_eventsys_ops { | ||
| 207 | + const char *name; /* Name of this event system */ | ||
| 208 | + int val; /* Type of this event system */ | ||
| 209 | + int (*init)(void); /* Initialization */ | ||
| 210 | + void (*dispatch)(void); /* Dispatch function */ | ||
| 211 | + int (*pollset_add)(struct pollfd *, int); /* Add descriptor set */ | ||
| 212 | + void (*pollset_del)(struct pollfd *, int); /* Delete descriptor set */ | ||
| 213 | + int (*fd_new)(int); /* New descriptor allocated */ | ||
| 214 | + int (*fd_close)(int); /* Descriptor closed */ | ||
| 215 | + int (*fd_getlimit)(void); /* Descriptor hard limit */ | ||
| 216 | +} _st_eventsys_t; | ||
| 217 | + | ||
| 218 | +/* Virtual processor (vp) state: idle thread, last clock reading, scheduling queues, and optional switch callbacks */ | ||
| 219 | +typedef struct _st_vp { | ||
| 220 | + _st_thread_t *idle_thread; /* Idle thread for this vp */ | ||
| 221 | + st_utime_t last_clock; /* The last time we went into vp_check_clock() */ | ||
| 222 | + | ||
| 223 | + _st_clist_t run_q; /* run queue for this vp */ | ||
| 224 | + _st_clist_t io_q; /* io queue for this vp */ | ||
| 225 | + _st_clist_t zombie_q; /* zombie queue for this vp */ | ||
| 226 | +#ifdef DEBUG | ||
| 227 | + _st_clist_t thread_q; /* all threads of this vp */ | ||
| 228 | +#endif | ||
| 229 | + int pagesize; /* NOTE(review): presumably the system memory page size -- confirm */ | ||
| 230 | + | ||
| 231 | + _st_thread_t *sleep_q; /* sleep queue for this vp */ | ||
| 232 | + int sleepq_size; /* number of threads on sleep queue */ | ||
| 233 | + | ||
| 234 | +#ifdef ST_SWITCH_CB | ||
| 235 | + st_switch_cb_t switch_out_cb; /* called when a thread is switched out */ | ||
| 236 | + st_switch_cb_t switch_in_cb; /* called when a thread is switched in */ | ||
| 237 | +#endif | ||
| 238 | +} _st_vp_t; | ||
| 239 | + | ||
| 240 | +/* Descriptor object around an OS file descriptor, with optional private data and free-list linkage */ | ||
| 241 | +typedef struct _st_netfd { | ||
| 242 | + int osfd; /* Underlying OS file descriptor */ | ||
| 243 | + int inuse; /* In-use flag */ | ||
| 244 | + void *private_data; /* Per descriptor private data */ | ||
| 245 | + _st_destructor_t destructor; /* Private data destructor function */ | ||
| 246 | + void *aux_data; /* Auxiliary data for internal use */ | ||
| 247 | + struct _st_netfd *next; /* For putting on the free list */ | ||
| 248 | +} _st_netfd_t; | ||
| 249 | + | ||
| 250 | + | ||
| 251 | +/***************************************** | ||
| 252 | + * Current vp, thread, and event system | ||
| 253 | + */ | ||
| 254 | + | ||
| 255 | +extern _st_vp_t _st_this_vp; | ||
| 256 | +extern _st_thread_t *_st_this_thread; | ||
| 257 | +extern _st_eventsys_t *_st_eventsys; | ||
| 258 | +/* Read / set the currently running thread; the remaining macros are shorthands for fields of _st_this_vp */ | ||
| 259 | +#define _ST_CURRENT_THREAD() (_st_this_thread) | ||
| 260 | +#define _ST_SET_CURRENT_THREAD(_thread) (_st_this_thread = (_thread)) | ||
| 261 | + | ||
| 262 | +#define _ST_LAST_CLOCK (_st_this_vp.last_clock) | ||
| 263 | + | ||
| 264 | +#define _ST_RUNQ (_st_this_vp.run_q) | ||
| 265 | +#define _ST_IOQ (_st_this_vp.io_q) | ||
| 266 | +#define _ST_ZOMBIEQ (_st_this_vp.zombie_q) | ||
| 267 | +#ifdef DEBUG | ||
| 268 | +#define _ST_THREADQ (_st_this_vp.thread_q) | ||
| 269 | +#endif | ||
| 270 | + | ||
| 271 | +#define _ST_PAGE_SIZE (_st_this_vp.pagesize) | ||
| 272 | + | ||
| 273 | +#define _ST_SLEEPQ (_st_this_vp.sleep_q) | ||
| 274 | +#define _ST_SLEEPQ_SIZE (_st_this_vp.sleepq_size) | ||
| 275 | +/* Idle the vp by invoking the dispatch function of the active event system */ | ||
| 276 | +#define _ST_VP_IDLE() (*_st_eventsys->dispatch)() | ||
| 277 | + | ||
| 278 | + | ||
| 279 | +/***************************************** | ||
| 280 | + * vp queues operations | ||
| 281 | + */ | ||
| 282 | +/* _ST_ADD_IOQ/_ST_DEL_IOQ take a _st_pollq_t object (not a pointer); the thread queue macros take a thread pointer. Every macro argument is parenthesized so any expression can be passed. */ | ||
| 283 | +#define _ST_ADD_IOQ(_pq) ST_APPEND_LINK(&(_pq).links, &_ST_IOQ) | ||
| 284 | +#define _ST_DEL_IOQ(_pq) ST_REMOVE_LINK(&(_pq).links) | ||
| 285 | + | ||
| 286 | +#define _ST_ADD_RUNQ(_thr) ST_APPEND_LINK(&(_thr)->links, &_ST_RUNQ) | ||
| 287 | +#define _ST_DEL_RUNQ(_thr) ST_REMOVE_LINK(&(_thr)->links) | ||
| 288 | + | ||
| 289 | +#define _ST_ADD_SLEEPQ(_thr, _timeout) _st_add_sleep_q(_thr, _timeout) | ||
| 290 | +#define _ST_DEL_SLEEPQ(_thr) _st_del_sleep_q(_thr) | ||
| 291 | + | ||
| 292 | +#define _ST_ADD_ZOMBIEQ(_thr) ST_APPEND_LINK(&(_thr)->links, &_ST_ZOMBIEQ) | ||
| 293 | +#define _ST_DEL_ZOMBIEQ(_thr) ST_REMOVE_LINK(&(_thr)->links) | ||
| 294 | + | ||
| 295 | +#ifdef DEBUG | ||
| 296 | +#define _ST_ADD_THREADQ(_thr) ST_APPEND_LINK(&(_thr)->tlink, &_ST_THREADQ) | ||
| 297 | +#define _ST_DEL_THREADQ(_thr) ST_REMOVE_LINK(&(_thr)->tlink) | ||
| 298 | +#endif | ||
| 299 | + | ||
| 300 | + | ||
| 301 | +/***************************************** | ||
| 302 | + * Thread states and flags | ||
| 303 | + */ | ||
| 304 | +/* Values of the "state" field of struct _st_thread */ | ||
| 305 | +#define _ST_ST_RUNNING 0 | ||
| 306 | +#define _ST_ST_RUNNABLE 1 | ||
| 307 | +#define _ST_ST_IO_WAIT 2 | ||
| 308 | +#define _ST_ST_LOCK_WAIT 3 | ||
| 309 | +#define _ST_ST_COND_WAIT 4 | ||
| 310 | +#define _ST_ST_SLEEPING 5 | ||
| 311 | +#define _ST_ST_ZOMBIE 6 | ||
| 312 | +#define _ST_ST_SUSPENDED 7 | ||
| 313 | +/* Flag bits (each a distinct bit), presumably OR-ed into the "flags" field of struct _st_thread -- confirm */ | ||
| 314 | +#define _ST_FL_PRIMORDIAL 0x01 | ||
| 315 | +#define _ST_FL_IDLE_THREAD 0x02 | ||
| 316 | +#define _ST_FL_ON_SLEEPQ 0x04 | ||
| 317 | +#define _ST_FL_INTERRUPT 0x08 | ||
| 318 | +#define _ST_FL_TIMEDOUT 0x10 | ||
| 319 | + | ||
| 320 | + | ||
| 321 | +/***************************************** | ||
| 322 | + * Pointer conversion | ||
| 323 | + */ | ||
| 324 | +/* Each _ST_*_PTR macro below maps a pointer to an embedded _st_clist_t member back to the structure that contains it, by subtracting the member's offset */ | ||
| 325 | +#ifndef offsetof | ||
| 326 | +#define offsetof(type, identifier) ((size_t)&(((type *)0)->identifier)) | ||
| 327 | +#endif | ||
| 328 | + | ||
| 329 | +#define _ST_THREAD_PTR(_qp) \ | ||
| 330 | + ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, links))) | ||
| 331 | + | ||
| 332 | +#define _ST_THREAD_WAITQ_PTR(_qp) \ | ||
| 333 | + ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, wait_links))) | ||
| 334 | + | ||
| 335 | +#define _ST_THREAD_STACK_PTR(_qp) \ | ||
| 336 | + ((_st_stack_t *)((char*)(_qp) - offsetof(_st_stack_t, links))) | ||
| 337 | + | ||
| 338 | +#define _ST_POLLQUEUE_PTR(_qp) \ | ||
| 339 | + ((_st_pollq_t *)((char *)(_qp) - offsetof(_st_pollq_t, links))) | ||
| 340 | + | ||
| 341 | +#ifdef DEBUG | ||
| 342 | +#define _ST_THREAD_THREADQ_PTR(_qp) \ | ||
| 343 | + ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, tlink))) | ||
| 344 | +#endif | ||
| 345 | + | ||
| 346 | + | ||
| 347 | +/***************************************** | ||
| 348 | + * Constants | ||
| 349 | + */ | ||
| 350 | +/* Every constant below except ST_DEFAULT_STACK_SIZE is defined only if it has not been defined earlier */ | ||
| 351 | +#ifndef ST_UTIME_NO_TIMEOUT | ||
| 352 | +#define ST_UTIME_NO_TIMEOUT ((st_utime_t) -1LL) | ||
| 353 | +#endif | ||
| 354 | +/* Default thread stack size: 64K, or 128K on ia64 */ | ||
| 355 | +#ifndef __ia64__ | ||
| 356 | +#define ST_DEFAULT_STACK_SIZE (64*1024) | ||
| 357 | +#else | ||
| 358 | +#define ST_DEFAULT_STACK_SIZE (128*1024) /* Includes register stack size */ | ||
| 359 | +#endif | ||
| 360 | + | ||
| 361 | +#ifndef ST_KEYS_MAX | ||
| 362 | +#define ST_KEYS_MAX 16 | ||
| 363 | +#endif | ||
| 364 | + | ||
| 365 | +#ifndef ST_MIN_POLLFDS_SIZE | ||
| 366 | +#define ST_MIN_POLLFDS_SIZE 64 | ||
| 367 | +#endif | ||
| 368 | + | ||
| 369 | + | ||
| 370 | +/***************************************** | ||
| 371 | + * Threads context switching | ||
| 372 | + */ | ||
| 373 | +/* Debug thread-iteration hook: calls _st_iterate_threads() when DEBUG is defined, expands to nothing otherwise */ | ||
| 374 | +#ifdef DEBUG | ||
| 375 | +void _st_iterate_threads(void); | ||
| 376 | +#define ST_DEBUG_ITERATE_THREADS() _st_iterate_threads() | ||
| 377 | +#else | ||
| 378 | +#define ST_DEBUG_ITERATE_THREADS() | ||
| 379 | +#endif | ||
| 380 | +/* Optional switch callbacks (ST_SWITCH_CB builds only): invoked unless the callback is NULL, the thread is the idle thread, or the thread is a zombie. "_thread" is parenthesized so any pointer expression can be passed. */ | ||
| 381 | +#ifdef ST_SWITCH_CB | ||
| 382 | +#define ST_SWITCH_OUT_CB(_thread) \ | ||
| 383 | + if (_st_this_vp.switch_out_cb != NULL && \ | ||
| 384 | + (_thread) != _st_this_vp.idle_thread && \ | ||
| 385 | + (_thread)->state != _ST_ST_ZOMBIE) { \ | ||
| 386 | + _st_this_vp.switch_out_cb(); \ | ||
| 387 | + } | ||
| 388 | +#define ST_SWITCH_IN_CB(_thread) \ | ||
| 389 | + if (_st_this_vp.switch_in_cb != NULL && \ | ||
| 390 | + (_thread) != _st_this_vp.idle_thread && \ | ||
| 391 | + (_thread)->state != _ST_ST_ZOMBIE) { \ | ||
| 392 | + _st_this_vp.switch_in_cb(); \ | ||
| 393 | + } | ||
| 394 | +#else | ||
| 395 | +#define ST_SWITCH_OUT_CB(_thread) | ||
| 396 | +#define ST_SWITCH_IN_CB(_thread) | ||
| 397 | +#endif | ||
| 398 | + | ||
| 399 | +/* | ||
| 400 | + * Switch away from the current thread context by saving its state and | ||
| 401 | + * calling the thread scheduler. NOTE(review): the statements after the "if" (debug hook, switch-in callback) presumably run once this thread is resumed through _ST_RESTORE_CONTEXT -- confirm MD_SETJMP/MD_LONGJMP in md.h | ||
| 402 | + */ | ||
| 403 | +#define _ST_SWITCH_CONTEXT(_thread) \ | ||
| 404 | + ST_BEGIN_MACRO \ | ||
| 405 | + ST_SWITCH_OUT_CB(_thread); \ | ||
| 406 | + if (!MD_SETJMP((_thread)->context)) { \ | ||
| 407 | + _st_vp_schedule(); \ | ||
| 408 | + } \ | ||
| 409 | + ST_DEBUG_ITERATE_THREADS(); \ | ||
| 410 | + ST_SWITCH_IN_CB(_thread); \ | ||
| 411 | + ST_END_MACRO | ||
| 412 | + | ||
| 413 | +/* | ||
| 414 | + * Restore a thread context that was saved by _ST_SWITCH_CONTEXT or | ||
| 415 | + * initialized by _ST_INIT_CONTEXT; the thread is recorded as the current thread before the jump | ||
| 416 | + */ | ||
| 417 | +#define _ST_RESTORE_CONTEXT(_thread) \ | ||
| 418 | + ST_BEGIN_MACRO \ | ||
| 419 | + _ST_SET_CURRENT_THREAD(_thread); \ | ||
| 420 | + MD_LONGJMP((_thread)->context, 1); \ | ||
| 421 | + ST_END_MACRO | ||
| 422 | + | ||
| 423 | +/* | ||
| 424 | + * Initialize the thread context preparing it to execute _main; MD_INIT_CONTEXT must already be defined (presumably by md.h), otherwise compilation stops with "Unknown OS" | ||
| 425 | + */ | ||
| 426 | +#ifdef MD_INIT_CONTEXT | ||
| 427 | +#define _ST_INIT_CONTEXT MD_INIT_CONTEXT | ||
| 428 | +#else | ||
| 429 | +#error Unknown OS | ||
| 430 | +#endif | ||
| 431 | + | ||
| 432 | +/* | ||
| 433 | + * Number of bytes reserved under the stack "bottom" (alias of MD_STACK_PAD_SIZE) | ||
| 434 | + */ | ||
| 435 | +#define _ST_STACK_PAD_SIZE MD_STACK_PAD_SIZE | ||
| 436 | + | ||
| 437 | + | ||
| 438 | +/***************************************** | ||
| 439 | + * Forward declarations | ||
| 440 | + */ | ||
| 441 | +/* Library-internal functions (underscore-prefixed names) */ | ||
| 442 | +void _st_vp_schedule(void); | ||
| 443 | +void _st_vp_check_clock(void); | ||
| 444 | +void *_st_idle_thread_start(void *arg); | ||
| 445 | +void _st_thread_main(void); | ||
| 446 | +void _st_thread_cleanup(_st_thread_t *thread); | ||
| 447 | +void _st_add_sleep_q(_st_thread_t *thread, st_utime_t timeout); | ||
| 448 | +void _st_del_sleep_q(_st_thread_t *thread); | ||
| 449 | +_st_stack_t *_st_stack_new(int stack_size); | ||
| 450 | +void _st_stack_free(_st_stack_t *ts); | ||
| 451 | +int _st_io_init(void); | ||
| 452 | +/* st_* functions declared here in terms of the private _st_* structure types; NOTE(review): presumably these must stay compatible with the prototypes in public.h -- confirm */ | ||
| 453 | +st_utime_t st_utime(void); | ||
| 454 | +_st_cond_t *st_cond_new(void); | ||
| 455 | +int st_cond_destroy(_st_cond_t *cvar); | ||
| 456 | +int st_cond_timedwait(_st_cond_t *cvar, st_utime_t timeout); | ||
| 457 | +int st_cond_signal(_st_cond_t *cvar); | ||
| 458 | +ssize_t st_read(_st_netfd_t *fd, void *buf, size_t nbyte, st_utime_t timeout); | ||
| 459 | +ssize_t st_write(_st_netfd_t *fd, const void *buf, size_t nbyte, | ||
| 460 | + st_utime_t timeout); | ||
| 461 | +int st_poll(struct pollfd *pds, int npds, st_utime_t timeout); | ||
| 462 | +_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, | ||
| 463 | + int joinable, int stk_size); | ||
| 464 | + | ||
| 465 | +#endif /* !__ST_COMMON_H__ */ | ||
| 466 | + |
trunk/research/st-1.9/docs/fig.gif
0 → 100644
5.2 KB
trunk/research/st-1.9/docs/notes.html
0 → 100644
| 1 | +<HTML> | ||
| 2 | +<HEAD> | ||
| 3 | +<TITLE>State Threads Library Programming Notes</TITLE> | ||
| 4 | +</HEAD> | ||
| 5 | +<BODY BGCOLOR=#FFFFFF> | ||
| 6 | +<H2>Programming Notes</H2> | ||
| 7 | +<P> | ||
| 8 | +<B> | ||
| 9 | +<UL> | ||
| 10 | +<LI><A HREF=#porting>Porting</A></LI> | ||
| 11 | +<LI><A HREF=#signals>Signals</A></LI> | ||
| 12 | +<LI><A HREF=#intra>Intra-Process Synchronization</A></LI> | ||
| 13 | +<LI><A HREF=#inter>Inter-Process Synchronization</A></LI> | ||
| 14 | +<LI><A HREF=#nonnet>Non-Network I/O</A></LI> | ||
| 15 | +<LI><A HREF=#timeouts>Timeouts</A></LI> | ||
| 16 | +</UL> | ||
| 17 | +</B> | ||
| 18 | +<P> | ||
| 19 | +<HR> | ||
| 20 | +<P> | ||
| 21 | +<A NAME="porting"></A> | ||
| 22 | +<H3>Porting</H3> | ||
| 23 | +The State Threads library uses OS concepts that are available in some | ||
| 24 | +form on most UNIX platforms, making the library very portable across | ||
| 25 | +many flavors of UNIX. However, there are several parts of the library | ||
| 26 | +that rely on platform-specific features. Here is the list of such parts: | ||
| 27 | +<P> | ||
| 28 | +<UL> | ||
| 29 | +<LI><I>Thread context initialization</I>: Two ingredients of the | ||
| 30 | +<TT>jmp_buf</TT> | ||
| 31 | +data structure (the program counter and the stack pointer) have to be | ||
| 32 | +manually set in the thread creation routine. The <TT>jmp_buf</TT> data | ||
| 33 | +structure is defined in the <TT>setjmp.h</TT> header file and differs from | ||
| 34 | +platform to platform. Usually the program counter is a structure member | ||
| 35 | +with <TT>PC</TT> in the name and the stack pointer is a structure member | ||
| 36 | +with <TT>SP</TT> in the name. One can also look in the | ||
| 37 | +<A HREF="http://www.mozilla.org/source.html">Netscape's NSPR library source</A> | ||
| 38 | +which already has this code for many UNIX-like platforms | ||
| 39 | +(<TT>mozilla/nsprpub/pr/include/md/*.h</TT> files). | ||
| 40 | +<P> | ||
| 41 | +Note that on some BSD-derived platforms <TT>_setjmp(3)/_longjmp(3)</TT> | ||
| 42 | +calls should be used instead of <TT>setjmp(3)/longjmp(3)</TT> (that is, | ||
| 43 | +the calls that manipulate only the stack and registers and do <I>not</I> | ||
| 44 | +save and restore the process's signal mask).</LI> | ||
| 45 | +<P> | ||
| 46 | +Starting with glibc 2.4 on Linux the opacity of the <TT>jmp_buf</TT> data | ||
| 47 | +structure is enforced by <TT>setjmp(3)/longjmp(3)</TT> so the | ||
| 48 | +<TT>jmp_buf</TT> ingredients cannot be accessed directly anymore (unless | ||
| 49 | +the special environment variable LD_POINTER_GUARD is set before application | ||
| 50 | +execution). To avoid a dependency on a custom environment, the State Threads | ||
| 51 | +library provides <TT>setjmp/longjmp</TT> replacement functions for | ||
| 52 | +all Intel CPU architectures. Other CPU architectures can also be easily | ||
| 53 | +supported (the <TT>setjmp/longjmp</TT> source code is widely available for | ||
| 54 | +many CPU architectures). | ||
| 55 | +<P> | ||
| 56 | +<LI><I>High resolution time function</I>: Some platforms (IRIX, Solaris) | ||
| 57 | +provide a high resolution time function based on the free running hardware | ||
| 58 | +counter. This function returns the time counted since some arbitrary | ||
| 59 | +moment in the past (usually machine power up time). It is not correlated in | ||
| 60 | +any way to the time of day, and thus is not subject to resetting, | ||
| 61 | +drifting, etc. This type of time is ideal for tasks where cheap, accurate | ||
| 62 | +interval timing is required. If such a function is not available on a | ||
| 63 | +particular platform, the <TT>gettimeofday(3)</TT> function can be used | ||
| 64 | +(though on some platforms it involves a system call). | ||
| 65 | +<P> | ||
| 66 | +<LI><I>The stack growth direction</I>: The library needs to know whether the | ||
| 67 | +stack grows toward lower (down) or higher (up) memory addresses. | ||
| 68 | +One can write a simple test program that detects the stack growth direction | ||
| 69 | +on a particular platform.</LI> | ||
| 70 | +<P> | ||
| 71 | +<LI><I>Non-blocking attribute inheritance</I>: On some platforms (e.g. IRIX) | ||
| 72 | +the socket created as a result of the <TT>accept(2)</TT> call inherits the | ||
| 73 | +non-blocking attribute of the listening socket. One needs to consult the manual | ||
| 74 | +pages or write a simple test program to see if this applies to a specific | ||
| 75 | +platform.</LI> | ||
| 76 | +<P> | ||
| 77 | +<LI><I>Anonymous memory mapping</I>: The library allocates memory segments | ||
| 78 | +for thread stacks by doing anonymous memory mapping (<TT>mmap(2)</TT>). This | ||
| 79 | +mapping is somewhat different on SVR4 and BSD4.3 derived platforms. | ||
| 80 | +<P> | ||
| 81 | +The memory mapping can be avoided altogether by using <TT>malloc(3)</TT> for | ||
| 82 | +stack allocation. In this case the <TT>MALLOC_STACK</TT> macro should be | ||
| 83 | +defined.</LI> | ||
| 84 | +</UL> | ||
| 85 | +<P> | ||
| 86 | +All machine-dependent feature test macros should be defined in the | ||
| 87 | +<TT>md.h</TT> header file. The assembly code for <TT>setjmp/longjmp</TT> | ||
| 88 | +replacement functions for all CPU architectures should be placed in | ||
| 89 | +the <TT>md.S</TT> file. | ||
| 90 | +<P> | ||
| 91 | +The current version of the library is ported to: | ||
| 92 | +<UL> | ||
| 93 | + <LI>IRIX 6.x (both 32 and 64 bit)</LI> | ||
| 94 | + <LI>Linux (kernel 2.x and glibc 2.x) on x86, Alpha, MIPS and MIPSEL, | ||
| 95 | + SPARC, ARM, PowerPC, 68k, HPPA, S390, IA-64, and Opteron (AMD-64)</LI> | ||
| 96 | + <LI>Solaris 2.x (SunOS 5.x) on x86, AMD64, SPARC, and SPARC-64</LI> | ||
| 97 | + <LI>AIX 4.x</LI> | ||
| 98 | + <LI>HP-UX 11 (both 32 and 64 bit)</LI> | ||
| 99 | + <LI>Tru64/OSF1</LI> | ||
| 100 | + <LI>FreeBSD on x86, AMD64, and Alpha</LI> | ||
| 101 | + <LI>OpenBSD on x86, AMD64, Alpha, and SPARC</LI> | ||
| 102 | + <LI>NetBSD on x86, Alpha, SPARC, and VAX</LI> | ||
| 103 | + <LI>MacOS X (Darwin) on PowerPC (32 bit) and Intel (both 32 and 64 bit) [universal]</LI> | ||
| 104 | + <LI>Cygwin</LI> | ||
| 105 | +</UL> | ||
| 106 | +<P> | ||
| 107 | + | ||
| 108 | +<A NAME="signals"></A> | ||
| 109 | +<H3>Signals</H3> | ||
| 110 | +Signal handling in an application using State Threads should be treated the | ||
| 111 | +same way as in a classical UNIX process application. There is no such | ||
| 112 | +thing as a per-thread signal mask: all threads share the same signal handlers, | ||
| 113 | +and only async-signal-safe functions can be used in signal handlers. | ||
| 114 | +However, there is a way to process signals synchronously by converting a | ||
| 115 | +signal event to an I/O event: a signal catching function does a write to | ||
| 116 | +a pipe which will be processed synchronously by a dedicated signal handling | ||
| 117 | +thread. The following code demonstrates this technique (error handling is | ||
| 118 | +omitted for clarity): | ||
| 119 | +<PRE> | ||
| 120 | + | ||
| 121 | +/* Per-process pipe which is used as a signal queue: sig_catcher() writes to sig_pipe[1], sig_process() reads from sig_pipe[0]. */ | ||
| 122 | +/* Up to PIPE_BUF/sizeof(int) signals can be queued up. */ | ||
| 123 | +int sig_pipe[2]; | ||
| 124 | + | ||
| 125 | +/* Signal catching function. */ | ||
| 126 | +/* Converts signal event to I/O event by writing the signal number to sig_pipe[1]. */ | ||
| 127 | +void sig_catcher(int signo) | ||
| 128 | +{ | ||
| 129 | + int err; | ||
| 130 | + | ||
| 131 | + /* Save errno to restore it after the write() */ | ||
| 132 | + err = errno; | ||
| 133 | + /* write() is async-signal-safe, so it may be called from a signal handler */ | ||
| 134 | + write(sig_pipe[1], &signo, sizeof(int)); | ||
| 135 | + errno = err; | ||
| 136 | +} | ||
| 137 | + | ||
| 138 | +/* Signal processing function. */ | ||
| 139 | +/* This is the "main" function of the signal processing thread. */ | ||
| 140 | +void *sig_process(void *arg) | ||
| 141 | +{ | ||
| 142 | + st_netfd_t nfd; | ||
| 143 | + int signo; | ||
| 144 | + /* Open the read end of the signal pipe as an st_netfd_t */ | ||
| 145 | + nfd = st_netfd_open(sig_pipe[0]); | ||
| 146 | + | ||
| 147 | + for ( ; ; ) { | ||
| 148 | + /* Read the next signal number from the pipe (no timeout) */ | ||
| 149 | + st_read(nfd, &signo, sizeof(int), ST_UTIME_NO_TIMEOUT); | ||
| 150 | + | ||
| 151 | + /* Process signal synchronously */ | ||
| 152 | + switch (signo) { | ||
| 153 | + case SIGHUP: | ||
| 154 | + /* do something here - reread config files, etc. */ | ||
| 155 | + break; | ||
| 156 | + case SIGTERM: | ||
| 157 | + /* do something here - cleanup, etc. */ | ||
| 158 | + break; | ||
| 159 | + /* . | ||
| 160 | + . | ||
| 161 | + Other signals | ||
| 162 | + . | ||
| 163 | + . | ||
| 164 | + */ | ||
| 165 | + } | ||
| 166 | + } | ||
| 167 | + /* Not reached: the loop above has no exit */ | ||
| 168 | + return NULL; | ||
| 169 | +} | ||
| 170 | + | ||
| 171 | +int main(int argc, char *argv[]) | ||
| 172 | +{ | ||
| 173 | + struct sigaction sa; | ||
| 174 | + . | ||
| 175 | + . | ||
| 176 | + . | ||
| 177 | + | ||
| 178 | + /* Create signal pipe */ | ||
| 179 | + pipe(sig_pipe); | ||
| 180 | + | ||
| 181 | + /* Create signal processing thread (joinable = 0, stk_size = 0) */ | ||
| 182 | + st_thread_create(sig_process, NULL, 0, 0); | ||
| 183 | + | ||
| 184 | + /* Install sig_catcher() as a signal handler */ | ||
| 185 | + sa.sa_handler = sig_catcher; | ||
| 186 | + sigemptyset(&sa.sa_mask); | ||
| 187 | + sa.sa_flags = 0; | ||
| 188 | + sigaction(SIGHUP, &sa, NULL); | ||
| 189 | + /* Install the same handler for SIGTERM */ | ||
| 190 | + sa.sa_handler = sig_catcher; | ||
| 191 | + sigemptyset(&sa.sa_mask); | ||
| 192 | + sa.sa_flags = 0; | ||
| 193 | + sigaction(SIGTERM, &sa, NULL); | ||
| 194 | + | ||
| 195 | + . | ||
| 196 | + . | ||
| 197 | + . | ||
| 198 | + | ||
| 199 | +} | ||
| 200 | + | ||
| 201 | +</PRE> | ||
| 202 | +<P> | ||
| 203 | +Note that if multiple processes are used (see below), the signal pipe should | ||
| 204 | +be initialized after the <TT>fork(2)</TT> call so that each process has its | ||
| 205 | +own private pipe. | ||
| 206 | +<P> | ||
| 207 | + | ||
| 208 | +<A NAME="intra"> | ||
| 209 | +<H3>Intra-Process Synchronization</H3> | ||
| 210 | +Due to the event-driven nature of the library scheduler, the thread context | ||
| 211 | +switch (process state change) can only happen in a well-known set of | ||
| 212 | +library functions. This set includes functions in which a thread may | ||
| 213 | +"block":<TT> </TT>I/O functions (<TT>st_read(), st_write(), </TT>etc.), | ||
| 214 | +sleep functions (<TT>st_sleep(), </TT>etc.), and thread synchronization | ||
| 215 | +functions (<TT>st_thread_join(), st_cond_wait(), </TT>etc.). As a result, | ||
| 216 | +process-specific global data need not be protected by locks since a thread | ||
| 217 | +cannot be rescheduled while in a critical section (and only one thread at a | ||
| 218 | +time can access the same memory location). By the same token, | ||
| 219 | +non-thread-safe functions (in a traditional sense) can be safely used with | ||
| 220 | +the State Threads. The library's mutex facilities are practically useless | ||
| 221 | +for a correctly written application (no blocking functions in critical | ||
| 222 | +section) and are provided mostly for completeness. This absence of locking | ||
| 223 | +greatly simplifies an application design and provides a foundation for | ||
| 224 | +scalability. | ||
| 225 | +<P> | ||
| 226 | + | ||
| 227 | +<A NAME="inter"> | ||
| 228 | +<H3>Inter-Process Synchronization</H3> | ||
| 229 | +The State Threads library makes it possible to multiplex a large number | ||
| 230 | +of simultaneous connections onto a much smaller number of separate | ||
| 231 | +processes, where each process uses a many-to-one user-level threading | ||
| 232 | +implementation (<B>N</B> of <B>M:1</B> mappings rather than one <B>M:N</B> | ||
| 233 | +mapping used in native threading libraries on some platforms). This design | ||
| 234 | +is key to the application's scalability. One can think about it as if a | ||
| 235 | +set of all threads is partitioned into separate groups (processes) where | ||
| 236 | +each group has a separate pool of resources (virtual address space, file | ||
| 237 | +descriptors, etc.). An application designer has full control of how many | ||
| 238 | +groups (processes) an application creates and what resources, if any, | ||
| 239 | +are shared among different groups via standard UNIX inter-process | ||
| 240 | +communication (IPC) facilities.<P> | ||
| 241 | +There are several reasons for creating multiple processes: | ||
| 242 | +<P> | ||
| 243 | +<UL> | ||
| 244 | +<LI>To take advantage of multiple hardware entities (CPUs, disks, etc.) | ||
| 245 | +available in the system (hardware parallelism).</LI> | ||
| 246 | +<P> | ||
| 247 | +<LI>To reduce risk of losing a large number of user connections when one of | ||
| 248 | +the processes crashes. For example, if <B>C</B> user connections (threads) | ||
| 249 | +are multiplexed onto <B>P</B> processes and one of the processes crashes, | ||
| 250 | +only a fraction (<B>C/P</B>) of all connections will be lost.</LI> | ||
| 251 | +<P> | ||
| 252 | +<LI>To overcome per-process resource limitations imposed by the OS. For | ||
| 253 | +example, if <TT>select(2)</TT> is used for event polling, the number of | ||
| 254 | +simultaneous connections (threads) per process is | ||
| 255 | +limited by the <TT>FD_SETSIZE</TT> parameter (see <TT>select(2)</TT>). | ||
| 256 | +If <TT>FD_SETSIZE</TT> is equal to 1024 and each connection needs one file | ||
| 257 | +descriptor, then an application should create 10 processes to support 10,000 | ||
| 258 | +simultaneous connections.</LI> | ||
| 259 | +</UL> | ||
| 260 | +<P> | ||
| 261 | +Ideally all user sessions are completely independent, so there is no need for | ||
| 262 | +inter-process communication. It is always better to have several separate | ||
| 263 | +smaller process-specific resources (e.g., data caches) than to have one large | ||
| 264 | +resource shared (and modified) by all processes. Sometimes, however, there | ||
| 265 | +is a need to share a common resource among different processes. In that case, | ||
| 266 | +standard UNIX IPC facilities can be used. In addition to that, there is a way | ||
| 267 | +to synchronize different processes so that only the thread accessing the | ||
| 268 | +shared resource will be suspended (but not the entire process) if that resource | ||
| 269 | +is unavailable. In the following code fragment a pipe is used as a counting | ||
| 270 | +semaphore for inter-process synchronization: | ||
| 271 | +<PRE> | ||
| 272 | +#ifndef PIPE_BUF | ||
| 273 | +#define PIPE_BUF 512 /* POSIX */ | ||
| 274 | +#endif | ||
| 275 | + | ||
| 276 | +/* Semaphore data structure */ | ||
| 277 | +typedef struct ipc_sem { | ||
| 278 | + st_netfd_t rdfd; /* read descriptor */ | ||
| 279 | + st_netfd_t wrfd; /* write descriptor */ | ||
| 280 | +} ipc_sem_t; | ||
| 281 | + | ||
| 282 | +/* Create and initialize the semaphore. Should be called before fork(2). */ | ||
| 283 | +/* 'value' must be less than PIPE_BUF. */ | ||
| 284 | +/* If 'value' is 1, the semaphore works as mutex. */ | ||
| 285 | +ipc_sem_t *ipc_sem_create(int value) | ||
| 286 | +{ | ||
| 287 | + ipc_sem_t *sem; | ||
| 288 | + int p[2]; | ||
| 289 | + char b[PIPE_BUF]; | ||
| 290 | + | ||
| 291 | + /* Error checking is omitted for clarity */ | ||
| 292 | + sem = malloc(sizeof(ipc_sem_t)); | ||
| 293 | + | ||
| 294 | + /* Create the pipe */ | ||
| 295 | + pipe(p); | ||
| 296 | + sem->rdfd = st_netfd_open(p[0]); | ||
| 297 | + sem->wrfd = st_netfd_open(p[1]); | ||
| 298 | + | ||
| 299 | + /* Initialize the semaphore: put 'value' bytes into the pipe */ | ||
| 300 | + write(p[1], b, value); | ||
| 301 | + | ||
| 302 | + return sem; | ||
| 303 | +} | ||
| 304 | + | ||
| 305 | +/* Try to decrement the "value" of the semaphore. */ | ||
| 306 | +/* If "value" is 0, the calling thread blocks on the semaphore. */ | ||
| 307 | +int ipc_sem_wait(ipc_sem_t *sem) | ||
| 308 | +{ | ||
| 309 | + char c; | ||
| 310 | + | ||
| 311 | + /* Read one byte from the pipe */ | ||
| 312 | + if (st_read(sem->rdfd, &c, 1, ST_UTIME_NO_TIMEOUT) != 1) | ||
| 313 | + return -1; | ||
| 314 | + | ||
| 315 | + return 0; | ||
| 316 | +} | ||
| 317 | + | ||
| 318 | +/* Increment the "value" of the semaphore. */ | ||
| 319 | +int ipc_sem_post(ipc_sem_t *sem) | ||
| 320 | +{ | ||
| 321 | + char c; | ||
| 322 | + | ||
| 323 | + if (st_write(sem->wrfd, &c, 1, ST_UTIME_NO_TIMEOUT) != 1) | ||
| 324 | + return -1; | ||
| 325 | + | ||
| 326 | + return 0; | ||
| 327 | +} | ||
| 328 | + | ||
| 329 | +</PRE> | ||
| 330 | +<P> | ||
| 331 | + | ||
| 332 | +Generally, the following steps should be followed when writing an application | ||
| 333 | +using the State Threads library: | ||
| 334 | +<P> | ||
| 335 | +<OL> | ||
| 336 | +<LI>Initialize the library (<TT>st_init()</TT>).</LI> | ||
| 337 | +<P> | ||
| 338 | +<LI>Create resources that will be shared among different processes: | ||
| 339 | + create and bind listening sockets, create shared memory segments, IPC | ||
| 340 | + channels, synchronization primitives, etc.</LI> | ||
| 341 | +<P> | ||
| 342 | +<LI>Create several processes (<TT>fork(2)</TT>). The parent process should | ||
| 343 | + either exit or become a "watchdog" (e.g., it starts a new process when | ||
| 344 | + an existing one crashes, does a cleanup upon application termination, | ||
| 345 | + etc.).</LI> | ||
| 346 | +<P> | ||
| 347 | +<LI>In each child process create a pool of threads | ||
| 348 | + (<TT>st_thread_create()</TT>) to handle user connections.</LI> | ||
| 349 | +</OL> | ||
| 350 | +<P> | ||
| 351 | + | ||
| 352 | +<A NAME="nonnet"> | ||
| 353 | +<H3>Non-Network I/O</H3> | ||
| 354 | + | ||
| 355 | +The State Threads architecture uses non-blocking I/O on | ||
| 356 | +<TT>st_netfd_t</TT> objects for concurrent processing of multiple user | ||
| 357 | +connections. This architecture has a drawback: the entire process and | ||
| 358 | +all its threads may block for the duration of a <I>disk</I> or other | ||
| 359 | +non-network I/O operation, whether through State Threads I/O functions, | ||
| 360 | +direct system calls, or standard I/O functions. (This is applicable | ||
| 361 | +mostly to disk <I>reads</I>; disk <I>writes</I> are usually performed | ||
| 362 | +asynchronously -- data goes to the buffer cache to be written to disk | ||
| 363 | +later.) Fortunately, disk I/O (unlike network I/O) usually takes a | ||
| 364 | +finite and predictable amount of time, but this may not be true for | ||
| 365 | +special devices or user input devices (including stdin). Nevertheless, | ||
| 366 | +such I/O reduces throughput of the system and increases response times. | ||
| 367 | +There are several ways to design an application to overcome this | ||
| 368 | +drawback: | ||
| 369 | + | ||
| 370 | +<P> | ||
| 371 | +<UL> | ||
| 372 | +<LI>Create several identical main processes as described above (symmetric | ||
| 373 | + architecture). This will improve CPU utilization and thus improve the | ||
| 374 | + overall throughput of the system.</LI> | ||
| 375 | +<P> | ||
| 376 | +<LI>Create multiple "helper" processes in addition to the main process that | ||
| 377 | + will handle blocking I/O operations (asymmetric architecture). | ||
| 378 | + This approach was suggested for Web servers in a | ||
| 379 | + <A HREF="http://www.cs.rice.edu/~vivek/flash99/">paper</A> by Peter | ||
| 380 | + Druschel et al. In this architecture the main process communicates with | ||
| 381 | + a helper process via an IPC channel (<TT>pipe(2), socketpair(2)</TT>). | ||
| 382 | + The main process instructs a helper to perform the potentially blocking | ||
| 383 | + operation. Once the operation completes, the helper returns a | ||
| 384 | + notification via IPC.</LI> | ||
| 385 | +</UL> | ||
| 386 | +<P> | ||
| 387 | + | ||
| 388 | +<A NAME="timeouts"> | ||
| 389 | +<H3>Timeouts</H3> | ||
| 390 | + | ||
| 391 | +The <TT>timeout</TT> parameter to <TT>st_cond_timedwait()</TT> and the | ||
| 392 | +I/O functions, and the arguments to <TT>st_sleep()</TT> and | ||
| 393 | +<TT>st_usleep()</TT> specify a maximum time to wait <I>since the last | ||
| 394 | +context switch</I>, not since the beginning of the function call. | ||
| 395 | + | ||
| 396 | +<P>The State Threads' time resolution is actually the time interval | ||
| 397 | +between context switches. That time interval may be large in some | ||
| 398 | +situations, for example, when a single thread does a lot of work | ||
| 399 | +continuously. Note that a steady, uninterrupted stream of network I/O | ||
| 400 | +qualifies for this description; a context switch occurs only when a | ||
| 401 | +thread blocks. | ||
| 402 | + | ||
| 403 | +<P>If a specified I/O timeout is less than the time interval between | ||
| 404 | +context switches, the function may return with a timeout error before | ||
| 405 | +that amount of time has elapsed since the beginning of the function | ||
| 406 | +call. For example, if eight milliseconds have passed since the last | ||
| 407 | +context switch and an I/O function with a timeout of 10 milliseconds | ||
| 408 | +blocks, causing a switch, the call may return with a timeout error as | ||
| 409 | +little as two milliseconds after it was called. (On Linux, | ||
| 410 | +<TT>select()</TT>'s timeout is an <I>upper</I> bound on the amount of | ||
| 411 | +time elapsed before select returns.) Similarly, if 12 ms have passed | ||
| 412 | +already, the function may return immediately. | ||
| 413 | + | ||
| 414 | +<P>In almost all cases I/O timeouts should be used only for detecting a | ||
| 415 | +broken network connection or for preventing a peer from holding an idle | ||
| 416 | +connection for too long. Therefore for most applications realistic I/O | ||
| 417 | +timeouts should be on the order of seconds. Furthermore, there's | ||
| 418 | +probably no point in retrying operations that time out. Rather than | ||
| 419 | +retrying, simply use a larger timeout in the first place. | ||
| 420 | + | ||
| 421 | +<P>The largest valid timeout value is platform-dependent and may be | ||
| 422 | +significantly less than <TT>INT_MAX</TT> seconds for <TT>select()</TT> | ||
| 423 | +or <TT>INT_MAX</TT> milliseconds for <TT>poll()</TT>. Generally, you | ||
| 424 | +should not use timeouts exceeding several hours. Use | ||
| 425 | +<tt>ST_UTIME_NO_TIMEOUT</tt> (<tt>-1</tt>) as a special value to | ||
| 426 | +indicate infinite timeout or indefinite sleep. Use | ||
| 427 | +<tt>ST_UTIME_NO_WAIT</tt> (<tt>0</tt>) to indicate no waiting at all. | ||
| 428 | + | ||
| 429 | +<P> | ||
| 430 | +<HR> | ||
| 431 | +<P> | ||
| 432 | +</BODY> | ||
| 433 | +</HTML> | ||
| 434 | + |
trunk/research/st-1.9/docs/reference.html
0 → 100644
此 diff 太大无法显示。
trunk/research/st-1.9/docs/st.html
0 → 100644
| 1 | +<HTML> | ||
| 2 | +<HEAD> | ||
| 3 | +<TITLE>State Threads for Internet Applications</TITLE> | ||
| 4 | +</HEAD> | ||
| 5 | +<BODY BGCOLOR=#FFFFFF> | ||
| 6 | +<H2>State Threads for Internet Applications</H2> | ||
| 7 | +<H3>Introduction</H3> | ||
| 8 | +<P> | ||
| 9 | +State Threads is an application library which provides a | ||
| 10 | +foundation for writing fast and highly scalable Internet Applications | ||
| 11 | +on UNIX-like platforms. It combines the simplicity of the multithreaded | ||
| 12 | +programming paradigm, in which one thread supports each simultaneous | ||
| 13 | +connection, with the performance and scalability of an event-driven | ||
| 14 | +state machine architecture.</P> | ||
| 15 | + | ||
| 16 | +<H3>1. Definitions</H3> | ||
| 17 | +<P> | ||
| 18 | +<A NAME="IA"> | ||
| 19 | +<H4>1.1 Internet Applications</H4> | ||
| 20 | +</A> | ||
| 21 | +<P> | ||
| 22 | +An <I>Internet Application</I> (IA) is either a server or client network | ||
| 23 | +application that accepts connections from clients and may or may not | ||
| 24 | +connect to servers. In an IA the arrival or departure of network data | ||
| 25 | +often controls processing (that is, IA is a <I>data-driven</I> application). | ||
| 26 | +For each connection, an IA does some finite amount of work | ||
| 27 | +involving data exchange with its peer, where its peer may be either | ||
| 28 | +a client or a server. | ||
| 29 | +The typical transaction steps of an IA are to accept a connection, | ||
| 30 | +read a request, do some finite and predictable amount of work to | ||
| 31 | +process the request, then write a response to the peer that sent the | ||
| 32 | +request. One example of an IA is a Web server; | ||
| 33 | +the most general example of an IA is a proxy server, because it both | ||
| 34 | +accepts connections from clients and connects to other servers.</P> | ||
| 35 | +<P> | ||
| 36 | +We assume that the performance of an IA is constrained by available CPU | ||
| 37 | +cycles rather than network bandwidth or disk I/O (that is, CPU | ||
| 38 | +is a bottleneck resource). | ||
| 39 | +<P> | ||
| 40 | + | ||
| 41 | +<A NAME="PS"> | ||
| 42 | +<H4>1.2 Performance and Scalability</H4> | ||
| 43 | +</A> | ||
| 44 | +<P> | ||
| 45 | +The <I>performance</I> of an IA is usually evaluated as its | ||
| 46 | +throughput measured in transactions per second or bytes per second (one | ||
| 47 | +can be converted to the other, given the average transaction size). There are | ||
| 48 | +several benchmarks that can be used to measure throughput of Web serving | ||
| 49 | +applications for specific workloads (such as | ||
| 50 | +<A HREF="http://www.spec.org/osg/web96/">SPECweb96</A>, | ||
| 51 | +<A HREF="http://www.mindcraft.com/webstone/">WebStone</A>, | ||
| 52 | +<A HREF="http://www.zdnet.com/zdbop/webbench/">WebBench</A>). | ||
| 53 | +Although there is no common definition for <I>scalability</I>, in general it | ||
| 54 | +expresses the ability of an application to sustain its performance when some | ||
| 55 | +external condition changes. For IAs this external condition is either the | ||
| 56 | +number of clients (also known as "users," "simultaneous connections," or "load | ||
| 57 | +generators") or the underlying hardware system size (number of CPUs, memory | ||
| 58 | +size, and so on). Thus there are two types of scalability: <I>load | ||
| 59 | +scalability</I> and <I>system scalability</I>, respectively. | ||
| 60 | +<P> | ||
| 61 | +The figure below shows how the throughput of an idealized IA changes with | ||
| 62 | +the increasing number of clients (solid blue line). Initially the throughput | ||
| 63 | +grows linearly (the slope represents the maximal throughput that one client | ||
| 64 | +can provide). Within this initial range, the IA is underutilized and CPUs are | ||
| 65 | +partially idle. Further increase in the number of clients leads to a system | ||
| 66 | +saturation, and the throughput gradually stops growing as all CPUs become fully | ||
| 67 | +utilized. After that point, the throughput stays flat because there are no | ||
| 68 | +more CPU cycles available. | ||
| 69 | +In the real world, however, each simultaneous connection | ||
| 70 | +consumes some computational and memory resources, even when idle, and this | ||
| 71 | +overhead grows with the number of clients. Therefore, the throughput of the | ||
| 72 | +real world IA starts dropping after some point (dashed blue line in the figure | ||
| 73 | +below). The rate at which the throughput drops depends, among other things, on | ||
| 74 | +application design. | ||
| 75 | +<P> | ||
| 76 | +We say that an application has a good <I>load scalability</I> if it can | ||
| 77 | +sustain its throughput over a wide range of loads. | ||
| 78 | +Interestingly, the <A HREF="http://www.spec.org/osg/web99/">SPECweb99</A> | ||
| 79 | +benchmark somewhat reflects the Web server's load scalability because it | ||
| 80 | +measures the number of clients (load generators) given a mandatory minimal | ||
| 81 | +throughput per client (that is, it measures the server's <I>capacity</I>). | ||
| 82 | +This is unlike <A HREF="http://www.spec.org/osg/web96/">SPECweb96</A> and | ||
| 83 | +other benchmarks that use the throughput as their main metric (see the figure | ||
| 84 | +below). | ||
| 85 | +<P> | ||
| 86 | +<CENTER><IMG SRC="fig.gif" ALT="Figure: Throughput vs. Number of clients"> | ||
| 87 | +</CENTER> | ||
| 88 | +<P> | ||
| 89 | +<I>System scalability</I> is the ability of an application to sustain its | ||
| 90 | +performance per hardware unit (such as a CPU) with the increasing number of | ||
| 91 | +these units. In other words, good system scalability means that doubling the | ||
| 92 | +number of processors will roughly double the application's throughput (dashed | ||
| 93 | +green line). We assume here that the underlying operating system also scales | ||
| 94 | +well. Good system scalability allows you to initially run an application on | ||
| 95 | +the smallest system possible, while retaining the ability to move that | ||
| 96 | +application to a larger system if necessary, without excessive effort or | ||
| 97 | +expense. That is, an application need not be rewritten or even undergo a | ||
| 98 | +major porting effort when changing system size. | ||
| 99 | +<P> | ||
| 100 | +Although scalability and performance are more important in the case of server | ||
| 101 | +IAs, they should also be considered for some client applications (such as | ||
| 102 | +benchmark load generators). | ||
| 103 | +<P> | ||
| 104 | + | ||
| 105 | +<A NAME="CONC"> | ||
| 106 | +<H4>1.3 Concurrency</H4> | ||
| 107 | +</A> | ||
| 108 | +<P> | ||
| 109 | +Concurrency reflects the parallelism in a system. The two unrelated types | ||
| 110 | +are <I>virtual</I> concurrency and <I>real</I> concurrency. | ||
| 111 | +<UL> | ||
| 112 | +<LI>Virtual (or apparent) concurrency is the number of simultaneous | ||
| 113 | +connections that a system supports. | ||
| 114 | +<BR><BR> | ||
| 115 | +<LI>Real concurrency is the number of hardware devices, including | ||
| 116 | +CPUs, network cards, and disks, that actually allow a system to perform | ||
| 117 | +tasks in parallel. | ||
| 118 | +</UL> | ||
| 119 | +<P> | ||
| 120 | +An IA must provide virtual concurrency in order to serve many users | ||
| 121 | +simultaneously. | ||
| 122 | +To achieve maximum performance and scalability in doing so, the number of | ||
| 123 | +programming entities that an IA creates to be scheduled by the OS kernel | ||
| 124 | +should be | ||
| 125 | +kept close to (within an order of magnitude of) the real concurrency found on | ||
| 126 | +the system. These programming entities scheduled by the kernel are known as | ||
| 127 | +<I>kernel execution vehicles</I>. Examples of kernel execution vehicles | ||
| 128 | +include Solaris lightweight processes and IRIX kernel threads. | ||
| 129 | +In other words, the number of kernel execution vehicles should be dictated by | ||
| 130 | +the system size and not by the number of simultaneous connections. | ||
| 131 | +<P> | ||
| 132 | + | ||
| 133 | +<H3>2. Existing Architectures</H3> | ||
| 134 | +<P> | ||
| 135 | +There are a few different architectures that are commonly used by IAs. | ||
| 136 | +These include the <I>Multi-Process</I>, | ||
| 137 | +<I>Multi-Threaded</I>, and <I>Event-Driven State Machine</I> | ||
| 138 | +architectures. | ||
| 139 | +<P> | ||
| 140 | +<A NAME="MP"> | ||
| 141 | +<H4>2.1 Multi-Process Architecture</H4> | ||
| 142 | +</A> | ||
| 143 | +<P> | ||
| 144 | +In the Multi-Process (MP) architecture, an individual process is | ||
| 145 | +dedicated to each simultaneous connection. | ||
| 146 | +A process performs all of a transaction's initialization steps | ||
| 147 | +and services a connection completely before moving on to service | ||
| 148 | +a new connection. | ||
| 149 | +<P> | ||
| 150 | +User sessions in IAs are relatively independent; therefore, no | ||
| 151 | +synchronization between processes handling different connections is | ||
| 152 | +necessary. Because each process has its own private address space, | ||
| 153 | +this architecture is very robust. If a process serving one of the connections | ||
| 154 | +crashes, the other sessions will not be affected. However, to serve many | ||
| 155 | +concurrent connections, an equal number of processes must be employed. | ||
| 156 | +Because processes are kernel entities (and are in fact the heaviest ones), | ||
| 157 | +the number of kernel entities will be at least as large as the number of | ||
| 158 | +concurrent sessions. On most systems, good performance will not be achieved | ||
| 159 | +when more than a few hundred processes are created because of the high | ||
| 160 | +context-switching overhead. In other words, MP applications have poor load | ||
| 161 | +scalability. | ||
| 162 | +<P> | ||
| 163 | +On the other hand, MP applications have very good system scalability, because | ||
| 164 | +no resources are shared among different processes and there is no | ||
| 165 | +synchronization overhead. | ||
| 166 | +<P> | ||
| 167 | +The Apache Web Server 1.x (<A HREF=#refs1>[Reference 1]</A>) uses the MP | ||
| 168 | +architecture on UNIX systems. | ||
| 169 | +<P> | ||
| 170 | +<A NAME="MT"> | ||
| 171 | +<H4>2.2 Multi-Threaded Architecture</H4> | ||
| 172 | +</A> | ||
| 173 | +<P> | ||
| 174 | +In the Multi-Threaded (MT) architecture, multiple independent threads | ||
| 175 | +of control are employed within a single shared address space. Like a | ||
| 176 | +process in the MP architecture, each thread performs all of a | ||
| 177 | +transaction's initialization steps and services a connection completely | ||
| 178 | +before moving on to service a new connection. | ||
| 179 | +<P> | ||
| 180 | +Many modern UNIX operating systems implement a <I>many-to-few</I> model when | ||
| 181 | +mapping user-level threads to kernel entities. In this model, an | ||
| 182 | +arbitrarily large number of user-level threads is multiplexed onto a | ||
| 183 | +lesser number of kernel execution vehicles. Kernel execution | ||
| 184 | +vehicles are also known as <I>virtual processors</I>. Whenever a user-level | ||
| 185 | +thread makes a blocking system call, the kernel execution vehicle it is using | ||
| 186 | +will become blocked in the kernel. If there are no other non-blocked kernel | ||
| 187 | +execution vehicles and there are other runnable user-level threads, a new | ||
| 188 | +kernel execution vehicle will be created automatically. This prevents the | ||
| 189 | +application from blocking when it can continue to make useful forward | ||
| 190 | +progress. | ||
| 191 | +<P> | ||
| 192 | +Because IAs are by nature network I/O driven, all concurrent sessions block on | ||
| 193 | +network I/O at various points. As a result, the number of virtual processors | ||
| 194 | +created in the kernel grows close to the number of user-level threads | ||
| 195 | +(or simultaneous connections). When this occurs, the many-to-few model | ||
| 196 | +effectively degenerates to a <I>one-to-one</I> model. Again, like in | ||
| 197 | +the MP architecture, the number of kernel execution vehicles is dictated by | ||
| 198 | +the number of simultaneous connections rather than by number of CPUs. This | ||
| 199 | +reduces an application's load scalability. However, because kernel threads | ||
| 200 | +(lightweight processes) use fewer resources and are more light-weight than | ||
| 201 | +traditional UNIX processes, an MT application should scale better with load | ||
| 202 | +than an MP application. | ||
| 203 | +<P> | ||
| 204 | +Unexpectedly, the small number of virtual processors sharing the same address | ||
| 205 | +space in the MT architecture destroys an application's system scalability | ||
| 206 | +because of contention among the threads on various locks. Even if an | ||
| 207 | +application itself is carefully | ||
| 208 | +optimized to avoid lock contention around its own global data (a non-trivial | ||
| 209 | +task), there are still standard library functions and system calls | ||
| 210 | +that use common resources hidden from the application. For example, | ||
| 211 | +on many platforms thread safety of memory allocation routines | ||
| 212 | +(<TT>malloc(3)</TT>, <TT>free(3)</TT>, and so on) is achieved by using a single | ||
| 213 | +global lock. Another example is a per-process file descriptor table. | ||
| 214 | +This common resource table is shared by all kernel execution vehicles within | ||
| 215 | +the same process and must be protected when one modifies it via | ||
| 216 | +certain system calls (such as <TT>open(2)</TT>, <TT>close(2)</TT>, and so on). | ||
| 217 | +In addition to that, keeping the caches coherent | ||
| 218 | +among CPUs on multiprocessor systems hurts performance when different threads | ||
| 219 | +running on different CPUs modify data items on the same cache line. | ||
| 220 | +<P> | ||
| 221 | +In order to improve load scalability, some applications employ a different | ||
| 222 | +type of MT architecture: they create one or more thread(s) <I>per task</I> | ||
| 223 | +rather than one thread <I>per connection</I>. For example, one small group | ||
| 224 | +of threads may be responsible for accepting client connections, another | ||
| 225 | +for request processing, and yet another for serving responses. The main | ||
| 226 | +advantage of this architecture is that it eliminates the tight coupling | ||
| 227 | +between the number of threads and number of simultaneous connections. However, | ||
| 228 | +in this architecture, different task-specific thread groups must share common | ||
| 229 | +work queues that must be protected by mutual exclusion locks (a typical | ||
| 230 | +producer-consumer problem). This adds synchronization overhead that causes an | ||
| 231 | +application to perform badly on multiprocessor systems. In other words, in | ||
| 232 | +this architecture, the application's system scalability is sacrificed for the | ||
| 233 | +sake of load scalability. | ||
| 234 | +<P> | ||
| 235 | +Of course, the usual nightmares of threaded programming, including data | ||
| 236 | +corruption, deadlocks, and race conditions, also make MT architecture (in any | ||
| 237 | +form) far from simple to use. | ||
| 238 | +<P> | ||
| 239 | + | ||
| 240 | +<A NAME="EDSM"> | ||
| 241 | +<H4>2.3 Event-Driven State Machine Architecture</H4> | ||
| 242 | +</A> | ||
| 243 | +<P> | ||
| 244 | +In the Event-Driven State Machine (EDSM) architecture, a single process | ||
| 245 | +is employed to concurrently process multiple connections. The basics of this | ||
| 246 | +architecture are described in Comer and Stevens | ||
| 247 | +<A HREF=#refs2>[Reference 2]</A>. | ||
| 248 | +The EDSM architecture performs one basic data-driven step associated with | ||
| 249 | +a particular connection at a time, thus multiplexing many concurrent | ||
| 250 | +connections. The process operates as a state machine that receives an event | ||
| 251 | +and then reacts to it. | ||
| 252 | +<P> | ||
| 253 | +In the idle state the EDSM calls <TT>select(2)</TT> or <TT>poll(2)</TT> to | ||
| 254 | +wait for network I/O events. When a particular file descriptor is ready for | ||
| 255 | +I/O, the EDSM completes the corresponding basic step (usually by invoking a | ||
| 256 | +handler function) and starts the next one. This architecture uses | ||
| 257 | +non-blocking system calls to perform asynchronous network I/O operations. | ||
| 258 | +For more details on non-blocking I/O see Stevens | ||
| 259 | +<A HREF=#refs3>[Reference 3]</A>. | ||
| 260 | +<P> | ||
| 261 | +To take advantage of hardware parallelism (real concurrency), multiple | ||
| 262 | +identical processes may be created. This is called Symmetric Multi-Process | ||
| 263 | +EDSM and is used, for example, in the Zeus Web Server | ||
| 264 | +(<A HREF=#refs4>[Reference 4]</A>). To more efficiently multiplex disk I/O, | ||
| 265 | +special "helper" processes may be created. This is called Asymmetric | ||
| 266 | +Multi-Process EDSM and was proposed for Web servers by Druschel | ||
| 267 | +and others <A HREF=#refs5>[Reference 5]</A>. | ||
| 268 | +<P> | ||
| 269 | +EDSM is probably the most scalable architecture for IAs. | ||
| 270 | +Because the number of simultaneous connections (virtual concurrency) is | ||
| 271 | +completely decoupled from the number of kernel execution vehicles (processes), | ||
| 272 | +this architecture has very good load scalability. It requires only minimal | ||
| 273 | +user-level resources to create and maintain an additional connection. | ||
| 274 | +<P> | ||
| 275 | +Like MP applications, Multi-Process EDSM has very good system scalability | ||
| 276 | +because no resources are shared among different processes and there is no | ||
| 277 | +synchronization overhead. | ||
| 278 | +<P> | ||
| 279 | +Unfortunately, the EDSM architecture is monolithic rather than based on the | ||
| 280 | +concept of threads, so new applications generally need to be implemented from | ||
| 281 | +the ground up. In effect, the EDSM architecture simulates threads and their | ||
| 282 | +stacks the hard way. | ||
| 283 | +<P> | ||
| 284 | + | ||
| 285 | +<A NAME="ST"> | ||
| 286 | +<H3>3. State Threads Library</H3> | ||
| 287 | +</A> | ||
| 288 | +<P> | ||
| 289 | +The State Threads library combines the advantages of all of the above | ||
| 290 | +architectures. The interface preserves the programming simplicity of thread | ||
| 291 | +abstraction, allowing each simultaneous connection to be treated as a separate | ||
| 292 | +thread of execution within a single process. The underlying implementation is | ||
| 293 | +close to the EDSM architecture as the state of each particular concurrent | ||
| 294 | +session is saved in a separate memory segment. | ||
| 295 | +<P> | ||
| 296 | + | ||
| 297 | +<H4>3.1 State Changes and Scheduling</H4> | ||
| 298 | +<P> | ||
| 299 | +The state of each concurrent session includes its stack environment | ||
| 300 | +(stack pointer, program counter, CPU registers) and its stack. Conceptually, | ||
| 301 | +a thread context switch can be viewed as a process changing its state. There | ||
| 302 | +are no kernel entities involved other than processes. | ||
| 303 | +Unlike other general-purpose threading libraries, the State Threads library | ||
| 304 | +is fully deterministic. The thread context switch (process state change) can | ||
| 305 | +only happen in a well-known set of functions (at I/O points or at explicit | ||
| 306 | +synchronization points). As a result, process-specific global data does not | ||
| 307 | +have to be protected by mutual exclusion locks in most cases. The entire | ||
| 308 | +application is free to use all the static variables and non-reentrant library | ||
| 309 | +functions it wants, greatly simplifying programming and debugging while | ||
| 310 | +increasing performance. This is somewhat similar to a <I>co-routine</I> model | ||
| 311 | +(co-operatively multitasked threads), except that no explicit yield is needed | ||
| 312 | +-- | ||
| 313 | +sooner or later, a thread performs a blocking I/O operation and thus surrenders | ||
| 314 | +control. All threads of execution (simultaneous connections) have the | ||
| 315 | +same priority, so scheduling is non-preemptive, like in the EDSM architecture. | ||
| 316 | +Because IAs are data-driven (processing is limited by the size of network | ||
| 317 | +buffers and data arrival rates), scheduling is non-time-slicing. | ||
| 318 | +<P> | ||
| 319 | +Only two types of external events are handled by the library's | ||
| 320 | +scheduler, because only these events can be detected by | ||
| 321 | +<TT>select(2)</TT> or <TT>poll(2)</TT>: I/O events (a file descriptor is ready | ||
| 322 | +for I/O) and time events | ||
| 323 | +(some timeout has expired). However, other types of events (such as | ||
| 324 | +a signal sent to a process) can also be handled by converting them to I/O | ||
| 325 | +events. For example, a signal handling function can perform a write to a pipe | ||
| 326 | +(<TT>write(2)</TT> is reentrant/asynchronous-safe), thus converting a signal | ||
| 327 | +event to an I/O event. | ||
| 328 | +<P> | ||
| 329 | +To take advantage of hardware parallelism, as in the EDSM architecture, | ||
| 330 | +multiple processes can be created in either a symmetric or asymmetric manner. | ||
| 331 | +Process management is not in the library's scope but instead is left up to the | ||
| 332 | +application. | ||
| 333 | +<P> | ||
| 334 | +There are several general-purpose threading libraries that implement a | ||
| 335 | +<I>many-to-one</I> model (many user-level threads to one kernel execution | ||
| 336 | +vehicle), using the same basic techniques as the State Threads library | ||
| 337 | +(non-blocking I/O, event-driven scheduler, and so on). For an example, see GNU | ||
| 338 | +Portable Threads (<A HREF=#refs6>[Reference 6]</A>). Because they are | ||
| 339 | +general-purpose, these libraries have different objectives than the State | ||
| 340 | +Threads library. The State Threads library is <I>not</I> a general-purpose | ||
| 341 | +threading library, | ||
| 342 | +but rather an application library that targets only certain types of | ||
| 343 | +applications (IAs) in order to achieve the highest possible performance and | ||
| 344 | +scalability for those applications. | ||
| 345 | +<P> | ||
| 346 | + | ||
| 347 | +<H4>3.2 Scalability</H4> | ||
| 348 | +<P> | ||
| 349 | +State threads are very lightweight user-level entities, and therefore creating | ||
| 350 | +and maintaining user connections requires minimal resources. An application | ||
| 351 | +using the State Threads library scales very well with the increasing number | ||
| 352 | +of connections. | ||
| 353 | +<P> | ||
| 354 | +On multiprocessor systems an application should create multiple processes | ||
| 355 | +to take advantage of hardware parallelism. Using multiple separate processes | ||
| 356 | +is the <I>only</I> way to achieve the highest possible system scalability. | ||
| 357 | +This is because duplicating per-process resources is the only way to avoid | ||
| 358 | +significant synchronization overhead on multiprocessor systems. Creating | ||
| 359 | +separate UNIX processes naturally offers resource duplication. Again, | ||
| 360 | +as in the EDSM architecture, there is no connection between the number of | ||
| 361 | +simultaneous connections (which may be very large and changes within a wide | ||
| 362 | +range) and the number of kernel entities (which is usually small and constant). | ||
| 363 | +In other words, the State Threads library makes it possible to multiplex a | ||
| 364 | +large number of simultaneous connections onto a much smaller number of | ||
| 365 | +separate processes, thus allowing an application to scale well with both | ||
| 366 | +the load and system size. | ||
| 367 | +<P> | ||
| 368 | + | ||
| 369 | +<H4>3.3 Performance</H4> | ||
| 370 | +<P> | ||
| 371 | +Performance is one of the library's main objectives. The State Threads | ||
| 372 | +library is implemented to minimize the number of system calls and | ||
| 373 | +to make thread creation and context switching as fast as possible. | ||
| 374 | +For example, a per-thread signal mask does not exist (unlike | ||
| 375 | +POSIX threads), so there is no need to save and restore a process's | ||
| 376 | +signal mask on every thread context switch. This eliminates two system | ||
| 377 | +calls per context switch. Signal events can be handled much more | ||
| 378 | +efficiently by converting them to I/O events (see above). | ||
| 379 | +<P> | ||
| 380 | + | ||
| 381 | +<H4>3.4 Portability</H4> | ||
| 382 | +<P> | ||
| 383 | +The library uses the same general, underlying concepts as the EDSM | ||
| 384 | +architecture, including non-blocking I/O, file descriptors, and | ||
| 385 | +I/O multiplexing. These concepts are available in some form on most | ||
| 386 | +UNIX platforms, making the library very portable across many | ||
| 387 | +flavors of UNIX. There are only a few platform-dependent sections in the | ||
| 388 | +source. | ||
| 389 | +<P> | ||
| 390 | + | ||
| 391 | +<H4>3.5 State Threads and NSPR</H4> | ||
| 392 | +<P> | ||
| 393 | +The State Threads library is a derivative of the Netscape Portable | ||
| 394 | +Runtime library (NSPR) <A HREF=#refs7>[Reference 7]</A>. The primary goal of | ||
| 395 | +NSPR is to provide a platform-independent layer for system facilities, | ||
| 396 | +where system facilities include threads, thread synchronization, and I/O. | ||
| 397 | +Performance and scalability are not the main concern of NSPR. The | ||
| 398 | +State Threads library addresses performance and scalability while | ||
| 399 | +remaining much smaller than NSPR. It is contained in 8 source files | ||
| 400 | +as opposed to more than 400, but provides all the functionality that | ||
| 401 | +is needed to write efficient IAs on UNIX-like platforms. | ||
| 402 | +<P> | ||
| 403 | + | ||
| 404 | +<TABLE CELLPADDING=3> | ||
| 405 | +<TR> | ||
| 406 | +<TD></TD> | ||
| 407 | +<TH>NSPR</TH> | ||
| 408 | +<TH>State Threads</TH> | ||
| 409 | +</TR> | ||
| 410 | +<TR> | ||
| 411 | +<TD><B>Lines of code</B></TD> | ||
| 412 | +<TD ALIGN=RIGHT>~150,000</TD> | ||
| 413 | +<TD ALIGN=RIGHT>~3000</TD> | ||
| 414 | +</TR> | ||
| 415 | +<TR> | ||
| 416 | +<TD><B>Dynamic library size <BR>(debug version)</B></TD> | ||
| 417 | +<TD></TD> | ||
| 418 | +<TD></TD> | ||
| 419 | +</TR> | ||
| 420 | +<TR> | ||
| 421 | +<TD>IRIX</TD> | ||
| 422 | +<TD ALIGN=RIGHT>~700 KB</TD> | ||
| 423 | +<TD ALIGN=RIGHT>~60 KB</TD> | ||
| 424 | +</TR> | ||
| 425 | +<TR> | ||
| 426 | +<TD>Linux</TD> | ||
| 427 | +<TD ALIGN=RIGHT>~900 KB</TD> | ||
| 428 | +<TD ALIGN=RIGHT>~70 KB</TD> | ||
| 429 | +</TR> | ||
| 430 | +</TABLE> | ||
| 431 | +<P> | ||
| 432 | + | ||
| 433 | +<H3>Conclusion</H3> | ||
| 434 | +<P> | ||
| 435 | +State Threads is an application library which provides a foundation for | ||
| 436 | +writing <A HREF=#IA>Internet Applications</A>. To summarize, it has the | ||
| 437 | +following <I>advantages</I>: | ||
| 438 | +<P> | ||
| 439 | +<UL> | ||
| 440 | +<LI>It allows the design of fast and highly scalable applications. An | ||
| 441 | +application will scale well with both load and number of CPUs. | ||
| 442 | +<P> | ||
| 443 | +<LI>It greatly simplifies application programming and debugging because, as a | ||
| 444 | +rule, no mutual exclusion locking is necessary and the entire application is | ||
| 445 | +free to use static variables and non-reentrant library functions. | ||
| 446 | +</UL> | ||
| 447 | +<P> | ||
| 448 | +The library's main <I>limitation</I>: | ||
| 449 | +<P> | ||
| 450 | +<UL> | ||
| 451 | +<LI>All I/O operations on sockets must use the State Threads library's I/O | ||
| 452 | +functions because only those functions perform thread scheduling and prevent | ||
| 453 | +the application's processes from blocking. | ||
| 454 | +</UL> | ||
| 455 | +<P> | ||
| 456 | + | ||
| 457 | +<H3>References</H3> | ||
| 458 | +<OL> | ||
| 459 | +<A NAME="refs1"></A> | ||
| 460 | +<LI> Apache Software Foundation, | ||
| 461 | +<A HREF="http://www.apache.org">http://www.apache.org</A>. | ||
| 462 | +<A NAME="refs2"></A> | ||
| 463 | +<LI> Douglas E. Comer, David L. Stevens, <I>Internetworking With TCP/IP, | ||
| 464 | +Vol. III: Client-Server Programming And Applications</I>, Second Edition, | ||
| 465 | +Ch. 8, 12. | ||
| 466 | +<A NAME="refs3"></A> | ||
| 467 | +<LI> W. Richard Stevens, <I>UNIX Network Programming</I>, Second Edition, | ||
| 468 | +Vol. 1, Ch. 15. | ||
| 469 | +<A NAME="refs4"></A> | ||
| 470 | +<LI> Zeus Technology Limited, | ||
| 471 | +<A HREF="http://www.zeus.co.uk/">http://www.zeus.co.uk</A>. | ||
| 472 | +<A NAME="refs5"></A> | ||
| 473 | +<LI> Peter Druschel, Vivek S. Pai, Willy Zwaenepoel, | ||
| 474 | +<A HREF="http://www.cs.rice.edu/~druschel/usenix99flash.ps.gz"> | ||
| 475 | +Flash: An Efficient and Portable Web Server</A>. In <I>Proceedings of the | ||
| 476 | +USENIX 1999 Annual Technical Conference</I>, Monterey, CA, June 1999. | ||
| 477 | +<A NAME="refs6"></A> | ||
| 478 | +<LI> GNU Portable Threads, | ||
| 479 | +<A HREF="http://www.gnu.org/software/pth/">http://www.gnu.org/software/pth/</A>. | ||
| 480 | +<A NAME="refs7"></A> | ||
| 481 | +<LI> Netscape Portable Runtime, | ||
| 482 | +<A HREF="http://www.mozilla.org/docs/refList/refNSPR/">http://www.mozilla.org/docs/refList/refNSPR/</A>. | ||
| 483 | +</OL> | ||
| 484 | + | ||
| 485 | +<H3>Other resources covering various architectural issues in IAs</H3> | ||
| 486 | +<OL START=8> | ||
| 487 | +<LI> Dan Kegel, <I>The C10K problem</I>, | ||
| 488 | +<A HREF="http://www.kegel.com/c10k.html">http://www.kegel.com/c10k.html</A>. | ||
| 489 | +</LI> | ||
| 490 | +<LI> James C. Hu, Douglas C. Schmidt, Irfan Pyarali, <I>JAWS: Understanding | ||
| 491 | +High Performance Web Systems</I>, | ||
| 492 | +<A HREF="http://www.cs.wustl.edu/~jxh/research/research.html">http://www.cs.wustl.edu/~jxh/research/research.html</A>.</LI> | ||
| 493 | +</OL> | ||
| 494 | +<P> | ||
| 495 | +<HR> | ||
| 496 | +<P> | ||
| 497 | + | ||
| 498 | +<CENTER><FONT SIZE=-1>Portions created by SGI are Copyright © 2000 | ||
| 499 | +Silicon Graphics, Inc. All rights reserved.</FONT></CENTER> | ||
| 500 | +<P> | ||
| 501 | + | ||
| 502 | +</BODY> | ||
| 503 | +</HTML> | ||
| 504 | + |
trunk/research/st-1.9/docs/timeout_heap.txt
0 → 100644
| 1 | +How the timeout heap works | ||
| 2 | + | ||
| 3 | +As of version 1.5, the State Threads Library represents the queue of | ||
| 4 | +sleeping threads using a heap data structure rather than a sorted | ||
| 5 | +linked list. This improves performance when there is a large number | ||
| 6 | +of sleeping threads, since insertion into a heap takes O(log N) time | ||
| 7 | +while insertion into a sorted list takes O(N) time. For example, in | ||
| 8 | +one test 1000 threads were created, each thread called st_usleep() | ||
| 9 | +with a random time interval, and then all the threads were | ||
| 10 | +immediately interrupted and joined before the sleeps had a chance to | ||
| 11 | +finish. The whole process was repeated 1000 times, for a total of a | ||
| 12 | +million sleep queue insertions and removals. With the old list-based | ||
| 13 | +sleep queue, this test took 100 seconds; now it takes only 12 seconds. | ||
| 14 | + | ||
| 15 | +Heap data structures are typically based on dynamically resized | ||
| 16 | +arrays. However, since the existing ST code base was very nicely | ||
| 17 | +structured around linking the thread objects into pointer-based lists | ||
| 18 | +without the need for any auxiliary data structures, implementing the | ||
| 19 | +heap using a similar nodes-and-pointers based approach seemed more | ||
| 20 | +appropriate for ST than introducing a separate array. | ||
| 21 | + | ||
| 22 | +Thus, the new ST timeout heap works by organizing the existing | ||
| 23 | +_st_thread_t objects in a balanced binary tree, just as they were | ||
| 24 | +previously organized into a doubly-linked, sorted list. The global | ||
| 25 | +_ST_SLEEPQ variable, formerly a linked list head, is now simply a | ||
| 26 | +pointer to the root of this tree, and the root node of the tree is the | ||
| 27 | +thread with the earliest timeout. Each thread object has two child | ||
| 28 | +pointers, "left" and "right", pointing to threads with later timeouts. | ||
| 29 | + | ||
| 30 | +Each node in the tree is numbered with an integer index, corresponding | ||
| 31 | +to the array index in an array-based heap, and the tree is kept fully | ||
| 32 | +balanced and left-adjusted at all times. In other words, the tree | ||
| 33 | +consists of any number of fully populated top levels, followed by a | ||
| 34 | +single bottom level which may be partially populated, such that any | ||
| 35 | +existing nodes form a contiguous block to the left and the spaces for | ||
| 36 | +missing nodes form a contiguous block to the right. For example, if | ||
| 37 | +there are nine threads waiting for a timeout, they are numbered and | ||
| 38 | +arranged in a tree exactly as follows: | ||
| 39 | + | ||
| 40 | + 1 | ||
| 41 | + / \ | ||
| 42 | + 2 3 | ||
| 43 | + / \ / \ | ||
| 44 | + 4 5 6 7 | ||
| 45 | + / \ | ||
| 46 | + 8 9 | ||
| 47 | + | ||
| 48 | +Each node has either no children, only a left child, or both a left | ||
| 49 | +and a right child. Children always time out later than their parents | ||
| 50 | +(this is called the "heap invariant"), but when a node has two | ||
| 51 | +children, their mutual order is unspecified - the left child may time | ||
| 52 | +out before or after the right child. If a node is numbered N, its | ||
| 53 | +left child is numbered 2N, and its right child is numbered 2N+1. | ||
| 54 | + | ||
| 55 | +There is no pointer from a child to its parent; all pointers point | ||
| 56 | +downward. Additions and deletions both work by starting at the root | ||
| 57 | +and traversing the tree towards the leaves, going left or right | ||
| 58 | +according to the binary digits forming the index of the destination | ||
| 59 | +node. As nodes are added or deleted, existing nodes are rearranged to | ||
| 60 | +maintain the heap invariant. |
trunk/research/st-1.9/event.c
0 → 100644
| 1 | +/* | ||
| 2 | + * The contents of this file are subject to the Mozilla Public | ||
| 3 | + * License Version 1.1 (the "License"); you may not use this file | ||
| 4 | + * except in compliance with the License. You may obtain a copy of | ||
| 5 | + * the License at http://www.mozilla.org/MPL/ | ||
| 6 | + * | ||
| 7 | + * Software distributed under the License is distributed on an "AS | ||
| 8 | + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or | ||
| 9 | + * implied. See the License for the specific language governing | ||
| 10 | + * rights and limitations under the License. | ||
| 11 | + * | ||
| 12 | + * The Original Code is the Netscape Portable Runtime library. | ||
| 13 | + * | ||
| 14 | + * The Initial Developer of the Original Code is Netscape | ||
| 15 | + * Communications Corporation. Portions created by Netscape are | ||
| 16 | + * Copyright (C) 1994-2000 Netscape Communications Corporation. All | ||
| 17 | + * Rights Reserved. | ||
| 18 | + * | ||
| 19 | + * Contributor(s): Silicon Graphics, Inc. | ||
| 20 | + * Yahoo! Inc. | ||
| 21 | + * | ||
| 22 | + * Alternatively, the contents of this file may be used under the | ||
| 23 | + * terms of the GNU General Public License Version 2 or later (the | ||
| 24 | + * "GPL"), in which case the provisions of the GPL are applicable | ||
| 25 | + * instead of those above. If you wish to allow use of your | ||
| 26 | + * version of this file only under the terms of the GPL and not to | ||
| 27 | + * allow others to use your version of this file under the MPL, | ||
| 28 | + * indicate your decision by deleting the provisions above and | ||
| 29 | + * replace them with the notice and other provisions required by | ||
| 30 | + * the GPL. If you do not delete the provisions above, a recipient | ||
| 31 | + * may use your version of this file under either the MPL or the | ||
| 32 | + * GPL. | ||
| 33 | + */ | ||
| 34 | + | ||
| 35 | +#include <stdlib.h> | ||
| 36 | +#include <unistd.h> | ||
| 37 | +#include <fcntl.h> | ||
| 38 | +#include <string.h> | ||
| 39 | +#include <time.h> | ||
| 40 | +#include <errno.h> | ||
| 41 | +#include "common.h" | ||
| 42 | + | ||
| 43 | +#ifdef MD_HAVE_KQUEUE | ||
| 44 | +#include <sys/event.h> | ||
| 45 | +#endif | ||
| 46 | +#ifdef MD_HAVE_EPOLL | ||
| 47 | +#include <sys/epoll.h> | ||
| 48 | +#endif | ||
| 49 | + | ||
| 50 | +#if defined(USE_POLL) && !defined(MD_HAVE_POLL) | ||
| 51 | +/* Force poll usage if explicitly asked for it */ | ||
| 52 | +#define MD_HAVE_POLL | ||
| 53 | +#endif | ||
| 54 | + | ||
| 55 | +/* Bookkeeping for the select(2) event system, followed by shorthand accessors for its fields. */ | ||
| 56 | +static struct _st_seldata { | ||
| 57 | + fd_set fd_read_set, fd_write_set, fd_exception_set; /* master sets; _st_select_dispatch hands copies of them to select() */ | ||
| 58 | + int fd_ref_cnts[FD_SETSIZE][3]; /* per-fd registration counts: [0] read, [1] write, [2] exception */ | ||
| 59 | + int maxfd; /* upper bound on registered fds (-1 after init); select() is called with maxfd + 1 */ | ||
| 60 | +} *_st_select_data; | ||
| 61 | + | ||
| 62 | +#define _ST_SELECT_MAX_OSFD (_st_select_data->maxfd) | ||
| 63 | +#define _ST_SELECT_READ_SET (_st_select_data->fd_read_set) | ||
| 64 | +#define _ST_SELECT_WRITE_SET (_st_select_data->fd_write_set) | ||
| 65 | +#define _ST_SELECT_EXCEP_SET (_st_select_data->fd_exception_set) | ||
| 66 | +#define _ST_SELECT_READ_CNT(fd) (_st_select_data->fd_ref_cnts[fd][0]) | ||
| 67 | +#define _ST_SELECT_WRITE_CNT(fd) (_st_select_data->fd_ref_cnts[fd][1]) | ||
| 68 | +#define _ST_SELECT_EXCEP_CNT(fd) (_st_select_data->fd_ref_cnts[fd][2]) | ||
| 69 | + | ||
| 70 | +/* Bookkeeping for the poll(2) event system; compiled only when MD_HAVE_POLL is defined. */ | ||
| 71 | +#ifdef MD_HAVE_POLL | ||
| 72 | +static struct _st_polldata { | ||
| 73 | + struct pollfd *pollfds; /* heap array allocated in _st_poll_init */ | ||
| 74 | + int pollfds_size; /* number of entries allocated in pollfds */ | ||
| 75 | + int fdcnt; /* running total of pollfd entries added via _st_poll_pollset_add and not yet deleted */ | ||
| 76 | +} *_st_poll_data; | ||
| 77 | + | ||
| 78 | +#define _ST_POLL_OSFD_CNT (_st_poll_data->fdcnt) | ||
| 79 | +#define _ST_POLLFDS (_st_poll_data->pollfds) | ||
| 80 | +#define _ST_POLLFDS_SIZE (_st_poll_data->pollfds_size) | ||
| 81 | +#endif /* MD_HAVE_POLL */ | ||
| 82 | + | ||
| 83 | +/* Bookkeeping for the kqueue event system; compiled only when MD_HAVE_KQUEUE is defined. The code using it is outside this excerpt. */ | ||
| 84 | +#ifdef MD_HAVE_KQUEUE | ||
| 85 | +typedef struct _kq_fd_data { /* one record per file descriptor, indexed by fd in the accessors below */ | ||
| 86 | + int rd_ref_cnt; /* presumably the number of read registrations on the fd -- TODO confirm */ | ||
| 87 | + int wr_ref_cnt; /* presumably the number of write registrations on the fd -- TODO confirm */ | ||
| 88 | + int revents; /* presumably events reported for the fd by the last kevent() call -- TODO confirm */ | ||
| 89 | +} _kq_fd_data_t; | ||
| 90 | + | ||
| 91 | +static struct _st_kqdata { | ||
| 92 | + _kq_fd_data_t *fd_data; | ||
| 93 | + struct kevent *evtlist; | ||
| 94 | + struct kevent *addlist; | ||
| 95 | + struct kevent *dellist; | ||
| 96 | + int fd_data_size; | ||
| 97 | + int evtlist_size; | ||
| 98 | + int addlist_size; | ||
| 99 | + int addlist_cnt; | ||
| 100 | + int dellist_size; | ||
| 101 | + int dellist_cnt; | ||
| 102 | + int kq; | ||
| 103 | + pid_t pid; | ||
| 104 | +} *_st_kq_data; | ||
| 105 | + | ||
| 106 | +#ifndef ST_KQ_MIN_EVTLIST_SIZE | ||
| 107 | +#define ST_KQ_MIN_EVTLIST_SIZE 64 /* default; can be overridden at build time */ | ||
| 108 | +#endif | ||
| 109 | + | ||
| 110 | +#define _ST_KQ_READ_CNT(fd) (_st_kq_data->fd_data[fd].rd_ref_cnt) | ||
| 111 | +#define _ST_KQ_WRITE_CNT(fd) (_st_kq_data->fd_data[fd].wr_ref_cnt) | ||
| 112 | +#define _ST_KQ_REVENTS(fd) (_st_kq_data->fd_data[fd].revents) | ||
| 113 | +#endif /* MD_HAVE_KQUEUE */ | ||
| 114 | + | ||
| 115 | +/* Bookkeeping for the epoll event system; compiled only when MD_HAVE_EPOLL is defined. The code using it is outside this excerpt. */ | ||
| 116 | +#ifdef MD_HAVE_EPOLL | ||
| 117 | +typedef struct _epoll_fd_data { /* one record per file descriptor, indexed by fd in the accessors below */ | ||
| 118 | + int rd_ref_cnt; | ||
| 119 | + int wr_ref_cnt; | ||
| 120 | + int ex_ref_cnt; | ||
| 121 | + int revents; /* presumably events reported for the fd by the last epoll_wait() -- TODO confirm */ | ||
| 122 | +} _epoll_fd_data_t; | ||
| 123 | + | ||
| 124 | +static struct _st_epolldata { | ||
| 125 | + _epoll_fd_data_t *fd_data; | ||
| 126 | + struct epoll_event *evtlist; | ||
| 127 | + int fd_data_size; | ||
| 128 | + int evtlist_size; | ||
| 129 | + int evtlist_cnt; | ||
| 130 | + int fd_hint; | ||
| 131 | + int epfd; | ||
| 132 | + pid_t pid; | ||
| 133 | +} *_st_epoll_data; | ||
| 134 | + | ||
| 135 | +#ifndef ST_EPOLL_EVTLIST_SIZE | ||
| 136 | +/* Not a limit, just a hint */ | ||
| 137 | +#define ST_EPOLL_EVTLIST_SIZE 4096 | ||
| 138 | +#endif | ||
| 139 | + | ||
| 140 | +#define _ST_EPOLL_READ_CNT(fd) (_st_epoll_data->fd_data[fd].rd_ref_cnt) | ||
| 141 | +#define _ST_EPOLL_WRITE_CNT(fd) (_st_epoll_data->fd_data[fd].wr_ref_cnt) | ||
| 142 | +#define _ST_EPOLL_EXCEP_CNT(fd) (_st_epoll_data->fd_data[fd].ex_ref_cnt) | ||
| 143 | +#define _ST_EPOLL_REVENTS(fd) (_st_epoll_data->fd_data[fd].revents) | ||
| 144 | + /* Each *_BIT macro yields its epoll flag when the matching count for fd is nonzero, else 0; _ST_EPOLL_EVENTS ORs the three. */ | ||
| 145 | +#define _ST_EPOLL_READ_BIT(fd) (_ST_EPOLL_READ_CNT(fd) ? EPOLLIN : 0) | ||
| 146 | +#define _ST_EPOLL_WRITE_BIT(fd) (_ST_EPOLL_WRITE_CNT(fd) ? EPOLLOUT : 0) | ||
| 147 | +#define _ST_EPOLL_EXCEP_BIT(fd) (_ST_EPOLL_EXCEP_CNT(fd) ? EPOLLPRI : 0) | ||
| 148 | +#define _ST_EPOLL_EVENTS(fd) \ | ||
| 149 | + (_ST_EPOLL_READ_BIT(fd)|_ST_EPOLL_WRITE_BIT(fd)|_ST_EPOLL_EXCEP_BIT(fd)) | ||
| 150 | + | ||
| 151 | +#endif /* MD_HAVE_EPOLL */ | ||
| 152 | + | ||
| 153 | +_st_eventsys_t *_st_eventsys = NULL; /* starts out NULL; presumably set elsewhere to the chosen event system descriptor -- TODO confirm */ | ||
| 154 | + | ||
| 155 | + | ||
| 156 | +/***************************************** | ||
| 157 | + * select event system | ||
| 158 | + */ | ||
| 159 | +/* Allocate and zero the select bookkeeping structure. Returns 0 on success, -1 if malloc fails. */ | ||
| 160 | +ST_HIDDEN int _st_select_init(void) | ||
| 161 | +{ | ||
| 162 | + _st_select_data = (struct _st_seldata *) malloc(sizeof(*_st_select_data)); | ||
| 163 | + if (!_st_select_data) | ||
| 164 | + return -1; | ||
| 165 | + | ||
| 166 | + memset(_st_select_data, 0, sizeof(*_st_select_data)); /* clears the fd_sets and every reference count */ | ||
| 167 | + _st_select_data->maxfd = -1; /* no descriptor registered yet */ | ||
| 168 | + | ||
| 169 | + return 0; | ||
| 170 | +} | ||
| 171 | +/* Register npds pollfd entries with the select sets. Returns 0 on success; returns -1 with errno = EINVAL, registering nothing, if any entry is invalid. */ | ||
| 172 | +ST_HIDDEN int _st_select_pollset_add(struct pollfd *pds, int npds) | ||
| 173 | +{ | ||
| 174 | + struct pollfd *pd; | ||
| 175 | + struct pollfd *epd = pds + npds; /* one past the last entry */ | ||
| 176 | + | ||
| 177 | + /* Do checks up front */ | ||
| 178 | + for (pd = pds; pd < epd; pd++) { | ||
| 179 | + if (pd->fd < 0 || pd->fd >= FD_SETSIZE || !pd->events || /* fd must fit in an fd_set and request something */ | ||
| 180 | + (pd->events & ~(POLLIN | POLLOUT | POLLPRI))) { /* only these three event bits are accepted */ | ||
| 181 | + errno = EINVAL; | ||
| 182 | + return -1; | ||
| 183 | + } | ||
| 184 | + } | ||
| 185 | + /* All entries validated: set the fd in each requested set and bump its count. */ | ||
| 186 | + for (pd = pds; pd < epd; pd++) { | ||
| 187 | + if (pd->events & POLLIN) { | ||
| 188 | + FD_SET(pd->fd, &_ST_SELECT_READ_SET); | ||
| 189 | + _ST_SELECT_READ_CNT(pd->fd)++; | ||
| 190 | + } | ||
| 191 | + if (pd->events & POLLOUT) { | ||
| 192 | + FD_SET(pd->fd, &_ST_SELECT_WRITE_SET); | ||
| 193 | + _ST_SELECT_WRITE_CNT(pd->fd)++; | ||
| 194 | + } | ||
| 195 | + if (pd->events & POLLPRI) { | ||
| 196 | + FD_SET(pd->fd, &_ST_SELECT_EXCEP_SET); | ||
| 197 | + _ST_SELECT_EXCEP_CNT(pd->fd)++; | ||
| 198 | + } | ||
| 199 | + if (_ST_SELECT_MAX_OSFD < pd->fd) /* keep the max fd large enough for select() */ | ||
| 200 | + _ST_SELECT_MAX_OSFD = pd->fd; | ||
| 201 | + } | ||
| 202 | + | ||
| 203 | + return 0; | ||
| 204 | +} | ||
| 205 | +/* Undo _st_select_pollset_add for npds entries: drop one reference per requested event and clear the fd from a set when its count reaches zero. The max fd is not lowered here. */ | ||
| 206 | +ST_HIDDEN void _st_select_pollset_del(struct pollfd *pds, int npds) | ||
| 207 | +{ | ||
| 208 | + struct pollfd *pd; | ||
| 209 | + struct pollfd *epd = pds + npds; /* one past the last entry */ | ||
| 210 | + | ||
| 211 | + for (pd = pds; pd < epd; pd++) { | ||
| 212 | + if (pd->events & POLLIN) { | ||
| 213 | + if (--_ST_SELECT_READ_CNT(pd->fd) == 0) | ||
| 214 | + FD_CLR(pd->fd, &_ST_SELECT_READ_SET); | ||
| 215 | + } | ||
| 216 | + if (pd->events & POLLOUT) { | ||
| 217 | + if (--_ST_SELECT_WRITE_CNT(pd->fd) == 0) | ||
| 218 | + FD_CLR(pd->fd, &_ST_SELECT_WRITE_SET); | ||
| 219 | + } | ||
| 220 | + if (pd->events & POLLPRI) { | ||
| 221 | + if (--_ST_SELECT_EXCEP_CNT(pd->fd) == 0) | ||
| 222 | + FD_CLR(pd->fd, &_ST_SELECT_EXCEP_SET); | ||
| 223 | + } | ||
| 224 | + } | ||
| 225 | +} | ||
| 226 | +/* Called by _st_select_dispatch after select() fails with EBADF: probe every fd on the I/O queue with fcntl(F_GETFL), flag failing ones with POLLNVAL, make the owning threads runnable, and rebuild the max fd from the requests that stay queued. */ | ||
| 227 | +ST_HIDDEN void _st_select_find_bad_fd(void) | ||
| 228 | +{ | ||
| 229 | + _st_clist_t *q; | ||
| 230 | + _st_pollq_t *pq; | ||
| 231 | + int notify; | ||
| 232 | + struct pollfd *pds, *epds; | ||
| 233 | + int pq_max_osfd, osfd; | ||
| 234 | + short events; | ||
| 235 | + | ||
| 236 | + _ST_SELECT_MAX_OSFD = -1; /* recomputed below from the requests that remain queued */ | ||
| 237 | + /* NOTE(review): the loop reads q->next after ST_REMOVE_LINK(&pq->links); this relies on the macro leaving the removed node's next pointer intact -- confirm. */ | ||
| 238 | + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { | ||
| 239 | + pq = _ST_POLLQUEUE_PTR(q); | ||
| 240 | + notify = 0; | ||
| 241 | + epds = pq->pds + pq->npds; | ||
| 242 | + pq_max_osfd = -1; | ||
| 243 | + | ||
| 244 | + for (pds = pq->pds; pds < epds; pds++) { | ||
| 245 | + osfd = pds->fd; | ||
| 246 | + pds->revents = 0; | ||
| 247 | + if (pds->events == 0) | ||
| 248 | + continue; | ||
| 249 | + if (fcntl(osfd, F_GETFL, 0) < 0) { /* any fcntl failure is treated as an invalid descriptor */ | ||
| 250 | + pds->revents = POLLNVAL; | ||
| 251 | + notify = 1; | ||
| 252 | + } | ||
| 253 | + if (osfd > pq_max_osfd) { | ||
| 254 | + pq_max_osfd = osfd; | ||
| 255 | + } | ||
| 256 | + } | ||
| 257 | + | ||
| 258 | + if (notify) { | ||
| 259 | + ST_REMOVE_LINK(&pq->links); | ||
| 260 | + pq->on_ioq = 0; | ||
| 261 | + /* | ||
| 262 | + * Decrement the count of descriptors for each descriptor/event | ||
| 263 | + * because this I/O request is being removed from the ioq | ||
| 264 | + */ | ||
| 265 | + for (pds = pq->pds; pds < epds; pds++) { | ||
| 266 | + osfd = pds->fd; | ||
| 267 | + events = pds->events; | ||
| 268 | + if (events & POLLIN) { | ||
| 269 | + if (--_ST_SELECT_READ_CNT(osfd) == 0) { | ||
| 270 | + FD_CLR(osfd, &_ST_SELECT_READ_SET); | ||
| 271 | + } | ||
| 272 | + } | ||
| 273 | + if (events & POLLOUT) { | ||
| 274 | + if (--_ST_SELECT_WRITE_CNT(osfd) == 0) { | ||
| 275 | + FD_CLR(osfd, &_ST_SELECT_WRITE_SET); | ||
| 276 | + } | ||
| 277 | + } | ||
| 278 | + if (events & POLLPRI) { | ||
| 279 | + if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) { | ||
| 280 | + FD_CLR(osfd, &_ST_SELECT_EXCEP_SET); | ||
| 281 | + } | ||
| 282 | + } | ||
| 283 | + } | ||
| 284 | + /* Wake the waiting thread: take it off the sleep queue if it is there, then put it on the run queue. */ | ||
| 285 | + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) | ||
| 286 | + _ST_DEL_SLEEPQ(pq->thread); | ||
| 287 | + pq->thread->state = _ST_ST_RUNNABLE; | ||
| 288 | + _ST_ADD_RUNQ(pq->thread); | ||
| 289 | + } else { | ||
| 290 | + if (_ST_SELECT_MAX_OSFD < pq_max_osfd) /* request stays queued, so its fds still bound the max */ | ||
| 291 | + _ST_SELECT_MAX_OSFD = pq_max_osfd; | ||
| 292 | + } | ||
| 293 | + } | ||
| 294 | +} | ||
| 295 | +/* Run one select() pass: wait for I/O until the earliest sleep-queue deadline (or indefinitely when the sleep queue is empty), then make runnable every thread whose poll request has at least one ready descriptor. */ | ||
| 296 | +ST_HIDDEN void _st_select_dispatch(void) | ||
| 297 | +{ | ||
| 298 | + struct timeval timeout, *tvp; | ||
| 299 | + fd_set r, w, e; | ||
| 300 | + fd_set *rp, *wp, *ep; | ||
| 301 | + int nfd, pq_max_osfd, osfd; | ||
| 302 | + _st_clist_t *q; | ||
| 303 | + st_utime_t min_timeout; | ||
| 304 | + _st_pollq_t *pq; | ||
| 305 | + int notify; | ||
| 306 | + struct pollfd *pds, *epds; | ||
| 307 | + short events, revents; | ||
| 308 | + | ||
| 309 | + /* | ||
| 310 | + * Work on copies of the master fd_sets: select() overwrites the sets it is given | ||
| 311 | + */ | ||
| 312 | + r = _ST_SELECT_READ_SET; | ||
| 313 | + w = _ST_SELECT_WRITE_SET; | ||
| 314 | + e = _ST_SELECT_EXCEP_SET; | ||
| 315 | + | ||
| 316 | + rp = &r; | ||
| 317 | + wp = &w; | ||
| 318 | + ep = &e; | ||
| 319 | + | ||
| 320 | + if (_ST_SLEEPQ == NULL) { | ||
| 321 | + tvp = NULL; /* no pending timeout: block until some descriptor is ready */ | ||
| 322 | + } else { | ||
| 323 | + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : /* already due: poll without blocking */ | ||
| 324 | + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); | ||
| 325 | + timeout.tv_sec = (int) (min_timeout / 1000000); /* the split by 1000000 treats min_timeout as microseconds */ | ||
| 326 | + timeout.tv_usec = (int) (min_timeout % 1000000); | ||
| 327 | + tvp = &timeout; | ||
| 328 | + } | ||
| 329 | + | ||
| 330 | + /* Check for I/O operations */ | ||
| 331 | + nfd = select(_ST_SELECT_MAX_OSFD + 1, rp, wp, ep, tvp); | ||
| 332 | + | ||
| 333 | + /* Notify threads that are associated with the selected descriptors */ | ||
| 334 | + if (nfd > 0) { | ||
| 335 | + _ST_SELECT_MAX_OSFD = -1; /* rebuilt below from the requests that stay queued */ | ||
| 336 | + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { /* NOTE(review): reads q->next after ST_REMOVE_LINK below; relies on the macro leaving that pointer intact -- confirm */ | ||
| 337 | + pq = _ST_POLLQUEUE_PTR(q); | ||
| 338 | + notify = 0; | ||
| 339 | + epds = pq->pds + pq->npds; | ||
| 340 | + pq_max_osfd = -1; | ||
| 341 | + | ||
| 342 | + for (pds = pq->pds; pds < epds; pds++) { | ||
| 343 | + osfd = pds->fd; | ||
| 344 | + events = pds->events; | ||
| 345 | + revents = 0; | ||
| 346 | + if ((events & POLLIN) && FD_ISSET(osfd, rp)) { | ||
| 347 | + revents |= POLLIN; | ||
| 348 | + } | ||
| 349 | + if ((events & POLLOUT) && FD_ISSET(osfd, wp)) { | ||
| 350 | + revents |= POLLOUT; | ||
| 351 | + } | ||
| 352 | + if ((events & POLLPRI) && FD_ISSET(osfd, ep)) { | ||
| 353 | + revents |= POLLPRI; | ||
| 354 | + } | ||
| 355 | + pds->revents = revents; | ||
| 356 | + if (revents) { | ||
| 357 | + notify = 1; | ||
| 358 | + } | ||
| 359 | + if (osfd > pq_max_osfd) { | ||
| 360 | + pq_max_osfd = osfd; | ||
| 361 | + } | ||
| 362 | + } | ||
| 363 | + if (notify) { | ||
| 364 | + ST_REMOVE_LINK(&pq->links); | ||
| 365 | + pq->on_ioq = 0; | ||
| 366 | + /* | ||
| 367 | + * Decrement the count of descriptors for each descriptor/event | ||
| 368 | + * because this I/O request is being removed from the ioq | ||
| 369 | + */ | ||
| 370 | + for (pds = pq->pds; pds < epds; pds++) { | ||
| 371 | + osfd = pds->fd; | ||
| 372 | + events = pds->events; | ||
| 373 | + if (events & POLLIN) { | ||
| 374 | + if (--_ST_SELECT_READ_CNT(osfd) == 0) { | ||
| 375 | + FD_CLR(osfd, &_ST_SELECT_READ_SET); | ||
| 376 | + } | ||
| 377 | + } | ||
| 378 | + if (events & POLLOUT) { | ||
| 379 | + if (--_ST_SELECT_WRITE_CNT(osfd) == 0) { | ||
| 380 | + FD_CLR(osfd, &_ST_SELECT_WRITE_SET); | ||
| 381 | + } | ||
| 382 | + } | ||
| 383 | + if (events & POLLPRI) { | ||
| 384 | + if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) { | ||
| 385 | + FD_CLR(osfd, &_ST_SELECT_EXCEP_SET); | ||
| 386 | + } | ||
| 387 | + } | ||
| 388 | + } | ||
| 389 | + /* Wake the waiting thread: take it off the sleep queue if it is there, then put it on the run queue. */ | ||
| 390 | + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) | ||
| 391 | + _ST_DEL_SLEEPQ(pq->thread); | ||
| 392 | + pq->thread->state = _ST_ST_RUNNABLE; | ||
| 393 | + _ST_ADD_RUNQ(pq->thread); | ||
| 394 | + } else { | ||
| 395 | + if (_ST_SELECT_MAX_OSFD < pq_max_osfd) | ||
| 396 | + _ST_SELECT_MAX_OSFD = pq_max_osfd; | ||
| 397 | + } | ||
| 398 | + } | ||
| 399 | + } else if (nfd < 0) { | ||
| 400 | + /* | ||
| 401 | + * This can happen when a thread closes a file descriptor | ||
| 402 | + * that is being used by some other thread -- BAD! | ||
| 403 | + */ | ||
| 404 | + if (errno == EBADF) /* any other select() error is ignored in this function */ | ||
| 405 | + _st_select_find_bad_fd(); | ||
| 406 | + } | ||
| 407 | +} | ||
| 408 | +/* Check that a descriptor can be used with select: returns 0 if osfd is below FD_SETSIZE, otherwise -1 with errno = EMFILE. Negative values are not rejected here. */ | ||
| 409 | +ST_HIDDEN int _st_select_fd_new(int osfd) | ||
| 410 | +{ | ||
| 411 | + if (osfd >= FD_SETSIZE) { | ||
| 412 | + errno = EMFILE; | ||
| 413 | + return -1; | ||
| 414 | + } | ||
| 415 | + | ||
| 416 | + return 0; | ||
| 417 | +} | ||
| 418 | +/* Check whether osfd may be closed: returns -1 with errno = EBUSY while any read/write/exception registration on it is outstanding, otherwise 0. NOTE(review): osfd indexes fd_ref_cnts without a range check here; presumably callers only pass fds accepted by _st_select_fd_new -- confirm. */ | ||
| 419 | +ST_HIDDEN int _st_select_fd_close(int osfd) | ||
| 420 | +{ | ||
| 421 | + if (_ST_SELECT_READ_CNT(osfd) || _ST_SELECT_WRITE_CNT(osfd) || | ||
| 422 | + _ST_SELECT_EXCEP_CNT(osfd)) { | ||
| 423 | + errno = EBUSY; | ||
| 424 | + return -1; | ||
| 425 | + } | ||
| 426 | + | ||
| 427 | + return 0; | ||
| 428 | +} | ||
| 429 | +/* Report the descriptor limit of the select event system, which is FD_SETSIZE. */ | ||
| 430 | +ST_HIDDEN int _st_select_fd_getlimit(void) | ||
| 431 | +{ | ||
| 432 | + return FD_SETSIZE; | ||
| 433 | +} | ||
| 434 | + | ||
/*
 * Operations table for the select event system.
 * The initializer is positional, so the entries must stay in the order in
 * which the members of _st_eventsys_t are declared (declaration not shown
 * in this part of the file): name string, event system id, then the
 * select-specific init, dispatch, pollset add/del and fd new/close/getlimit
 * functions.
 */
static _st_eventsys_t _st_select_eventsys = {
  "select",
  ST_EVENTSYS_SELECT,
  _st_select_init,
  _st_select_dispatch,
  _st_select_pollset_add,
  _st_select_pollset_del,
  _st_select_fd_new,
  _st_select_fd_close,
  _st_select_fd_getlimit
};
| 446 | + | ||
| 447 | + | ||
| 448 | +#ifdef MD_HAVE_POLL | ||
| 449 | +/***************************************** | ||
| 450 | + * poll event system | ||
| 451 | + */ | ||
| 452 | + | ||
| 453 | +ST_HIDDEN int _st_poll_init(void) | ||
| 454 | +{ | ||
| 455 | + _st_poll_data = (struct _st_polldata *) malloc(sizeof(*_st_poll_data)); | ||
| 456 | + if (!_st_poll_data) | ||
| 457 | + return -1; | ||
| 458 | + | ||
| 459 | + _ST_POLLFDS = (struct pollfd *) malloc(ST_MIN_POLLFDS_SIZE * | ||
| 460 | + sizeof(struct pollfd)); | ||
| 461 | + if (!_ST_POLLFDS) { | ||
| 462 | + free(_st_poll_data); | ||
| 463 | + _st_poll_data = NULL; | ||
| 464 | + return -1; | ||
| 465 | + } | ||
| 466 | + _ST_POLLFDS_SIZE = ST_MIN_POLLFDS_SIZE; | ||
| 467 | + _ST_POLL_OSFD_CNT = 0; | ||
| 468 | + | ||
| 469 | + return 0; | ||
| 470 | +} | ||
| 471 | + | ||
| 472 | +ST_HIDDEN int _st_poll_pollset_add(struct pollfd *pds, int npds) | ||
| 473 | +{ | ||
| 474 | + struct pollfd *pd; | ||
| 475 | + struct pollfd *epd = pds + npds; | ||
| 476 | + | ||
| 477 | + for (pd = pds; pd < epd; pd++) { | ||
| 478 | + if (pd->fd < 0 || !pd->events) { | ||
| 479 | + errno = EINVAL; | ||
| 480 | + return -1; | ||
| 481 | + } | ||
| 482 | + } | ||
| 483 | + | ||
| 484 | + _ST_POLL_OSFD_CNT += npds; | ||
| 485 | + | ||
| 486 | + return 0; | ||
| 487 | +} | ||
| 488 | + | ||
| 489 | +/* ARGSUSED */ | ||
| 490 | +ST_HIDDEN void _st_poll_pollset_del(struct pollfd *pds, int npds) | ||
| 491 | +{ | ||
| 492 | + _ST_POLL_OSFD_CNT -= npds; | ||
| 493 | + ST_ASSERT(_ST_POLL_OSFD_CNT >= 0); | ||
| 494 | +} | ||
| 495 | + | ||
| 496 | +ST_HIDDEN void _st_poll_dispatch(void) | ||
| 497 | +{ | ||
| 498 | + int timeout, nfd; | ||
| 499 | + _st_clist_t *q; | ||
| 500 | + st_utime_t min_timeout; | ||
| 501 | + _st_pollq_t *pq; | ||
| 502 | + struct pollfd *pds, *epds, *pollfds; | ||
| 503 | + | ||
| 504 | + /* | ||
| 505 | + * Build up the array of struct pollfd to wait on. | ||
| 506 | + * If existing array is not big enough, release it and allocate a new one. | ||
| 507 | + */ | ||
| 508 | + ST_ASSERT(_ST_POLL_OSFD_CNT >= 0); | ||
| 509 | + if (_ST_POLL_OSFD_CNT > _ST_POLLFDS_SIZE) { | ||
| 510 | + free(_ST_POLLFDS); | ||
| 511 | + _ST_POLLFDS = (struct pollfd *) malloc((_ST_POLL_OSFD_CNT + 10) * | ||
| 512 | + sizeof(struct pollfd)); | ||
| 513 | + ST_ASSERT(_ST_POLLFDS != NULL); | ||
| 514 | + _ST_POLLFDS_SIZE = _ST_POLL_OSFD_CNT + 10; | ||
| 515 | + } | ||
| 516 | + pollfds = _ST_POLLFDS; | ||
| 517 | + | ||
| 518 | + /* Gather all descriptors into one array */ | ||
| 519 | + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { | ||
| 520 | + pq = _ST_POLLQUEUE_PTR(q); | ||
| 521 | + memcpy(pollfds, pq->pds, sizeof(struct pollfd) * pq->npds); | ||
| 522 | + pollfds += pq->npds; | ||
| 523 | + } | ||
| 524 | + ST_ASSERT(pollfds <= _ST_POLLFDS + _ST_POLLFDS_SIZE); | ||
| 525 | + | ||
| 526 | + if (_ST_SLEEPQ == NULL) { | ||
| 527 | + timeout = -1; | ||
| 528 | + } else { | ||
| 529 | + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : | ||
| 530 | + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); | ||
| 531 | + timeout = (int) (min_timeout / 1000); | ||
| 532 | + } | ||
| 533 | + | ||
| 534 | + /* Check for I/O operations */ | ||
| 535 | + nfd = poll(_ST_POLLFDS, _ST_POLL_OSFD_CNT, timeout); | ||
| 536 | + | ||
| 537 | + /* Notify threads that are associated with the selected descriptors */ | ||
| 538 | + if (nfd > 0) { | ||
| 539 | + pollfds = _ST_POLLFDS; | ||
| 540 | + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { | ||
| 541 | + pq = _ST_POLLQUEUE_PTR(q); | ||
| 542 | + epds = pollfds + pq->npds; | ||
| 543 | + for (pds = pollfds; pds < epds; pds++) { | ||
| 544 | + if (pds->revents) | ||
| 545 | + break; | ||
| 546 | + } | ||
| 547 | + if (pds < epds) { | ||
| 548 | + memcpy(pq->pds, pollfds, sizeof(struct pollfd) * pq->npds); | ||
| 549 | + ST_REMOVE_LINK(&pq->links); | ||
| 550 | + pq->on_ioq = 0; | ||
| 551 | + | ||
| 552 | + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) | ||
| 553 | + _ST_DEL_SLEEPQ(pq->thread); | ||
| 554 | + pq->thread->state = _ST_ST_RUNNABLE; | ||
| 555 | + _ST_ADD_RUNQ(pq->thread); | ||
| 556 | + | ||
| 557 | + _ST_POLL_OSFD_CNT -= pq->npds; | ||
| 558 | + ST_ASSERT(_ST_POLL_OSFD_CNT >= 0); | ||
| 559 | + } | ||
| 560 | + pollfds = epds; | ||
| 561 | + } | ||
| 562 | + } | ||
| 563 | +} | ||
| 564 | + | ||
| 565 | +/* ARGSUSED */ | ||
| 566 | +ST_HIDDEN int _st_poll_fd_new(int osfd) | ||
| 567 | +{ | ||
| 568 | + return 0; | ||
| 569 | +} | ||
| 570 | + | ||
| 571 | +/* ARGSUSED */ | ||
| 572 | +ST_HIDDEN int _st_poll_fd_close(int osfd) | ||
| 573 | +{ | ||
| 574 | + /* | ||
| 575 | + * We don't maintain I/O counts for poll event system | ||
| 576 | + * so nothing to check here. | ||
| 577 | + */ | ||
| 578 | + return 0; | ||
| 579 | +} | ||
| 580 | + | ||
| 581 | +ST_HIDDEN int _st_poll_fd_getlimit(void) | ||
| 582 | +{ | ||
| 583 | + /* zero means no specific limit */ | ||
| 584 | + return 0; | ||
| 585 | +} | ||
| 586 | + | ||
/*
 * Operations table for the poll event system.
 * The initializer is positional, so the entries must stay in the order in
 * which the members of _st_eventsys_t are declared (declaration not shown
 * in this part of the file): name string, event system id, then the
 * poll-specific init, dispatch, pollset add/del and fd new/close/getlimit
 * functions.
 */
static _st_eventsys_t _st_poll_eventsys = {
  "poll",
  ST_EVENTSYS_POLL,
  _st_poll_init,
  _st_poll_dispatch,
  _st_poll_pollset_add,
  _st_poll_pollset_del,
  _st_poll_fd_new,
  _st_poll_fd_close,
  _st_poll_fd_getlimit
};
| 598 | +#endif /* MD_HAVE_POLL */ | ||
| 599 | + | ||
| 600 | + | ||
| 601 | +#ifdef MD_HAVE_KQUEUE | ||
| 602 | +/***************************************** | ||
| 603 | + * kqueue event system | ||
| 604 | + */ | ||
| 605 | + | ||
/*
 * Set up the kqueue event system.
 *
 * Allocates the zero-filled private data block, creates the kqueue
 * descriptor (marked close-on-exec), records the creating process id,
 * and allocates the per-descriptor data array (FD_SETSIZE entries to
 * start with) plus the result, add and delete event lists
 * (ST_KQ_MIN_EVTLIST_SIZE entries each).
 *
 * Returns 0 on success.  On failure everything acquired so far is
 * released, _st_kq_data is reset to NULL, errno describes the failure
 * and -1 is returned.
 */
ST_HIDDEN int _st_kq_init(void)
{
  int saved_errno;

  _st_kq_data = (struct _st_kqdata *) calloc(1, sizeof(*_st_kq_data));
  if (_st_kq_data == NULL)
    return -1;

  _st_kq_data->kq = kqueue();
  if (_st_kq_data->kq < 0) {
    saved_errno = errno;
    goto fail;
  }
  fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
  _st_kq_data->pid = getpid();

  /*
   * Allocate file descriptor data array.
   * FD_SETSIZE looks like good initial size.
   */
  _st_kq_data->fd_data_size = FD_SETSIZE;
  _st_kq_data->fd_data = (_kq_fd_data_t *)calloc(_st_kq_data->fd_data_size,
                                                 sizeof(_kq_fd_data_t));
  if (_st_kq_data->fd_data == NULL) {
    saved_errno = errno;
    goto fail;
  }

  /* Allocate event lists; all three are checked together afterwards */
  _st_kq_data->evtlist_size = ST_KQ_MIN_EVTLIST_SIZE;
  _st_kq_data->evtlist = (struct kevent *)malloc(_st_kq_data->evtlist_size *
                                                 sizeof(struct kevent));
  _st_kq_data->addlist_size = ST_KQ_MIN_EVTLIST_SIZE;
  _st_kq_data->addlist = (struct kevent *)malloc(_st_kq_data->addlist_size *
                                                 sizeof(struct kevent));
  _st_kq_data->dellist_size = ST_KQ_MIN_EVTLIST_SIZE;
  _st_kq_data->dellist = (struct kevent *)malloc(_st_kq_data->dellist_size *
                                                 sizeof(struct kevent));
  if (_st_kq_data->evtlist != NULL && _st_kq_data->addlist != NULL &&
      _st_kq_data->dellist != NULL)
    return 0;

  saved_errno = ENOMEM;

 fail:
  /* Pointers never assigned are still NULL thanks to calloc() above */
  if (_st_kq_data->kq >= 0)
    close(_st_kq_data->kq);
  free(_st_kq_data->fd_data);
  free(_st_kq_data->evtlist);
  free(_st_kq_data->addlist);
  free(_st_kq_data->dellist);
  free(_st_kq_data);
  _st_kq_data = NULL;
  errno = saved_errno;

  return -1;
}
| 667 | + | ||
| 668 | +ST_HIDDEN int _st_kq_fd_data_expand(int maxfd) | ||
| 669 | +{ | ||
| 670 | + _kq_fd_data_t *ptr; | ||
| 671 | + int n = _st_kq_data->fd_data_size; | ||
| 672 | + | ||
| 673 | + while (maxfd >= n) | ||
| 674 | + n <<= 1; | ||
| 675 | + | ||
| 676 | + ptr = (_kq_fd_data_t *)realloc(_st_kq_data->fd_data, | ||
| 677 | + n * sizeof(_kq_fd_data_t)); | ||
| 678 | + if (!ptr) | ||
| 679 | + return -1; | ||
| 680 | + | ||
| 681 | + memset(ptr + _st_kq_data->fd_data_size, 0, | ||
| 682 | + (n - _st_kq_data->fd_data_size) * sizeof(_kq_fd_data_t)); | ||
| 683 | + | ||
| 684 | + _st_kq_data->fd_data = ptr; | ||
| 685 | + _st_kq_data->fd_data_size = n; | ||
| 686 | + | ||
| 687 | + return 0; | ||
| 688 | +} | ||
| 689 | + | ||
/*
 * Grow the kqueue add list until at least `avail` free slots exist beyond
 * the entries already queued.  The result event list is grown to the same
 * size on a best-effort basis.
 *
 * Returns 0 on success, -1 if the add list could not be reallocated.
 */
ST_HIDDEN int _st_kq_addlist_expand(int avail)
{
  int new_size;
  struct kevent *grown;

  for (new_size = _st_kq_data->addlist_size;
       avail > new_size - _st_kq_data->addlist_cnt;
       new_size <<= 1)
    ;

  grown = (struct kevent *)realloc(_st_kq_data->addlist,
                                   new_size * sizeof(struct kevent));
  if (grown == NULL)
    return -1;

  _st_kq_data->addlist_size = new_size;
  _st_kq_data->addlist = grown;

  /*
   * Growing the result list as well is optional, so a failure here is
   * ignored and the old result list stays in use.
   */
  grown = (struct kevent *)realloc(_st_kq_data->evtlist,
                                   new_size * sizeof(struct kevent));
  if (grown != NULL) {
    _st_kq_data->evtlist_size = new_size;
    _st_kq_data->evtlist = grown;
  }

  return 0;
}
| 719 | + | ||
/*
 * Append a copy of *kev to the kqueue add list.
 * The caller must have made room beforehand; this is only asserted here.
 */
ST_HIDDEN void _st_kq_addlist_add(const struct kevent *kev)
{
  struct kevent *slot;

  ST_ASSERT(_st_kq_data->addlist_cnt < _st_kq_data->addlist_size);

  slot = &_st_kq_data->addlist[_st_kq_data->addlist_cnt];
  memcpy(slot, kev, sizeof(*slot));
  _st_kq_data->addlist_cnt += 1;
}
| 727 | + | ||
/*
 * Append a copy of *kev to the kqueue delete list, doubling the list when
 * it is full.  If the list cannot be grown the entry is silently dropped
 * (see the comment in _st_kq_pollset_del()).
 */
ST_HIDDEN void _st_kq_dellist_add(const struct kevent *kev)
{
  struct kevent *slot;

  if (_st_kq_data->dellist_cnt >= _st_kq_data->dellist_size) {
    int doubled = _st_kq_data->dellist_size << 1;
    struct kevent *grown;

    grown = (struct kevent *)realloc(_st_kq_data->dellist,
                                     doubled * sizeof(struct kevent));
    if (grown == NULL)
      return;

    _st_kq_data->dellist_size = doubled;
    _st_kq_data->dellist = grown;
  }

  slot = &_st_kq_data->dellist[_st_kq_data->dellist_cnt];
  memcpy(slot, kev, sizeof(*slot));
  _st_kq_data->dellist_cnt += 1;
}
| 751 | + | ||
/*
 * Register a pollset with the kqueue event system.
 *
 * The operation is all-or-nothing: every check that can fail (argument
 * validation, growing the per-descriptor array, growing the add list) is
 * done before any reference count is touched, so no unwinding is needed.
 * A filter is queued on the add list only when its reference count goes
 * from zero to one; the queued changes are handed to kevent() by the
 * dispatch function.
 *
 * Returns 0 on success, -1 on failure (errno = EINVAL for a bad entry).
 */
ST_HIDDEN int _st_kq_pollset_add(struct pollfd *pds, int npds)
{
  struct kevent kev;
  int i, slots_needed;

  for (i = 0; i < npds; i++) {
    /* POLLIN and/or POLLOUT must be set, but nothing else */
    if (pds[i].fd < 0 || pds[i].events == 0 ||
        (pds[i].events & ~(POLLIN | POLLOUT)) != 0) {
      errno = EINVAL;
      return -1;
    }
    if (pds[i].fd >= _st_kq_data->fd_data_size) {
      if (_st_kq_fd_data_expand(pds[i].fd) < 0)
        return -1;
    }
  }

  /*
   * Each descriptor may need two add-list slots: one for the READ filter
   * and one for the WRITE filter.
   */
  slots_needed = npds << 1;
  if (slots_needed > _st_kq_data->addlist_size - _st_kq_data->addlist_cnt) {
    if (_st_kq_addlist_expand(slots_needed) < 0)
      return -1;
  }

  for (i = 0; i < npds; i++) {
    if (pds[i].events & POLLIN) {
      /* First reader of this descriptor: queue the READ filter */
      if (_ST_KQ_READ_CNT(pds[i].fd)++ == 0) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = pds[i].fd;
        kev.filter = EVFILT_READ;
#ifdef NOTE_EOF
        /* Make it behave like select() and poll() */
        kev.fflags = NOTE_EOF;
#endif
        kev.flags = (EV_ADD | EV_ONESHOT);
        _st_kq_addlist_add(&kev);
      }
    }
    if (pds[i].events & POLLOUT) {
      /* First writer of this descriptor: queue the WRITE filter */
      if (_ST_KQ_WRITE_CNT(pds[i].fd)++ == 0) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = pds[i].fd;
        kev.filter = EVFILT_WRITE;
        kev.flags = (EV_ADD | EV_ONESHOT);
        _st_kq_addlist_add(&kev);
      }
    }
  }

  return 0;
}
| 807 | + | ||
/*
 * Unregister a pollset from the kqueue event system.
 *
 * Reference counts are dropped for every requested filter; a filter whose
 * count reaches zero is put on the delete list, which is then submitted
 * to the kernel right away.
 *
 * It's OK if deleting fails because a descriptor will either be closed or
 * fire only once (the filters are added with EV_ONESHOT).
 */
ST_HIDDEN void _st_kq_pollset_del(struct pollfd *pds, int npds)
{
  struct kevent kev;
  int i, rv;

  _st_kq_data->dellist_cnt = 0;

  for (i = 0; i < npds; i++) {
    if (pds[i].events & POLLIN) {
      /* Last reader gone: schedule removal of the READ filter */
      if (--_ST_KQ_READ_CNT(pds[i].fd) == 0) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = pds[i].fd;
        kev.filter = EVFILT_READ;
        kev.flags = EV_DELETE;
        _st_kq_dellist_add(&kev);
      }
    }
    if (pds[i].events & POLLOUT) {
      /* Last writer gone: schedule removal of the WRITE filter */
      if (--_ST_KQ_WRITE_CNT(pds[i].fd) == 0) {
        memset(&kev, 0, sizeof(kev));
        kev.ident = pds[i].fd;
        kev.filter = EVFILT_WRITE;
        kev.flags = EV_DELETE;
        _st_kq_dellist_add(&kev);
      }
    }
  }

  if (_st_kq_data->dellist_cnt <= 0)
    return;

  /*
   * We do "synchronous" kqueue deletes to avoid deleting
   * closed descriptors and other possible problems.
   * This kevent() won't block since result list size is 0;
   * it is simply repeated while it gets interrupted.
   */
  do {
    rv = kevent(_st_kq_data->kq, _st_kq_data->dellist,
                _st_kq_data->dellist_cnt, NULL, 0, NULL);
  } while (rv < 0 && errno == EINTR);
}
| 849 | + | ||
/*
 * Dispatch function of the kqueue event system.
 *
 * One kevent() call both submits the pending add list and collects fired
 * events.  Fired events are recorded per descriptor as poll-style bits,
 * then every request on the I/O queue is checked against them; requests
 * with at least one result are taken off the queue and their threads made
 * runnable.  Filters belonging to such requests that did NOT fire are
 * deleted from the kqueue.  If kevent() fails with EBADF in a process
 * other than the one that created the kqueue, the kqueue is recreated and
 * all queued pollsets are registered again before retrying.
 */
ST_HIDDEN void _st_kq_dispatch(void)
{
  struct timespec timeout, *tsp;
  struct kevent kev;
  st_utime_t min_timeout;
  _st_clist_t *q;
  _st_pollq_t *pq;
  struct pollfd *pds, *epds;
  int nfd, i, osfd, notify, filter;
  short events, revents;

  if (_ST_SLEEPQ == NULL) {
    /* Nobody is sleeping: a NULL timespec is passed to kevent() */
    tsp = NULL;
  } else {
    /* Time left until the earliest deadline, split into sec / nsec */
    min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
      (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
    timeout.tv_sec  = (time_t) (min_timeout / 1000000);
    timeout.tv_nsec = (long) ((min_timeout % 1000000) * 1000);
    tsp = &timeout;
  }

 retry_kevent:
  /* Check for I/O operations */
  nfd = kevent(_st_kq_data->kq,
               _st_kq_data->addlist, _st_kq_data->addlist_cnt,
               _st_kq_data->evtlist, _st_kq_data->evtlist_size, tsp);

  /* The add list is considered consumed whatever kevent() returned */
  _st_kq_data->addlist_cnt = 0;

  if (nfd > 0) {
    /* Pass 1: translate each fired kevent into per-descriptor poll bits */
    for (i = 0; i < nfd; i++) {
      osfd = _st_kq_data->evtlist[i].ident;
      filter = _st_kq_data->evtlist[i].filter;

      if (filter == EVFILT_READ) {
        _ST_KQ_REVENTS(osfd) |= POLLIN;
      } else if (filter == EVFILT_WRITE) {
        _ST_KQ_REVENTS(osfd) |= POLLOUT;
      }
      if (_st_kq_data->evtlist[i].flags & EV_ERROR) {
        /* For EV_ERROR entries the data field is compared with errno codes */
        if (_st_kq_data->evtlist[i].data == EBADF) {
          _ST_KQ_REVENTS(osfd) |= POLLNVAL;
        } else {
          _ST_KQ_REVENTS(osfd) |= POLLERR;
        }
      }
    }

    _st_kq_data->dellist_cnt = 0;

    /* Pass 2: match the recorded bits against every queued request */
    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
      pq = _ST_POLLQUEUE_PTR(q);
      notify = 0;
      epds = pq->pds + pq->npds;

      for (pds = pq->pds; pds < epds; pds++) {
        osfd = pds->fd;
        events = pds->events;
        /*
         * Error bits are always reported; POLLIN/POLLOUT only when the
         * request actually asked for them.
         */
        revents = (short)(_ST_KQ_REVENTS(osfd) & ~(POLLIN | POLLOUT));
        if ((events & POLLIN) && (_ST_KQ_REVENTS(osfd) & POLLIN)) {
          revents |= POLLIN;
        }
        if ((events & POLLOUT) && (_ST_KQ_REVENTS(osfd) & POLLOUT)) {
          revents |= POLLOUT;
        }
        pds->revents = revents;
        if (revents) {
          notify = 1;
        }
      }
      if (notify) {
        ST_REMOVE_LINK(&pq->links);
        pq->on_ioq = 0;
        /* Drop this request's references on all of its descriptors */
        for (pds = pq->pds; pds < epds; pds++) {
          osfd = pds->fd;
          events = pds->events;
          /*
           * We set EV_ONESHOT flag so we only need to delete
           * descriptor if it didn't fire.
           */
          if ((events & POLLIN) && (--_ST_KQ_READ_CNT(osfd) == 0) &&
              ((_ST_KQ_REVENTS(osfd) & POLLIN) == 0)) {
            memset(&kev, 0, sizeof(kev));
            kev.ident = osfd;
            kev.filter = EVFILT_READ;
            kev.flags = EV_DELETE;
            _st_kq_dellist_add(&kev);
          }
          if ((events & POLLOUT) && (--_ST_KQ_WRITE_CNT(osfd) == 0)
              && ((_ST_KQ_REVENTS(osfd) & POLLOUT) == 0)) {
            memset(&kev, 0, sizeof(kev));
            kev.ident = osfd;
            kev.filter = EVFILT_WRITE;
            kev.flags = EV_DELETE;
            _st_kq_dellist_add(&kev);
          }
        }

        /* Wake the waiting thread, cancelling its timeout if it has one */
        if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
          _ST_DEL_SLEEPQ(pq->thread);
        pq->thread->state = _ST_ST_RUNNABLE;
        _ST_ADD_RUNQ(pq->thread);
      }
    }

    /* Submit the collected deletions, repeating while interrupted */
    if (_st_kq_data->dellist_cnt > 0) {
      int rv;
      do {
        /* This kevent() won't block since result list size is 0 */
        rv = kevent(_st_kq_data->kq, _st_kq_data->dellist,
                    _st_kq_data->dellist_cnt, NULL, 0, NULL);
      } while (rv < 0 && errno == EINTR);
    }

    /* Pass 3: clear the per-descriptor result bits set in pass 1 */
    for (i = 0; i < nfd; i++) {
      osfd = _st_kq_data->evtlist[i].ident;
      _ST_KQ_REVENTS(osfd) = 0;
    }

  } else if (nfd < 0) {
    if (errno == EBADF && _st_kq_data->pid != getpid()) {
      /* We probably forked, reinitialize kqueue */
      if ((_st_kq_data->kq = kqueue()) < 0) {
        /* There is nothing we can do here, will retry later */
        return;
      }
      fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
      _st_kq_data->pid = getpid();
      /* Re-register all descriptors on ioq with new kqueue */
      /* Counts are zeroed first so that pollset_add queues every filter */
      memset(_st_kq_data->fd_data, 0,
             _st_kq_data->fd_data_size * sizeof(_kq_fd_data_t));
      for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
        pq = _ST_POLLQUEUE_PTR(q);
        /* NOTE(review): the return value is ignored here -- confirm intended */
        _st_kq_pollset_add(pq->pds, pq->npds);
      }
      goto retry_kevent;
    }
  }
}
| 989 | + | ||
| 990 | +ST_HIDDEN int _st_kq_fd_new(int osfd) | ||
| 991 | +{ | ||
| 992 | + if (osfd >= _st_kq_data->fd_data_size && _st_kq_fd_data_expand(osfd) < 0) | ||
| 993 | + return -1; | ||
| 994 | + | ||
| 995 | + return 0; | ||
| 996 | +} | ||
| 997 | + | ||
| 998 | +ST_HIDDEN int _st_kq_fd_close(int osfd) | ||
| 999 | +{ | ||
| 1000 | + if (_ST_KQ_READ_CNT(osfd) || _ST_KQ_WRITE_CNT(osfd)) { | ||
| 1001 | + errno = EBUSY; | ||
| 1002 | + return -1; | ||
| 1003 | + } | ||
| 1004 | + | ||
| 1005 | + return 0; | ||
| 1006 | +} | ||
| 1007 | + | ||
| 1008 | +ST_HIDDEN int _st_kq_fd_getlimit(void) | ||
| 1009 | +{ | ||
| 1010 | + /* zero means no specific limit */ | ||
| 1011 | + return 0; | ||
| 1012 | +} | ||
| 1013 | + | ||
/*
 * Operations table for the kqueue event system.
 * It is registered under the ST_EVENTSYS_ALT identifier.  The initializer
 * is positional, so the entries must stay in the order in which the
 * members of _st_eventsys_t are declared (declaration not shown in this
 * part of the file): name string, event system id, then the
 * kqueue-specific init, dispatch, pollset add/del and fd
 * new/close/getlimit functions.
 */
static _st_eventsys_t _st_kq_eventsys = {
  "kqueue",
  ST_EVENTSYS_ALT,
  _st_kq_init,
  _st_kq_dispatch,
  _st_kq_pollset_add,
  _st_kq_pollset_del,
  _st_kq_fd_new,
  _st_kq_fd_close,
  _st_kq_fd_getlimit
};
| 1025 | +#endif /* MD_HAVE_KQUEUE */ | ||
| 1026 | + | ||
| 1027 | + | ||
| 1028 | +#ifdef MD_HAVE_EPOLL | ||
| 1029 | +/***************************************** | ||
| 1030 | + * epoll event system | ||
| 1031 | + */ | ||
| 1032 | + | ||
| 1033 | +ST_HIDDEN int _st_epoll_init(void) | ||
| 1034 | +{ | ||
| 1035 | + int fdlim; | ||
| 1036 | + int err = 0; | ||
| 1037 | + int rv = 0; | ||
| 1038 | + | ||
| 1039 | + _st_epoll_data = | ||
| 1040 | + (struct _st_epolldata *) calloc(1, sizeof(*_st_epoll_data)); | ||
| 1041 | + if (!_st_epoll_data) | ||
| 1042 | + return -1; | ||
| 1043 | + | ||
| 1044 | + fdlim = st_getfdlimit(); | ||
| 1045 | + _st_epoll_data->fd_hint = (fdlim > 0 && fdlim < ST_EPOLL_EVTLIST_SIZE) ? | ||
| 1046 | + fdlim : ST_EPOLL_EVTLIST_SIZE; | ||
| 1047 | + | ||
| 1048 | + if ((_st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint)) < 0) { | ||
| 1049 | + err = errno; | ||
| 1050 | + rv = -1; | ||
| 1051 | + goto cleanup_epoll; | ||
| 1052 | + } | ||
| 1053 | + fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC); | ||
| 1054 | + _st_epoll_data->pid = getpid(); | ||
| 1055 | + | ||
| 1056 | + /* Allocate file descriptor data array */ | ||
| 1057 | + _st_epoll_data->fd_data_size = _st_epoll_data->fd_hint; | ||
| 1058 | + _st_epoll_data->fd_data = | ||
| 1059 | + (_epoll_fd_data_t *)calloc(_st_epoll_data->fd_data_size, | ||
| 1060 | + sizeof(_epoll_fd_data_t)); | ||
| 1061 | + if (!_st_epoll_data->fd_data) { | ||
| 1062 | + err = errno; | ||
| 1063 | + rv = -1; | ||
| 1064 | + goto cleanup_epoll; | ||
| 1065 | + } | ||
| 1066 | + | ||
| 1067 | + /* Allocate event lists */ | ||
| 1068 | + _st_epoll_data->evtlist_size = _st_epoll_data->fd_hint; | ||
| 1069 | + _st_epoll_data->evtlist = | ||
| 1070 | + (struct epoll_event *)malloc(_st_epoll_data->evtlist_size * | ||
| 1071 | + sizeof(struct epoll_event)); | ||
| 1072 | + if (!_st_epoll_data->evtlist) { | ||
| 1073 | + err = errno; | ||
| 1074 | + rv = -1; | ||
| 1075 | + } | ||
| 1076 | + | ||
| 1077 | + cleanup_epoll: | ||
| 1078 | + if (rv < 0) { | ||
| 1079 | + if (_st_epoll_data->epfd >= 0) | ||
| 1080 | + close(_st_epoll_data->epfd); | ||
| 1081 | + free(_st_epoll_data->fd_data); | ||
| 1082 | + free(_st_epoll_data->evtlist); | ||
| 1083 | + free(_st_epoll_data); | ||
| 1084 | + _st_epoll_data = NULL; | ||
| 1085 | + errno = err; | ||
| 1086 | + } | ||
| 1087 | + | ||
| 1088 | + return rv; | ||
| 1089 | +} | ||
| 1090 | + | ||
| 1091 | +ST_HIDDEN int _st_epoll_fd_data_expand(int maxfd) | ||
| 1092 | +{ | ||
| 1093 | + _epoll_fd_data_t *ptr; | ||
| 1094 | + int n = _st_epoll_data->fd_data_size; | ||
| 1095 | + | ||
| 1096 | + while (maxfd >= n) | ||
| 1097 | + n <<= 1; | ||
| 1098 | + | ||
| 1099 | + ptr = (_epoll_fd_data_t *)realloc(_st_epoll_data->fd_data, | ||
| 1100 | + n * sizeof(_epoll_fd_data_t)); | ||
| 1101 | + if (!ptr) | ||
| 1102 | + return -1; | ||
| 1103 | + | ||
| 1104 | + memset(ptr + _st_epoll_data->fd_data_size, 0, | ||
| 1105 | + (n - _st_epoll_data->fd_data_size) * sizeof(_epoll_fd_data_t)); | ||
| 1106 | + | ||
| 1107 | + _st_epoll_data->fd_data = ptr; | ||
| 1108 | + _st_epoll_data->fd_data_size = n; | ||
| 1109 | + | ||
| 1110 | + return 0; | ||
| 1111 | +} | ||
| 1112 | + | ||
| 1113 | +ST_HIDDEN void _st_epoll_evtlist_expand(void) | ||
| 1114 | +{ | ||
| 1115 | + struct epoll_event *ptr; | ||
| 1116 | + int n = _st_epoll_data->evtlist_size; | ||
| 1117 | + | ||
| 1118 | + while (_st_epoll_data->evtlist_cnt > n) | ||
| 1119 | + n <<= 1; | ||
| 1120 | + | ||
| 1121 | + ptr = (struct epoll_event *)realloc(_st_epoll_data->evtlist, | ||
| 1122 | + n * sizeof(struct epoll_event)); | ||
| 1123 | + if (ptr) { | ||
| 1124 | + _st_epoll_data->evtlist = ptr; | ||
| 1125 | + _st_epoll_data->evtlist_size = n; | ||
| 1126 | + } | ||
| 1127 | +} | ||
| 1128 | + | ||
| 1129 | +ST_HIDDEN void _st_epoll_pollset_del(struct pollfd *pds, int npds) | ||
| 1130 | +{ | ||
| 1131 | + struct epoll_event ev; | ||
| 1132 | + struct pollfd *pd; | ||
| 1133 | + struct pollfd *epd = pds + npds; | ||
| 1134 | + int old_events, events, op; | ||
| 1135 | + | ||
| 1136 | + /* | ||
| 1137 | + * It's more or less OK if deleting fails because a descriptor | ||
| 1138 | + * will either be closed or deleted in dispatch function after | ||
| 1139 | + * it fires. | ||
| 1140 | + */ | ||
| 1141 | + for (pd = pds; pd < epd; pd++) { | ||
| 1142 | + old_events = _ST_EPOLL_EVENTS(pd->fd); | ||
| 1143 | + | ||
| 1144 | + if (pd->events & POLLIN) | ||
| 1145 | + _ST_EPOLL_READ_CNT(pd->fd)--; | ||
| 1146 | + if (pd->events & POLLOUT) | ||
| 1147 | + _ST_EPOLL_WRITE_CNT(pd->fd)--; | ||
| 1148 | + if (pd->events & POLLPRI) | ||
| 1149 | + _ST_EPOLL_EXCEP_CNT(pd->fd)--; | ||
| 1150 | + | ||
| 1151 | + events = _ST_EPOLL_EVENTS(pd->fd); | ||
| 1152 | + /* | ||
| 1153 | + * The _ST_EPOLL_REVENTS check below is needed so we can use | ||
| 1154 | + * this function inside dispatch(). Outside of dispatch() | ||
| 1155 | + * _ST_EPOLL_REVENTS is always zero for all descriptors. | ||
| 1156 | + */ | ||
| 1157 | + if (events != old_events && _ST_EPOLL_REVENTS(pd->fd) == 0) { | ||
| 1158 | + op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; | ||
| 1159 | + ev.events = events; | ||
| 1160 | + ev.data.fd = pd->fd; | ||
| 1161 | + if (epoll_ctl(_st_epoll_data->epfd, op, pd->fd, &ev) == 0 && | ||
| 1162 | + op == EPOLL_CTL_DEL) { | ||
| 1163 | + _st_epoll_data->evtlist_cnt--; | ||
| 1164 | + } | ||
| 1165 | + } | ||
| 1166 | + } | ||
| 1167 | +} | ||
| 1168 | + | ||
| 1169 | +ST_HIDDEN int _st_epoll_pollset_add(struct pollfd *pds, int npds) | ||
| 1170 | +{ | ||
| 1171 | + struct epoll_event ev; | ||
| 1172 | + int i, fd; | ||
| 1173 | + int old_events, events, op; | ||
| 1174 | + | ||
| 1175 | + /* Do as many checks as possible up front */ | ||
| 1176 | + for (i = 0; i < npds; i++) { | ||
| 1177 | + fd = pds[i].fd; | ||
| 1178 | + if (fd < 0 || !pds[i].events || | ||
| 1179 | + (pds[i].events & ~(POLLIN | POLLOUT | POLLPRI))) { | ||
| 1180 | + errno = EINVAL; | ||
| 1181 | + return -1; | ||
| 1182 | + } | ||
| 1183 | + if (fd >= _st_epoll_data->fd_data_size && | ||
| 1184 | + _st_epoll_fd_data_expand(fd) < 0) | ||
| 1185 | + return -1; | ||
| 1186 | + } | ||
| 1187 | + | ||
| 1188 | + for (i = 0; i < npds; i++) { | ||
| 1189 | + fd = pds[i].fd; | ||
| 1190 | + old_events = _ST_EPOLL_EVENTS(fd); | ||
| 1191 | + | ||
| 1192 | + if (pds[i].events & POLLIN) | ||
| 1193 | + _ST_EPOLL_READ_CNT(fd)++; | ||
| 1194 | + if (pds[i].events & POLLOUT) | ||
| 1195 | + _ST_EPOLL_WRITE_CNT(fd)++; | ||
| 1196 | + if (pds[i].events & POLLPRI) | ||
| 1197 | + _ST_EPOLL_EXCEP_CNT(fd)++; | ||
| 1198 | + | ||
| 1199 | + events = _ST_EPOLL_EVENTS(fd); | ||
| 1200 | + if (events != old_events) { | ||
| 1201 | + op = old_events ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; | ||
| 1202 | + ev.events = events; | ||
| 1203 | + ev.data.fd = fd; | ||
| 1204 | + if (epoll_ctl(_st_epoll_data->epfd, op, fd, &ev) < 0 && | ||
| 1205 | + (op != EPOLL_CTL_ADD || errno != EEXIST)) | ||
| 1206 | + break; | ||
| 1207 | + if (op == EPOLL_CTL_ADD) { | ||
| 1208 | + _st_epoll_data->evtlist_cnt++; | ||
| 1209 | + if (_st_epoll_data->evtlist_cnt > _st_epoll_data->evtlist_size) | ||
| 1210 | + _st_epoll_evtlist_expand(); | ||
| 1211 | + } | ||
| 1212 | + } | ||
| 1213 | + } | ||
| 1214 | + | ||
| 1215 | + if (i < npds) { | ||
| 1216 | + /* Error */ | ||
| 1217 | + int err = errno; | ||
| 1218 | + /* Unroll the state */ | ||
| 1219 | + _st_epoll_pollset_del(pds, i + 1); | ||
| 1220 | + errno = err; | ||
| 1221 | + return -1; | ||
| 1222 | + } | ||
| 1223 | + | ||
| 1224 | + return 0; | ||
| 1225 | +} | ||
| 1226 | + | ||
| 1227 | +ST_HIDDEN void _st_epoll_dispatch(void) | ||
| 1228 | +{ | ||
| 1229 | + st_utime_t min_timeout; | ||
| 1230 | + _st_clist_t *q; | ||
| 1231 | + _st_pollq_t *pq; | ||
| 1232 | + struct pollfd *pds, *epds; | ||
| 1233 | + struct epoll_event ev; | ||
| 1234 | + int timeout, nfd, i, osfd, notify; | ||
| 1235 | + int events, op; | ||
| 1236 | + short revents; | ||
| 1237 | + | ||
| 1238 | + if (_ST_SLEEPQ == NULL) { | ||
| 1239 | + timeout = -1; | ||
| 1240 | + } else { | ||
| 1241 | + min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : | ||
| 1242 | + (_ST_SLEEPQ->due - _ST_LAST_CLOCK); | ||
| 1243 | + timeout = (int) (min_timeout / 1000); | ||
| 1244 | + } | ||
| 1245 | + | ||
| 1246 | + if (_st_epoll_data->pid != getpid()) { | ||
| 1247 | + /* We probably forked, reinitialize epoll set */ | ||
| 1248 | + close(_st_epoll_data->epfd); | ||
| 1249 | + _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint); | ||
| 1250 | + if (_st_epoll_data->epfd < 0) { | ||
| 1251 | + /* There is nothing we can do here, will retry later */ | ||
| 1252 | + return; | ||
| 1253 | + } | ||
| 1254 | + fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC); | ||
| 1255 | + _st_epoll_data->pid = getpid(); | ||
| 1256 | + | ||
| 1257 | + /* Put all descriptors on ioq into new epoll set */ | ||
| 1258 | + memset(_st_epoll_data->fd_data, 0, | ||
| 1259 | + _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t)); | ||
| 1260 | + _st_epoll_data->evtlist_cnt = 0; | ||
| 1261 | + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { | ||
| 1262 | + pq = _ST_POLLQUEUE_PTR(q); | ||
| 1263 | + _st_epoll_pollset_add(pq->pds, pq->npds); | ||
| 1264 | + } | ||
| 1265 | + } | ||
| 1266 | + | ||
| 1267 | + /* Check for I/O operations */ | ||
| 1268 | + nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist, | ||
| 1269 | + _st_epoll_data->evtlist_size, timeout); | ||
| 1270 | + | ||
| 1271 | + if (nfd > 0) { | ||
| 1272 | + for (i = 0; i < nfd; i++) { | ||
| 1273 | + osfd = _st_epoll_data->evtlist[i].data.fd; | ||
| 1274 | + _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events; | ||
| 1275 | + if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) { | ||
| 1276 | + /* Also set I/O bits on error */ | ||
| 1277 | + _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd); | ||
| 1278 | + } | ||
| 1279 | + } | ||
| 1280 | + | ||
| 1281 | + for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { | ||
| 1282 | + pq = _ST_POLLQUEUE_PTR(q); | ||
| 1283 | + notify = 0; | ||
| 1284 | + epds = pq->pds + pq->npds; | ||
| 1285 | + | ||
| 1286 | + for (pds = pq->pds; pds < epds; pds++) { | ||
| 1287 | + if (_ST_EPOLL_REVENTS(pds->fd) == 0) { | ||
| 1288 | + pds->revents = 0; | ||
| 1289 | + continue; | ||
| 1290 | + } | ||
| 1291 | + osfd = pds->fd; | ||
| 1292 | + events = pds->events; | ||
| 1293 | + revents = 0; | ||
| 1294 | + if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN)) | ||
| 1295 | + revents |= POLLIN; | ||
| 1296 | + if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT)) | ||
| 1297 | + revents |= POLLOUT; | ||
| 1298 | + if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI)) | ||
| 1299 | + revents |= POLLPRI; | ||
| 1300 | + if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR) | ||
| 1301 | + revents |= POLLERR; | ||
| 1302 | + if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP) | ||
| 1303 | + revents |= POLLHUP; | ||
| 1304 | + | ||
| 1305 | + pds->revents = revents; | ||
| 1306 | + if (revents) { | ||
| 1307 | + notify = 1; | ||
| 1308 | + } | ||
| 1309 | + } | ||
| 1310 | + if (notify) { | ||
| 1311 | + ST_REMOVE_LINK(&pq->links); | ||
| 1312 | + pq->on_ioq = 0; | ||
| 1313 | + /* | ||
| 1314 | + * Here we will only delete/modify descriptors that | ||
| 1315 | + * didn't fire (see comments in _st_epoll_pollset_del()). | ||
| 1316 | + */ | ||
| 1317 | + _st_epoll_pollset_del(pq->pds, pq->npds); | ||
| 1318 | + | ||
| 1319 | + if (pq->thread->flags & _ST_FL_ON_SLEEPQ) | ||
| 1320 | + _ST_DEL_SLEEPQ(pq->thread); | ||
| 1321 | + pq->thread->state = _ST_ST_RUNNABLE; | ||
| 1322 | + _ST_ADD_RUNQ(pq->thread); | ||
| 1323 | + } | ||
| 1324 | + } | ||
| 1325 | + | ||
| 1326 | + for (i = 0; i < nfd; i++) { | ||
| 1327 | + /* Delete/modify descriptors that fired */ | ||
| 1328 | + osfd = _st_epoll_data->evtlist[i].data.fd; | ||
| 1329 | + _ST_EPOLL_REVENTS(osfd) = 0; | ||
| 1330 | + events = _ST_EPOLL_EVENTS(osfd); | ||
| 1331 | + op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; | ||
| 1332 | + ev.events = events; | ||
| 1333 | + ev.data.fd = osfd; | ||
| 1334 | + if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 && | ||
| 1335 | + op == EPOLL_CTL_DEL) { | ||
| 1336 | + _st_epoll_data->evtlist_cnt--; | ||
| 1337 | + } | ||
| 1338 | + } | ||
| 1339 | + } | ||
| 1340 | +} | ||
| 1341 | + | ||
| 1342 | +ST_HIDDEN int _st_epoll_fd_new(int osfd) | ||
| 1343 | +{ | ||
| 1344 | + if (osfd >= _st_epoll_data->fd_data_size && | ||
| 1345 | + _st_epoll_fd_data_expand(osfd) < 0) | ||
| 1346 | + return -1; | ||
| 1347 | + | ||
| 1348 | + return 0; | ||
| 1349 | +} | ||
| 1350 | + | ||
| 1351 | +ST_HIDDEN int _st_epoll_fd_close(int osfd) | ||
| 1352 | +{ | ||
| 1353 | + if (_ST_EPOLL_READ_CNT(osfd) || _ST_EPOLL_WRITE_CNT(osfd) || | ||
| 1354 | + _ST_EPOLL_EXCEP_CNT(osfd)) { | ||
| 1355 | + errno = EBUSY; | ||
| 1356 | + return -1; | ||
| 1357 | + } | ||
| 1358 | + | ||
| 1359 | + return 0; | ||
| 1360 | +} | ||
| 1361 | + | ||
| 1362 | +ST_HIDDEN int _st_epoll_fd_getlimit(void) | ||
| 1363 | +{ | ||
| 1364 | + /* zero means no specific limit */ | ||
| 1365 | + return 0; | ||
| 1366 | +} | ||
| 1367 | + | ||
| 1368 | +/* | ||
| 1369 | + * Check if epoll functions are just stubs. | ||
| 1370 | + */ | ||
| 1371 | +ST_HIDDEN int _st_epoll_is_supported(void) | ||
| 1372 | +{ | ||
| 1373 | + struct epoll_event ev; | ||
| 1374 | + | ||
| 1375 | + ev.events = EPOLLIN; | ||
| 1376 | + ev.data.ptr = NULL; | ||
| 1377 | + /* Guaranteed to fail */ | ||
| 1378 | + epoll_ctl(-1, EPOLL_CTL_ADD, -1, &ev); | ||
| 1379 | + | ||
| 1380 | + return (errno != ENOSYS); | ||
| 1381 | +} | ||
| 1382 | + | ||
| 1383 | +static _st_eventsys_t _st_epoll_eventsys = { | ||
| 1384 | + "epoll", | ||
| 1385 | + ST_EVENTSYS_ALT, | ||
| 1386 | + _st_epoll_init, | ||
| 1387 | + _st_epoll_dispatch, | ||
| 1388 | + _st_epoll_pollset_add, | ||
| 1389 | + _st_epoll_pollset_del, | ||
| 1390 | + _st_epoll_fd_new, | ||
| 1391 | + _st_epoll_fd_close, | ||
| 1392 | + _st_epoll_fd_getlimit | ||
| 1393 | +}; | ||
| 1394 | +#endif /* MD_HAVE_EPOLL */ | ||
| 1395 | + | ||
| 1396 | + | ||
| 1397 | +/***************************************** | ||
| 1398 | + * Public functions | ||
| 1399 | + */ | ||
| 1400 | + | ||
| 1401 | +int st_set_eventsys(int eventsys) | ||
| 1402 | +{ | ||
| 1403 | + if (_st_eventsys) { | ||
| 1404 | + errno = EBUSY; | ||
| 1405 | + return -1; | ||
| 1406 | + } | ||
| 1407 | + | ||
| 1408 | + switch (eventsys) { | ||
| 1409 | + case ST_EVENTSYS_DEFAULT: | ||
| 1410 | +#ifdef USE_POLL | ||
| 1411 | + _st_eventsys = &_st_poll_eventsys; | ||
| 1412 | +#else | ||
| 1413 | + _st_eventsys = &_st_select_eventsys; | ||
| 1414 | +#endif | ||
| 1415 | + break; | ||
| 1416 | + case ST_EVENTSYS_SELECT: | ||
| 1417 | + _st_eventsys = &_st_select_eventsys; | ||
| 1418 | + break; | ||
| 1419 | +#ifdef MD_HAVE_POLL | ||
| 1420 | + case ST_EVENTSYS_POLL: | ||
| 1421 | + _st_eventsys = &_st_poll_eventsys; | ||
| 1422 | + break; | ||
| 1423 | +#endif | ||
| 1424 | + case ST_EVENTSYS_ALT: | ||
| 1425 | +#if defined (MD_HAVE_KQUEUE) | ||
| 1426 | + _st_eventsys = &_st_kq_eventsys; | ||
| 1427 | +#elif defined (MD_HAVE_EPOLL) | ||
| 1428 | + if (_st_epoll_is_supported()) | ||
| 1429 | + _st_eventsys = &_st_epoll_eventsys; | ||
| 1430 | +#endif | ||
| 1431 | + break; | ||
| 1432 | + default: | ||
| 1433 | + errno = EINVAL; | ||
| 1434 | + return -1; | ||
| 1435 | + } | ||
| 1436 | + | ||
| 1437 | + return 0; | ||
| 1438 | +} | ||
| 1439 | + | ||
| 1440 | +int st_get_eventsys(void) | ||
| 1441 | +{ | ||
| 1442 | + return _st_eventsys ? _st_eventsys->val : -1; | ||
| 1443 | +} | ||
| 1444 | + | ||
| 1445 | +const char *st_get_eventsys_name(void) | ||
| 1446 | +{ | ||
| 1447 | + return _st_eventsys ? _st_eventsys->name : ""; | ||
| 1448 | +} | ||
| 1449 | + |
trunk/research/st-1.9/examples/Makefile
0 → 100644
| 1 | +# | ||
| 2 | +# Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. | ||
| 3 | +# All Rights Reserved. | ||
| 4 | +# | ||
| 5 | +# Redistribution and use in source and binary forms, with or without | ||
| 6 | +# modification, are permitted provided that the following conditions | ||
| 7 | +# are met: | ||
| 8 | +# | ||
| 9 | +# 1. Redistributions of source code must retain the above copyright | ||
| 10 | +# notice, this list of conditions and the following disclaimer. | ||
| 11 | +# 2. Redistributions in binary form must reproduce the above copyright | ||
| 12 | +# notice, this list of conditions and the following disclaimer in the | ||
| 13 | +# documentation and/or other materials provided with the distribution. | ||
| 14 | +# 3. Neither the name of Silicon Graphics, Inc. nor the names of its | ||
| 15 | +# contributors may be used to endorse or promote products derived from | ||
| 16 | +# this software without specific prior written permission. | ||
| 17 | +# | ||
| 18 | +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 19 | +# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 20 | +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 21 | +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 22 | +# HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 23 | +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| 24 | +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
| 25 | +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 26 | +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 27 | +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 28 | +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | + | ||
| 30 | +########################## | ||
| 31 | +# Supported OSes: | ||
| 32 | +# | ||
| 33 | +# AIX | ||
| 34 | +# FREEBSD | ||
| 35 | +# HPUX | ||
| 36 | +# HPUX_64 | ||
| 37 | +# IRIX | ||
| 38 | +# IRIX_64 | ||
| 39 | +# LINUX | ||
| 40 | +# LINUX_IA64 | ||
| 41 | +# NETBSD | ||
| 42 | +# OPENBSD | ||
| 43 | +# OSF1 | ||
| 44 | +# SOLARIS | ||
| 45 | +# SOLARIS_64 | ||
| 46 | + | ||
| 47 | +########################## | ||
| 48 | + | ||
| 49 | +CC = cc | ||
| 50 | + | ||
| 51 | +SHELL = /bin/sh | ||
| 52 | +ECHO = /bin/echo | ||
| 53 | + | ||
| 54 | +DEPTH = .. | ||
| 55 | +BUILD = | ||
| 56 | +TARGETDIR = | ||
| 57 | + | ||
| 58 | +DEFINES = | ||
| 59 | +CFLAGS = | ||
| 60 | +OTHER_FLAGS = | ||
| 61 | + | ||
| 62 | +OBJDIR = $(DEPTH)/$(TARGETDIR) | ||
| 63 | +INCDIR = $(DEPTH)/$(TARGETDIR) | ||
| 64 | +LIBST = $(OBJDIR)/libst.a | ||
| 65 | +HEADER = $(INCDIR)/st.h | ||
| 66 | + | ||
| 67 | +LIBRESOLV = | ||
| 68 | +EXTRALIBS = | ||
| 69 | + | ||
| 70 | +ifeq ($(OS),) | ||
| 71 | +EXAMPLES = unknown | ||
| 72 | +else | ||
| 73 | +EXAMPLES = $(OBJDIR)/lookupdns $(OBJDIR)/proxy $(OBJDIR)/server | ||
| 74 | +endif | ||
| 75 | + | ||
| 76 | + | ||
| 77 | +########################## | ||
| 78 | +# Platform section. | ||
| 79 | +# | ||
| 80 | + | ||
| 81 | +ifeq (DARWIN, $(findstring DARWIN, $(OS))) | ||
| 82 | +LIBRESOLV = -lresolv | ||
| 83 | +endif | ||
| 84 | + | ||
| 85 | +ifeq (LINUX, $(findstring LINUX, $(OS))) | ||
| 86 | +LIBRESOLV = -lresolv | ||
| 87 | +endif | ||
| 88 | + | ||
| 89 | +ifeq (SOLARIS, $(findstring SOLARIS, $(OS))) | ||
| 90 | +LIBRESOLV = -lresolv | ||
| 91 | +EXTRALIBS = -lsocket -lnsl | ||
| 92 | +endif | ||
| 93 | + | ||
| 94 | +# | ||
| 95 | +# End of platform section. | ||
| 96 | +########################## | ||
| 97 | + | ||
| 98 | + | ||
| 99 | +all: $(EXAMPLES) | ||
| 100 | + | ||
| 101 | +$(OBJDIR)/lookupdns: lookupdns.c $(OBJDIR)/res.o $(LIBST) $(HEADER) | ||
| 102 | + $(CC) $(CFLAGS) -I$(INCDIR) lookupdns.c $(OBJDIR)/res.o $(LIBST) $(LIBRESOLV) $(EXTRALIBS) -o $@ | ||
| 103 | + | ||
| 104 | +$(OBJDIR)/proxy: proxy.c $(LIBST) $(HEADER) | ||
| 105 | + $(CC) $(CFLAGS) -I$(INCDIR) proxy.c $(LIBST) $(EXTRALIBS) -o $@ | ||
| 106 | + | ||
| 107 | +$(OBJDIR)/server: server.c $(OBJDIR)/error.o $(LIBST) $(HEADER) | ||
| 108 | + $(CC) $(CFLAGS) -I$(INCDIR) server.c $(OBJDIR)/error.o $(LIBST) $(EXTRALIBS) -o $@ | ||
| 109 | + | ||
| 110 | +$(OBJDIR)/%.o: %.c | ||
| 111 | + $(CC) $(CFLAGS) -I$(INCDIR) -c $< -o $@ | ||
| 112 | + | ||
| 113 | +.DEFAULT: | ||
| 114 | + @cd $(DEPTH); $(MAKE) $@ | ||
| 115 | + |
trunk/research/st-1.9/examples/README
0 → 100644
| 1 | +Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. | ||
| 2 | +All Rights Reserved. | ||
| 3 | + | ||
| 4 | + | ||
| 5 | +This directory contains three example programs. | ||
| 6 | + | ||
| 7 | + | ||
| 8 | +--------------------------------------------------------------------------- | ||
| 9 | + | ||
| 10 | +PROGRAM | ||
| 11 | + | ||
| 12 | + lookupdns | ||
| 13 | + | ||
| 14 | +FILES | ||
| 15 | + | ||
| 16 | + lookupdns.c | ||
| 17 | + res.c | ||
| 18 | + | ||
| 19 | +USAGE | ||
| 20 | + | ||
| 21 | + lookupdns <hostname1> [<hostname2>] ... | ||
| 22 | + | ||
| 23 | +DESCRIPTION | ||
| 24 | + | ||
| 25 | + This program performs asynchronous DNS host name resolution and reports | ||
| 26 | + the IP address of each <hostname> specified as a command line argument. | ||
| 27 | + One ST thread is created for each host name. All threads do host name | ||
| 28 | + resolution concurrently. | ||
| 29 | + | ||
| 30 | + | ||
| 31 | +--------------------------------------------------------------------------- | ||
| 32 | + | ||
| 33 | +PROGRAM | ||
| 34 | + | ||
| 35 | + proxy | ||
| 36 | + | ||
| 37 | +FILES | ||
| 38 | + | ||
| 39 | + proxy.c | ||
| 40 | + | ||
| 41 | +USAGE | ||
| 42 | + | ||
| 43 | + proxy -l <local_addr> -r <remote_addr> [-p <num_processes>] [-S] | ||
| 44 | + | ||
| 45 | + -l <local_addr> bind to local address specified as [<host>]:<port> | ||
| 46 | + -r <remote_addr> connect to remote address specified as <host>:<port> | ||
| 47 | + -p <num_processes> create specified number of processes | ||
| 48 | + -S serialize accept() calls from different processes | ||
| 49 | + on the same listening socket (if needed). | ||
| 50 | + | ||
| 51 | +DESCRIPTION | ||
| 52 | + | ||
| 53 | + This program acts as a generic gateway. It listens for connections to a | ||
| 54 | + local address. Upon accepting a client connection, it connects to the | ||
| 55 | + specified remote address and then just pumps the data through without any | ||
| 56 | + modification. | ||
| 57 | + | ||
| 58 | + | ||
| 59 | +--------------------------------------------------------------------------- | ||
| 60 | + | ||
| 61 | +PROGRAM | ||
| 62 | + | ||
| 63 | + server | ||
| 64 | + | ||
| 65 | +FILES | ||
| 66 | + | ||
| 67 | + server.c | ||
| 68 | + error.c | ||
| 69 | + | ||
| 70 | +USAGE | ||
| 71 | + | ||
| 72 | + server -l <log_directory> [<options>] | ||
| 73 | + | ||
| 74 | + -l <log_directory> open all log files in specified directory. | ||
| 75 | + | ||
| 76 | + Possible options: | ||
| 77 | + | ||
| 78 | + -b <host>:<port> bind to specified address (multiple addresses | ||
| 79 | + are permitted) | ||
| 80 | + -p <num_processes> create specified number of processes | ||
| 81 | + -t <min_thr>:<max_thr> specify thread limits per listening socket | ||
| 82 | + across all processes | ||
| 83 | + -u <user> change server's user id to specified value | ||
| 84 | + -q <backlog> set max length of pending connections queue | ||
| 85 | + -a enable access logging | ||
| 86 | + -i run in interactive mode (useful for debugging) | ||
| 87 | + -S serialize accept() calls from different processes | ||
| 88 | + on the same listening socket (if needed). | ||
| 89 | + | ||
| 90 | +DESCRIPTION | ||
| 91 | + | ||
| 92 | + This program is a general server example. It accepts a client connection | ||
| 93 | + and outputs a short HTML page. It can be easily adapted to provide | ||
| 94 | + other services. | ||
| 95 | + | ||
| 96 | + | ||
| 97 | +--------------------------------------------------------------------------- | ||
| 98 | + |
trunk/research/st-1.9/examples/error.c
0 → 100644
| 1 | +/* | ||
| 2 | + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. | ||
| 3 | + * All Rights Reserved. | ||
| 4 | + * | ||
| 5 | + * Redistribution and use in source and binary forms, with or without | ||
| 6 | + * modification, are permitted provided that the following conditions | ||
| 7 | + * are met: | ||
| 8 | + * | ||
| 9 | + * 1. Redistributions of source code must retain the above copyright | ||
| 10 | + * notice, this list of conditions and the following disclaimer. | ||
| 11 | + * 2. Redistributions in binary form must reproduce the above copyright | ||
| 12 | + * notice, this list of conditions and the following disclaimer in the | ||
| 13 | + * documentation and/or other materials provided with the distribution. | ||
| 14 | + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its | ||
| 15 | + * contributors may be used to endorse or promote products derived from | ||
| 16 | + * this software without specific prior written permission. | ||
| 17 | + * | ||
| 18 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 19 | + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 20 | + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 21 | + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 22 | + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 23 | + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| 24 | + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
| 25 | + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 26 | + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 27 | + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 28 | + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | + */ | ||
| 30 | + | ||
| 31 | +#include <stdarg.h> | ||
| 32 | +#include <stdio.h> | ||
| 33 | +#include <stdlib.h> | ||
| 34 | +#include <string.h> | ||
| 35 | +#include <unistd.h> | ||
| 36 | +#include <errno.h> | ||
| 37 | +#include "st.h" | ||
| 38 | + | ||
| 39 | +/* | ||
| 40 | + * Simple error reporting functions. | ||
| 41 | + * Suggested in W. Richard Stevens' "Advanced Programming in the UNIX | ||
| 42 | + * Environment". | ||
| 43 | + */ | ||
| 44 | + | ||
| 45 | +#define MAXLINE 4096 /* max line length */ | ||
| 46 | + | ||
| 47 | +static void err_doit(int, int, const char *, va_list); | ||
| 48 | + | ||
| 49 | + | ||
/*
 * Report a failed system call and carry on.
 * The formatted message, followed by the description of the current
 * errno value, is written to descriptor fd.
 */
void err_sys_report(int fd, const char *fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  err_doit(fd, 1, fmt, args);   /* 1: append strerror(errno) */
  va_end(args);
}
| 62 | + | ||
| 63 | + | ||
/*
 * Report a failed system call and give up.
 * The formatted message, followed by the description of the current
 * errno value, is written to descriptor fd; the process then exits
 * with status 1.
 */
void err_sys_quit(int fd, const char *fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  err_doit(fd, 1, fmt, args);   /* 1: append strerror(errno) */
  va_end(args);

  exit(1);
}
| 77 | + | ||
| 78 | + | ||
/*
 * Report a failed system call and abort.
 * The formatted message, followed by the description of the current
 * errno value, is written to descriptor fd; abort() is then called so
 * that a core dump can be produced.
 */
void err_sys_dump(int fd, const char *fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  err_doit(fd, 1, fmt, args);   /* 1: append strerror(errno) */
  va_end(args);

  abort();    /* dump core and terminate */
  exit(1);    /* shouldn't get here */
}
| 93 | + | ||
| 94 | + | ||
/*
 * Report an error that has nothing to do with a system call and carry on.
 * Only the formatted message is written to descriptor fd; errno is not
 * included.
 */
void err_report(int fd, const char *fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  err_doit(fd, 0, fmt, args);   /* 0: no errno text */
  va_end(args);
}
| 107 | + | ||
| 108 | + | ||
/*
 * Report an error that has nothing to do with a system call and give up.
 * Only the formatted message is written to descriptor fd; the process
 * then exits with status 1.
 */
void err_quit(int fd, const char *fmt, ...)
{
  va_list args;

  va_start(args, fmt);
  err_doit(fd, 0, fmt, args);   /* 0: no errno text */
  va_end(args);

  exit(1);
}
| 122 | + | ||
| 123 | + | ||
/*
 * Return a pointer to a string containing current time, formatted as
 * "[DD/Mon/YYYY:HH:MM:SS] ".
 *
 * The string lives in a static buffer that is overwritten by later
 * calls. It is only regenerated when st_time() differs from the value
 * seen by the previous call. If localtime() fails, the previously
 * formatted string (empty on the very first call) is returned unchanged.
 */
char *err_tstamp(void)
{
  static const char *const months[] = { "Jan", "Feb", "Mar", "Apr", "May",
                                        "Jun", "Jul", "Aug", "Sep", "Oct",
                                        "Nov", "Dec" };
  static char str[32];
  static time_t lastt = 0;
  struct tm *tmp;
  time_t currt = st_time();

  if (currt == lastt)
    return str;

  tmp = localtime(&currt);
  if (tmp == NULL)
    return str;   /* conversion failed: keep the last good stamp */

  /* Bounded write: never runs past the static buffer */
  snprintf(str, sizeof(str), "[%02d/%s/%d:%02d:%02d:%02d] ", tmp->tm_mday,
           months[tmp->tm_mon], 1900 + tmp->tm_year, tmp->tm_hour,
           tmp->tm_min, tmp->tm_sec);
  lastt = currt;

  return str;
}
| 147 | + | ||
| 148 | + | ||
| 149 | +/* | ||
| 150 | + * Print a message and return to caller. | ||
| 151 | + * Caller specifies "errnoflag". | ||
| 152 | + */ | ||
| 153 | +static void err_doit(int fd, int errnoflag, const char *fmt, va_list ap) | ||
| 154 | +{ | ||
| 155 | + int errno_save; | ||
| 156 | + char buf[MAXLINE]; | ||
| 157 | + | ||
| 158 | + errno_save = errno; /* value caller might want printed */ | ||
| 159 | + strcpy(buf, err_tstamp()); /* prepend a message with time stamp */ | ||
| 160 | + vsprintf(buf + strlen(buf), fmt, ap); | ||
| 161 | + if (errnoflag) | ||
| 162 | + sprintf(buf + strlen(buf), ": %s\n", strerror(errno_save)); | ||
| 163 | + else | ||
| 164 | + strcat(buf, "\n"); | ||
| 165 | + write(fd, buf, strlen(buf)); | ||
| 166 | + errno = errno_save; | ||
| 167 | +} | ||
| 168 | + |
trunk/research/st-1.9/examples/lookupdns.c
0 → 100644
| 1 | +/* | ||
| 2 | + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. | ||
| 3 | + * All Rights Reserved. | ||
| 4 | + * | ||
| 5 | + * Redistribution and use in source and binary forms, with or without | ||
| 6 | + * modification, are permitted provided that the following conditions | ||
| 7 | + * are met: | ||
| 8 | + * | ||
| 9 | + * 1. Redistributions of source code must retain the above copyright | ||
| 10 | + * notice, this list of conditions and the following disclaimer. | ||
| 11 | + * 2. Redistributions in binary form must reproduce the above copyright | ||
| 12 | + * notice, this list of conditions and the following disclaimer in the | ||
| 13 | + * documentation and/or other materials provided with the distribution. | ||
| 14 | + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its | ||
| 15 | + * contributors may be used to endorse or promote products derived from | ||
| 16 | + * this software without specific prior written permission. | ||
| 17 | + * | ||
| 18 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 19 | + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 20 | + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 21 | + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 22 | + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 23 | + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| 24 | + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
| 25 | + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 26 | + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 27 | + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 28 | + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | + */ | ||
| 30 | + | ||
| 31 | +#include <stdio.h> | ||
| 32 | +#include <stdlib.h> | ||
| 33 | +#include <unistd.h> | ||
| 34 | +#include <sys/time.h> | ||
| 35 | +#include <sys/types.h> | ||
| 36 | +#include <sys/socket.h> | ||
| 37 | +#include <netinet/in.h> | ||
| 38 | +#include <arpa/inet.h> | ||
| 39 | +#include <netdb.h> | ||
| 40 | +#include "st.h" | ||
| 41 | + | ||
| 42 | +#if !defined(NETDB_INTERNAL) && defined(h_NETDB_INTERNAL) | ||
| 43 | +#define NETDB_INTERNAL h_NETDB_INTERNAL | ||
| 44 | +#endif | ||
| 45 | + | ||
| 46 | +/* Resolution timeout (in microseconds) */ | ||
| 47 | +#define TIMEOUT (2*1000000LL) | ||
| 48 | + | ||
| 49 | +/* External function defined in the res.c file */ | ||
| 50 | +int dns_getaddr(const char *host, struct in_addr *addr, st_utime_t timeout); | ||
| 51 | + | ||
| 52 | + | ||
| 53 | +void *do_resolve(void *host) | ||
| 54 | +{ | ||
| 55 | + struct in_addr addr; | ||
| 56 | + | ||
| 57 | + /* Use dns_getaddr() instead of gethostbyname(3) to get IP address */ | ||
| 58 | + if (dns_getaddr(host, &addr, TIMEOUT) < 0) { | ||
| 59 | + fprintf(stderr, "dns_getaddr: can't resolve %s: ", (char *)host); | ||
| 60 | + if (h_errno == NETDB_INTERNAL) | ||
| 61 | + perror(""); | ||
| 62 | + else | ||
| 63 | + herror(""); | ||
| 64 | + } else | ||
| 65 | + printf("%-40s %s\n", (char *)host, inet_ntoa(addr)); | ||
| 66 | + | ||
| 67 | + return NULL; | ||
| 68 | +} | ||
| 69 | + | ||
| 70 | + | ||
| 71 | +/* | ||
| 72 | + * Asynchronous DNS host name resolution. This program creates one | ||
| 73 | + * ST thread for each host name (specified as command line arguments). | ||
| 74 | + * All threads do host name resolution concurrently. | ||
| 75 | + */ | ||
| 76 | +int main(int argc, char *argv[]) | ||
| 77 | +{ | ||
| 78 | + int i; | ||
| 79 | + | ||
| 80 | + if (argc < 2) { | ||
| 81 | + fprintf(stderr, "Usage: %s <hostname1> [<hostname2>] ...\n", argv[0]); | ||
| 82 | + exit(1); | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + if (st_init() < 0) { | ||
| 86 | + perror("st_init"); | ||
| 87 | + exit(1); | ||
| 88 | + } | ||
| 89 | + | ||
| 90 | + for (i = 1; i < argc; i++) { | ||
| 91 | + /* Create a separate thread for each host name */ | ||
| 92 | + if (st_thread_create(do_resolve, argv[i], 0, 0) == NULL) { | ||
| 93 | + perror("st_thread_create"); | ||
| 94 | + exit(1); | ||
| 95 | + } | ||
| 96 | + } | ||
| 97 | + | ||
| 98 | + st_thread_exit(NULL); | ||
| 99 | + | ||
| 100 | + /* NOTREACHED */ | ||
| 101 | + return 1; | ||
| 102 | +} | ||
| 103 | + |
trunk/research/st-1.9/examples/proxy.c
0 → 100644
| 1 | +/* | ||
| 2 | + * Portions created by SGI are Copyright (C) 2000 Silicon Graphics, Inc. | ||
| 3 | + * All Rights Reserved. | ||
| 4 | + * | ||
| 5 | + * Redistribution and use in source and binary forms, with or without | ||
| 6 | + * modification, are permitted provided that the following conditions | ||
| 7 | + * are met: | ||
| 8 | + * | ||
| 9 | + * 1. Redistributions of source code must retain the above copyright | ||
| 10 | + * notice, this list of conditions and the following disclaimer. | ||
| 11 | + * 2. Redistributions in binary form must reproduce the above copyright | ||
| 12 | + * notice, this list of conditions and the following disclaimer in the | ||
| 13 | + * documentation and/or other materials provided with the distribution. | ||
| 14 | + * 3. Neither the name of Silicon Graphics, Inc. nor the names of its | ||
| 15 | + * contributors may be used to endorse or promote products derived from | ||
| 16 | + * this software without specific prior written permission. | ||
| 17 | + * | ||
| 18 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 19 | + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 20 | + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 21 | + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 22 | + * HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 23 | + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | ||
| 24 | + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
| 25 | + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
| 26 | + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
| 27 | + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 28 | + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | + */ | ||
| 30 | + | ||
| 31 | +#include <stdio.h> | ||
| 32 | +#include <stdlib.h> | ||
| 33 | +#include <string.h> | ||
| 34 | +#include <signal.h> | ||
| 35 | +#include <unistd.h> | ||
| 36 | +#include <fcntl.h> | ||
| 37 | +#include <sys/types.h> | ||
| 38 | +#include <sys/stat.h> | ||
| 39 | +#include <sys/socket.h> | ||
| 40 | +#include <netinet/in.h> | ||
| 41 | +#include <arpa/inet.h> | ||
| 42 | +#include <netdb.h> | ||
| 43 | +#include "st.h" | ||
| 44 | + | ||
| 45 | +#define IOBUFSIZE (16*1024) | ||
| 46 | + | ||
| 47 | +#define IOV_LEN 256 | ||
| 48 | +#define IOV_COUNT (IOBUFSIZE / IOV_LEN) | ||
| 49 | + | ||
| 50 | +#ifndef INADDR_NONE | ||
| 51 | +#define INADDR_NONE 0xffffffff | ||
| 52 | +#endif | ||
| 53 | + | ||
| 54 | +static char *prog; /* Program name */ | ||
| 55 | +static struct sockaddr_in rmt_addr; /* Remote address */ | ||
| 56 | + | ||
| 57 | +static unsigned long testing; | ||
| 58 | +#define TESTING_VERBOSE 0x1 | ||
| 59 | +#define TESTING_READV 0x2 | ||
| 60 | +#define TESTING_READ_RESID 0x4 | ||
| 61 | +#define TESTING_WRITEV 0x8 | ||
| 62 | +#define TESTING_WRITE_RESID 0x10 | ||
| 63 | + | ||
| 64 | +static void read_address(const char *str, struct sockaddr_in *sin); | ||
| 65 | +static void start_daemon(void); | ||
| 66 | +static int cpu_count(void); | ||
| 67 | +static void set_concurrency(int nproc); | ||
| 68 | +static void *handle_request(void *arg); | ||
| 69 | +static void print_sys_error(const char *msg); | ||
| 70 | + | ||
| 71 | + | ||
| 72 | +/* | ||
| 73 | + * This program acts as a generic gateway. It listens for connections | ||
| 74 | + * to a local address ('-l' option). Upon accepting a client connection, | ||
| 75 | + * it connects to the specified remote address ('-r' option) and then | ||
| 76 | + * just pumps the data through without any modification. | ||
| 77 | + */ | ||
| 78 | +int main(int argc, char *argv[]) | ||
| 79 | +{ | ||
| 80 | + extern char *optarg; | ||
| 81 | + int opt, sock, n; | ||
| 82 | + int laddr, raddr, num_procs, alt_ev, one_process; | ||
| 83 | + int serialize_accept = 0; | ||
| 84 | + struct sockaddr_in lcl_addr, cli_addr; | ||
| 85 | + st_netfd_t cli_nfd, srv_nfd; | ||
| 86 | + | ||
| 87 | + prog = argv[0]; | ||
| 88 | + num_procs = laddr = raddr = alt_ev = one_process = 0; | ||
| 89 | + | ||
| 90 | + /* Parse arguments */ | ||
| 91 | + while((opt = getopt(argc, argv, "l:r:p:Saht:X")) != EOF) { | ||
| 92 | + switch (opt) { | ||
| 93 | + case 'a': | ||
| 94 | + alt_ev = 1; | ||
| 95 | + break; | ||
| 96 | + case 'l': | ||
| 97 | + read_address(optarg, &lcl_addr); | ||
| 98 | + laddr = 1; | ||
| 99 | + break; | ||
| 100 | + case 'r': | ||
| 101 | + read_address(optarg, &rmt_addr); | ||
| 102 | + if (rmt_addr.sin_addr.s_addr == INADDR_ANY) { | ||
| 103 | + fprintf(stderr, "%s: invalid remote address: %s\n", prog, optarg); | ||
| 104 | + exit(1); | ||
| 105 | + } | ||
| 106 | + raddr = 1; | ||
| 107 | + break; | ||
| 108 | + case 'p': | ||
| 109 | + num_procs = atoi(optarg); | ||
| 110 | + if (num_procs < 1) { | ||
| 111 | + fprintf(stderr, "%s: invalid number of processes: %s\n", prog, optarg); | ||
| 112 | + exit(1); | ||
| 113 | + } | ||
| 114 | + break; | ||
| 115 | + case 'S': | ||
| 116 | + /* | ||
| 117 | + * Serialization decision is tricky on some platforms. For example, | ||
| 118 | + * Solaris 2.6 and above has kernel sockets implementation, so supposedly | ||
| 119 | + * there is no need for serialization. The ST library may be compiled | ||
| 120 | + * on one OS version, but used on another, so the need for serialization | ||
| 121 | + * should be determined at run time by the application. Since it's just | ||
| 122 | + * an example, the serialization decision is left up to user. | ||
| 123 | + * Only on platforms where the serialization is never needed on any OS | ||
| 124 | + * version st_netfd_serialize_accept() is a no-op. | ||
| 125 | + */ | ||
| 126 | + serialize_accept = 1; | ||
| 127 | + break; | ||
| 128 | + case 't': | ||
| 129 | + testing = strtoul(optarg, NULL, 0); | ||
| 130 | + break; | ||
| 131 | + case 'X': | ||
| 132 | + one_process = 1; | ||
| 133 | + break; | ||
| 134 | + case 'h': | ||
| 135 | + case '?': | ||
| 136 | + fprintf(stderr, "Usage: %s [options] -l <[host]:port> -r <host:port>\n", | ||
| 137 | + prog); | ||
| 138 | + fprintf(stderr, "options are:\n"); | ||
| 139 | + fprintf(stderr, " -p <num_processes> number of parallel processes\n"); | ||
| 140 | + fprintf(stderr, " -S serialize accepts\n"); | ||
| 141 | + fprintf(stderr, " -a use alternate event system\n"); | ||
| 142 | +#ifdef DEBUG | ||
| 143 | + fprintf(stderr, " -t mask testing/debugging mode\n"); | ||
| 144 | + fprintf(stderr, " -X one process, don't daemonize\n"); | ||
| 145 | +#endif | ||
| 146 | + exit(1); | ||
| 147 | + } | ||
| 148 | + } | ||
| 149 | + if (!laddr) { | ||
| 150 | + fprintf(stderr, "%s: local address required\n", prog); | ||
| 151 | + exit(1); | ||
| 152 | + } | ||
| 153 | + if (!raddr) { | ||
| 154 | + fprintf(stderr, "%s: remote address required\n", prog); | ||
| 155 | + exit(1); | ||
| 156 | + } | ||
| 157 | + if (num_procs == 0) | ||
| 158 | + num_procs = cpu_count(); | ||
| 159 | + | ||
| 160 | + fprintf(stderr, "%s: starting proxy daemon on %s:%d\n", prog, | ||
| 161 | + inet_ntoa(lcl_addr.sin_addr), ntohs(lcl_addr.sin_port)); | ||
| 162 | + | ||
| 163 | + /* Start the daemon */ | ||
| 164 | + if (one_process) | ||
| 165 | + num_procs = 1; | ||
| 166 | + else | ||
| 167 | + start_daemon(); | ||
| 168 | + | ||
| 169 | + if (alt_ev) | ||
| 170 | + st_set_eventsys(ST_EVENTSYS_ALT); | ||
| 171 | + | ||
| 172 | + /* Initialize the ST library */ | ||
| 173 | + if (st_init() < 0) { | ||
| 174 | + print_sys_error("st_init"); | ||
| 175 | + exit(1); | ||
| 176 | + } | ||
| 177 | + | ||
| 178 | + /* Create and bind listening socket */ | ||
| 179 | + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) { | ||
| 180 | + print_sys_error("socket"); | ||
| 181 | + exit(1); | ||
| 182 | + } | ||
| 183 | + n = 1; | ||
| 184 | + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&n, sizeof(n)) < 0) { | ||
| 185 | + print_sys_error("setsockopt"); | ||
| 186 | + exit(1); | ||
| 187 | + } | ||
| 188 | + if (bind(sock, (struct sockaddr *)&lcl_addr, sizeof(lcl_addr)) < 0) { | ||
| 189 | + print_sys_error("bind"); | ||
| 190 | + exit(1); | ||
| 191 | + } | ||
| 192 | + listen(sock, 128); | ||
| 193 | + if ((srv_nfd = st_netfd_open_socket(sock)) == NULL) { | ||
| 194 | + print_sys_error("st_netfd_open"); | ||
| 195 | + exit(1); | ||
| 196 | + } | ||
| 197 | + /* See the comment regarding serialization decision above */ | ||
| 198 | + if (num_procs > 1 && serialize_accept && st_netfd_serialize_accept(srv_nfd) | ||
| 199 | + < 0) { | ||
| 200 | + print_sys_error("st_netfd_serialize_accept"); | ||
| 201 | + exit(1); | ||
| 202 | + } | ||
| 203 | + | ||
| 204 | + /* Start server processes */ | ||
| 205 | + if (!one_process) | ||
| 206 | + set_concurrency(num_procs); | ||
| 207 | + | ||
| 208 | + for ( ; ; ) { | ||
| 209 | + n = sizeof(cli_addr); | ||
| 210 | + cli_nfd = st_accept(srv_nfd, (struct sockaddr *)&cli_addr, &n, | ||
| 211 | + ST_UTIME_NO_TIMEOUT); | ||
| 212 | + if (cli_nfd == NULL) { | ||
| 213 | + print_sys_error("st_accept"); | ||
| 214 | + exit(1); | ||
| 215 | + } | ||
| 216 | + if (st_thread_create(handle_request, cli_nfd, 0, 0) == NULL) { | ||
| 217 | + print_sys_error("st_thread_create"); | ||
| 218 | + exit(1); | ||
| 219 | + } | ||
| 220 | + } | ||
| 221 | + | ||
| 222 | + /* NOTREACHED */ | ||
| 223 | + return 1; | ||
| 224 | +} | ||
| 225 | + | ||
| 226 | + | ||
| 227 | +static void read_address(const char *str, struct sockaddr_in *sin) | ||
| 228 | +{ | ||
| 229 | + char host[128], *p; | ||
| 230 | + struct hostent *hp; | ||
| 231 | + unsigned short port; | ||
| 232 | + | ||
| 233 | + strcpy(host, str); | ||
| 234 | + if ((p = strchr(host, ':')) == NULL) { | ||
| 235 | + fprintf(stderr, "%s: invalid address: %s\n", prog, host); | ||
| 236 | + exit(1); | ||
| 237 | + } | ||
| 238 | + *p++ = '\0'; | ||
| 239 | + port = (unsigned short) atoi(p); | ||
| 240 | + if (port < 1) { | ||
| 241 | + fprintf(stderr, "%s: invalid port: %s\n", prog, p); | ||
| 242 | + exit(1); | ||
| 243 | + } | ||
| 244 | + | ||
| 245 | + memset(sin, 0, sizeof(struct sockaddr_in)); | ||
| 246 | + sin->sin_family = AF_INET; | ||
| 247 | + sin->sin_port = htons(port); | ||
| 248 | + if (host[0] == '\0') { | ||
| 249 | + sin->sin_addr.s_addr = INADDR_ANY; | ||
| 250 | + return; | ||
| 251 | + } | ||
| 252 | + sin->sin_addr.s_addr = inet_addr(host); | ||
| 253 | + if (sin->sin_addr.s_addr == INADDR_NONE) { | ||
| 254 | + /* not dotted-decimal */ | ||
| 255 | + if ((hp = gethostbyname(host)) == NULL) { | ||
| 256 | + fprintf(stderr, "%s: can't resolve address: %s\n", prog, host); | ||
| 257 | + exit(1); | ||
| 258 | + } | ||
| 259 | + memcpy(&sin->sin_addr, hp->h_addr, hp->h_length); | ||
| 260 | + } | ||
| 261 | +} | ||
| 262 | + | ||
| 263 | +#ifdef DEBUG | ||
/*
 * Debug helper: dump an iovec array to stdout.
 *
 * iov  - array of I/O vectors to describe.
 * niov - number of entries in iov.
 *
 * Prints one line per entry (base pointer and length in hex and decimal)
 * followed by the sum of all lengths.
 */
static void show_iov(const struct iovec *iov, int niov)
{
  int i;
  size_t total;

  /* %p requires a pointer to void, hence the explicit casts */
  printf("iov %p has %d entries:\n", (const void *) iov, niov);
  total = 0;
  for (i = 0; i < niov; i++) {
    printf("iov[%3d] iov_base=%p iov_len=0x%lx(%lu)\n",
           i, iov[i].iov_base, (unsigned long) iov[i].iov_len,
           (unsigned long) iov[i].iov_len);
    total += iov[i].iov_len;
  }
  /* Both arguments are unsigned long, so the decimal form uses %lu */
  printf("total 0x%lx(%lu)\n", (unsigned long) total, (unsigned long) total);
}
| 279 | + | ||
| 280 | +/* | ||
| 281 | + * This version is tricked out to test all the | ||
| 282 | + * st_(read|write)v?(_resid)? variants. Use the non-DEBUG version for | ||
| 283 | + * anything serious. st_(read|write) are all this function really | ||
| 284 | + * needs. | ||
| 285 | + */ | ||
| 286 | +static int pass(st_netfd_t in, st_netfd_t out) | ||
| 287 | +{ | ||
| 288 | + char buf[IOBUFSIZE]; | ||
| 289 | + struct iovec iov[IOV_COUNT]; | ||
| 290 | + int ioviter, nw, nr; | ||
| 291 | + | ||
| 292 | + if (testing & TESTING_READV) { | ||
| 293 | + for (ioviter = 0; ioviter < IOV_COUNT; ioviter++) { | ||
| 294 | + iov[ioviter].iov_base = &buf[ioviter * IOV_LEN]; | ||
| 295 | + iov[ioviter].iov_len = IOV_LEN; | ||
| 296 | + } | ||
| 297 | + if (testing & TESTING_VERBOSE) { | ||
| 298 | + printf("readv(%p)...\n", in); | ||
| 299 | + show_iov(iov, IOV_COUNT); | ||
| 300 | + } | ||
| 301 | + if (testing & TESTING_READ_RESID) { | ||
| 302 | + struct iovec *riov = iov; | ||
| 303 | + int riov_cnt = IOV_COUNT; | ||
| 304 | + if (st_readv_resid(in, &riov, &riov_cnt, ST_UTIME_NO_TIMEOUT) == 0) { | ||
| 305 | + if (testing & TESTING_VERBOSE) { | ||
| 306 | + printf("resid\n"); | ||
| 307 | + show_iov(riov, riov_cnt); | ||
| 308 | + printf("full\n"); | ||
| 309 | + show_iov(iov, IOV_COUNT); | ||
| 310 | + } | ||
| 311 | + nr = 0; | ||
| 312 | + for (ioviter = 0; ioviter < IOV_COUNT; ioviter++) | ||
| 313 | + nr += iov[ioviter].iov_len; | ||
| 314 | + nr = IOBUFSIZE - nr; | ||
| 315 | + } else | ||
| 316 | + nr = -1; | ||
| 317 | + } else | ||
| 318 | + nr = (int) st_readv(in, iov, IOV_COUNT, ST_UTIME_NO_TIMEOUT); | ||
| 319 | + } else { | ||
| 320 | + if (testing & TESTING_READ_RESID) { | ||
| 321 | + size_t resid = IOBUFSIZE; | ||
| 322 | + if (st_read_resid(in, buf, &resid, ST_UTIME_NO_TIMEOUT) == 0) | ||
| 323 | + nr = IOBUFSIZE - resid; | ||
| 324 | + else | ||
| 325 | + nr = -1; | ||
| 326 | + } else | ||
| 327 | + nr = (int) st_read(in, buf, IOBUFSIZE, ST_UTIME_NO_TIMEOUT); | ||
| 328 | + } | ||
| 329 | + if (testing & TESTING_VERBOSE) | ||
| 330 | + printf("got 0x%x(%d) E=%d\n", nr, nr, errno); | ||
| 331 | + | ||
| 332 | + if (nr <= 0) | ||
| 333 | + return 0; | ||
| 334 | + | ||
| 335 | + if (testing & TESTING_WRITEV) { | ||
| 336 | + for (nw = 0, ioviter = 0; nw < nr; | ||
| 337 | + nw += iov[ioviter].iov_len, ioviter++) { | ||
| 338 | + iov[ioviter].iov_base = &buf[nw]; | ||
| 339 | + iov[ioviter].iov_len = nr - nw; | ||
| 340 | + if (iov[ioviter].iov_len > IOV_LEN) | ||
| 341 | + iov[ioviter].iov_len = IOV_LEN; | ||
| 342 | + } | ||
| 343 | + if (testing & TESTING_VERBOSE) { | ||
| 344 | + printf("writev(%p)...\n", out); | ||
| 345 | + show_iov(iov, ioviter); | ||
| 346 | + } | ||
| 347 | + if (testing & TESTING_WRITE_RESID) { | ||
| 348 | + struct iovec *riov = iov; | ||
| 349 | + int riov_cnt = ioviter; | ||
| 350 | + if (st_writev_resid(out, &riov, &riov_cnt, ST_UTIME_NO_TIMEOUT) == 0) { | ||
| 351 | + if (testing & TESTING_VERBOSE) { | ||
| 352 | + printf("resid\n"); | ||
| 353 | + show_iov(riov, riov_cnt); | ||
| 354 | + printf("full\n"); | ||
| 355 | + show_iov(iov, ioviter); | ||
| 356 | + } | ||
| 357 | + nw = 0; | ||
| 358 | + while (--ioviter >= 0) | ||
| 359 | + nw += iov[ioviter].iov_len; | ||
| 360 | + nw = nr - nw; | ||
| 361 | + } else | ||
| 362 | + nw = -1; | ||
| 363 | + } else | ||
| 364 | + nw = st_writev(out, iov, ioviter, ST_UTIME_NO_TIMEOUT); | ||
| 365 | + } else { | ||
| 366 | + if (testing & TESTING_WRITE_RESID) { | ||
| 367 | + size_t resid = nr; | ||
| 368 | + if (st_write_resid(out, buf, &resid, ST_UTIME_NO_TIMEOUT) == 0) | ||
| 369 | + nw = nr - resid; | ||
| 370 | + else | ||
| 371 | + nw = -1; | ||
| 372 | + } else | ||
| 373 | + nw = st_write(out, buf, nr, ST_UTIME_NO_TIMEOUT); | ||
| 374 | + } | ||
| 375 | + if (testing & TESTING_VERBOSE) | ||
| 376 | + printf("put 0x%x(%d) E=%d\n", nw, nw, errno); | ||
| 377 | + | ||
| 378 | + if (nw != nr) | ||
| 379 | + return 0; | ||
| 380 | + | ||
| 381 | + return 1; | ||
| 382 | +} | ||
| 383 | +#else /* DEBUG */ | ||
| 384 | +/* | ||
| 385 | + * This version is the simple one suitable for serious use. | ||
| 386 | + */ | ||
| 387 | +static int pass(st_netfd_t in, st_netfd_t out) | ||
| 388 | +{ | ||
| 389 | + char buf[IOBUFSIZE]; | ||
| 390 | + int nw, nr; | ||
| 391 | + | ||
| 392 | + nr = (int) st_read(in, buf, IOBUFSIZE, ST_UTIME_NO_TIMEOUT); | ||
| 393 | + if (nr <= 0) | ||
| 394 | + return 0; | ||
| 395 | + | ||
| 396 | + nw = st_write(out, buf, nr, ST_UTIME_NO_TIMEOUT); | ||
| 397 | + if (nw != nr) | ||
| 398 | + return 0; | ||
| 399 | + | ||
| 400 | + return 1; | ||
| 401 | +} | ||
| 402 | +#endif | ||
| 403 | + | ||
| 404 | +static void *handle_request(void *arg) | ||
| 405 | +{ | ||
| 406 | + struct pollfd pds[2]; | ||
| 407 | + st_netfd_t cli_nfd, rmt_nfd; | ||
| 408 | + int sock; | ||
| 409 | + | ||
| 410 | + cli_nfd = (st_netfd_t) arg; | ||
| 411 | + pds[0].fd = st_netfd_fileno(cli_nfd); | ||
| 412 | + pds[0].events = POLLIN; | ||
| 413 | + | ||
| 414 | + /* Connect to remote host */ | ||
| 415 | + if ((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) { | ||
| 416 | + print_sys_error("socket"); | ||
| 417 | + goto done; | ||
| 418 | + } | ||
| 419 | + if ((rmt_nfd = st_netfd_open_socket(sock)) == NULL) { | ||
| 420 | + print_sys_error("st_netfd_open_socket"); | ||
| 421 | + close(sock); | ||
| 422 | + goto done; | ||
| 423 | + } | ||
| 424 | + if (st_connect(rmt_nfd, (struct sockaddr *)&rmt_addr, | ||
| 425 | + sizeof(rmt_addr), ST_UTIME_NO_TIMEOUT) < 0) { | ||
| 426 | + print_sys_error("st_connect"); | ||
| 427 | + st_netfd_close(rmt_nfd); | ||
| 428 | + goto done; | ||
| 429 | + } | ||
| 430 | + pds[1].fd = sock; | ||
| 431 | + pds[1].events = POLLIN; | ||
| 432 | + | ||
| 433 | + /* | ||
| 434 | + * Now just pump the data through. | ||
| 435 | + * XXX This should use one thread for each direction for true full-duplex. | ||
| 436 | + */ | ||
| 437 | + for ( ; ; ) { | ||
| 438 | + pds[0].revents = 0; | ||
| 439 | + pds[1].revents = 0; | ||
| 440 | + | ||
| 441 | + if (st_poll(pds, 2, ST_UTIME_NO_TIMEOUT) <= 0) { | ||
| 442 | + print_sys_error("st_poll"); | ||
| 443 | + break; | ||
| 444 | + } | ||
| 445 | + | ||
| 446 | + if (pds[0].revents & POLLIN) { | ||
| 447 | + if (!pass(cli_nfd, rmt_nfd)) | ||
| 448 | + break; | ||
| 449 | + } | ||
| 450 | + | ||
| 451 | + if (pds[1].revents & POLLIN) { | ||
| 452 | + if (!pass(rmt_nfd, cli_nfd)) | ||
| 453 | + break; | ||
| 454 | + } | ||
| 455 | + } | ||
| 456 | + st_netfd_close(rmt_nfd); | ||
| 457 | + | ||
| 458 | +done: | ||
| 459 | + | ||
| 460 | + st_netfd_close(cli_nfd); | ||
| 461 | + | ||
| 462 | + return NULL; | ||
| 463 | +} | ||
| 464 | + | ||
/*
 * Detach from the invoking shell using the classic double fork:
 * the original process and the intermediate child both exit, and the
 * surviving grandchild runs in its own session, with "/" as its working
 * directory and a umask of 022.
 *
 * A failing fork() or setsid() is reported and terminates the process
 * with status 1.  A failing chdir() is only reported.
 */
static void start_daemon(void)
{
  pid_t pid;

  /* Start forking */
  if ((pid = fork()) < 0) {
    print_sys_error("fork");
    exit(1);
  }
  if (pid > 0)
    exit(0);                  /* parent */

  /* First child process */
  if (setsid() < 0) {         /* become session leader */
    print_sys_error("setsid");
    exit(1);
  }

  if ((pid = fork()) < 0) {
    print_sys_error("fork");
    exit(1);
  }
  if (pid > 0)                /* first child */
    exit(0);

  /* Not fatal: the daemon can still run from its current directory */
  if (chdir("/") < 0)
    print_sys_error("chdir");
  umask(022);
}
| 490 | + | ||
/*
 * Spawn the worker processes ("virtual processors").
 *
 * nproc - number of processes to fork; values below 1 are treated as 1.
 *
 * Each forked child returns to the caller.  The calling process never
 * returns: it exits with status 0 once all children exist, or with
 * status 1 if a fork() fails.  Since this is only an example, nothing
 * supervises the children afterwards.
 */
static void set_concurrency(int nproc)
{
  int remaining;
  pid_t child;

  remaining = (nproc < 1) ? 1 : nproc;

  while (remaining-- > 0) {
    child = fork();
    if (child == 0) {
      /* Worker process: carry on in the caller */
      return;
    }
    if (child < 0) {
      print_sys_error("fork");
      exit(1);
    }
  }

  /* The spawning process has nothing left to do */
  exit(0);
}
| 517 | + | ||
/*
 * Ask the system how many processors are online.
 *
 * Uses sysconf() where _SC_NPROCESSORS_ONLN or _SC_NPROC_ONLN exists and
 * mpctl() when HPUX is defined.  With none of these available it sets
 * errno to ENOSYS and returns -1.
 *
 * Returns whatever the selected system call reports, converted to int.
 */
static int cpu_count(void)
{
#if defined (_SC_NPROCESSORS_ONLN)
  return (int) sysconf(_SC_NPROCESSORS_ONLN);
#elif defined (_SC_NPROC_ONLN)
  return (int) sysconf(_SC_NPROC_ONLN);
#elif defined (HPUX)
#include <sys/mpctl.h>
  return mpctl(MPC_GETNUMSPUS, 0, 0);
#else
  /* No known way to query the processor count on this platform */
  errno = ENOSYS;
  return -1;
#endif
}
| 536 | + | ||
| 537 | +static void print_sys_error(const char *msg) | ||
| 538 | +{ | ||
| 539 | + fprintf(stderr, "%s: %s: %s\n", prog, msg, strerror(errno)); | ||
| 540 | +} | ||
| 541 | + |
-
请 注册 或 登录 后发表评论